From fb5ff51f422e1718c09da01f3c5bb5baecc9d68e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 19 Jul 2011 12:20:14 -0700
Subject: i965: Fix regression in 29a911c50e4443dfebef0a2e32c39b64992fa3cc.

The previous define was the full 32-bit header, while the new define
was just the top 16 bits.
---
 src/mesa/drivers/dri/i965/brw_misc_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 03cebbb824b..f7e6e7c81d1 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw)
    struct gl_context *ctx = &intel->ctx;
 
    BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE);
+   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
    OUT_BATCH(0); /* xmin, ymin */
    OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
 	    ((ctx->DrawBuffer->Height - 1) << 16));
-- 
cgit v1.2.3


From f7dbcba280e4397cadb14f230aa925b4143cdde4 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Mon, 18 Jul 2011 00:37:45 -0700
Subject: intel: Fix stencil buffer to be W tiled

Until now, the stencil buffer was allocated as a Y tiled buffer, because
in several locations the PRM states that it is. However, it is actually
W tiled. From the PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section
4.5.2.1 W-Major Format:
    W-Major Tile Format is used for separate stencil.

The GTT is incapable of W fencing, so we allocate the stencil buffer with
I915_TILING_NONE and decode the tile's layout in software.

This fix touches the following portions of code:
    - In intel_alloc_renderbuffer_storage(), allocate the stencil
      buffer with I915_TILING_NONE.
    - In intel_verify_dri2_has_hiz(), verify that the stencil buffer is
      not tiled.
    - In the stencil buffer's span functions, the tile's layout must be
      decoded in software.

This commit mutually depends on the xf86-video-intel commit
    dri: Do not tile stencil buffer
    Author: Chad Versace <chad@chad-versace.us>
    Date:   Mon Jul 18 00:38:00 2011 -0700

On Gen6 with separate stencil enabled, fixes the following Piglit tests:
    bugs/fdo23670-drawpix_stencil
    general/stencil-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-copypixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-readpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-copypixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-readpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-copypixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-readpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-copypixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-readpixels
    spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-copypixels
    spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-readpixels
    spec/EXT_packed_depth_stencil/readpixels-24_8

Note: This is a candidate for the 7.11 branch.

Signed-off-by: Chad Versace <chad@chad-versace.us>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_clear.c   |  6 ++
 src/mesa/drivers/dri/intel/intel_context.c |  9 ++-
 src/mesa/drivers/dri/intel/intel_fbo.c     | 12 ++--
 src/mesa/drivers/dri/intel/intel_screen.h  |  9 ++-
 src/mesa/drivers/dri/intel/intel_span.c    | 88 +++++++++++++++++++++++-------
 5 files changed, 93 insertions(+), 31 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index dfca03c14bf..5ab98734cfc 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
 	     */
             tri_mask |= BUFFER_BIT_STENCIL;
          }
+	 else if (intel->has_separate_stencil &&
+	       stencilRegion->tiling == I915_TILING_NONE) {
+	    /* The stencil buffer is actually W tiled, which the hardware
+	     * cannot blit to. */
+	    tri_mask |= BUFFER_BIT_STENCIL;
+	 }
          else {
             /* clearing all stencil bits, use blitting */
             blit_mask |= BUFFER_BIT_STENCIL;
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 2ba13632569..fe8be082dfc 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
       assert(stencil_rb->Base.Format == MESA_FORMAT_S8);
       assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24);
 
-      if (stencil_rb->region->tiling == I915_TILING_Y) {
+      if (stencil_rb->region->tiling == I915_TILING_NONE) {
+	 /*
+	  * The stencil buffer is actually W tiled. The region's tiling is
+	  * I915_TILING_NONE, however, because the GTT is incapable of W
+	  * fencing.
+	  */
 	 intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE;
 	 return;
       } else {
@@ -1527,7 +1532,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
        * Presently, however, no verification or clean up is necessary, and
        * execution should not reach here. If the framebuffer still has a hiz
        * region, then we have already set dri2_has_hiz to true after
-       * confirming above that the stencil buffer is Y tiled.
+       * confirming above that the stencil buffer is W tiled.
        */
       assert(0);
    }
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 55bcc757873..35be3257ab3 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
 
    if (irb->Base.Format == MESA_FORMAT_S8) {
       /*
+       * The stencil buffer is W tiled. However, we request from the kernel a
+       * non-tiled buffer because the GTT is incapable of W fencing.
+       *
        * The stencil buffer has quirky pitch requirements.  From Vol 2a,
        * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
        *    The pitch must be set to 2x the value computed based on width, as
@@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
        * To accomplish this, we resort to the nasty hack of doubling the drm
        * region's cpp and halving its height.
        *
-       * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt()
-       * maps the memory incorrectly.
+       * If we neglect to double the pitch, then render corruption occurs.
        */
       irb->region = intel_region_alloc(intel->intelScreen,
-				       I915_TILING_Y,
+				       I915_TILING_NONE,
 				       cpp * 2,
-				       width,
-				       height / 2,
+				       ALIGN(width, 64),
+				       ALIGN((height + 1) / 2, 64),
 				       GL_TRUE);
       if (!irb->region)
 	return false;
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h
index b2013af1a29..9dd6a525566 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -63,9 +63,12 @@
  * x8_z24 and s8).
  *
  * Eventually, intel_update_renderbuffers() makes a DRI2 request for
- * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled,
- * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if
- * nothing happend.
+ * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is
+ * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to
+ * true and continue as if nothing happened.
+ *
+ * [1] The stencil buffer is actually W tiled. However, we request from the
+ *     kernel a non-tiled buffer because the GTT is incapable of W fencing.
  *
  * If the buffers are X tiled, however, the handshake has failed and we must
  * clean up.
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 153803fba09..2e1c80c4766 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel,
    int miny = 0;							\
    int maxx = rb->Width;						\
    int maxy = rb->Height;						\
-   int stride = rb->RowStride;						\
-   uint8_t *buf = rb->Data;						\
+									\
+   /*									\
+    * Here we ignore rb->Data and rb->RowStride as set by		\
+    * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile	\
+    * manually, the region's *real* base address and stride is		\
+    * required.								\
+    */									\
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
+   uint8_t *buf = irb->region->buffer->virtual;				\
+   unsigned stride = irb->region->pitch;				\
+   unsigned height = 2 * irb->region->height;				\
+   bool flip = rb->Name == 0;						\
+   int y_scale = flip ? -1 : 1;						\
+   int y_bias = flip ? (height - 1) : 0;				\
 
-/* Don't flip y. */
 #undef Y_FLIP
-#define Y_FLIP(y) y
+#define Y_FLIP(y) (y_scale * (y) + y_bias)
 
 /**
  * \brief Get pointer offset into stencil buffer.
  *
- * The stencil buffer interleaves two rows into one. Yay for crazy hardware.
- * The table below demonstrates how the pointer arithmetic behaves for a buffer
- * with positive stride (s=stride).
- *
- *     x    | y     | byte offset
- *     --------------------------
- *     0    | 0     | 0
- *     0    | 1     | 1
- *     1    | 0     | 2
- *     1    | 1     | 3
- *     ...  | ...   | ...
- *     0    | 2     | s
- *     0    | 3     | s + 1
- *     1    | 2     | s + 2
- *     1    | 3     | s + 3
+ * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
+ * must decode the tile's layout in software.
  *
+ * See
+ *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
+ *     Format.
+ *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
  *
+ * Even though the returned offset is always positive, the return type is
+ * signed due to
+ *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
+ *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
  */
 static inline intptr_t
-intel_offset_S8(int stride, GLint x, GLint y)
+intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
 {
-   return 2 * ((y / 2) * stride + x) + y % 2;
+   uint32_t tile_size = 4096;
+   uint32_t tile_width = 64;
+   uint32_t tile_height = 64;
+   uint32_t row_size = 64 * stride;
+
+   uint32_t tile_x = x / tile_width;
+   uint32_t tile_y = y / tile_height;
+
+   /* The byte's address relative to the tile's base address. */
+   uint32_t byte_x = x % tile_width;
+   uint32_t byte_y = y % tile_height;
+
+   uintptr_t u = tile_y * row_size
+               + tile_x * tile_size
+               + 512 * (byte_x / 8)
+               +  64 * (byte_y / 8)
+               +  32 * ((byte_y / 4) % 2)
+               +  16 * ((byte_x / 4) % 2)
+               +   8 * ((byte_y / 2) % 2)
+               +   4 * ((byte_x / 2) % 2)
+               +   2 * (byte_y % 2)
+               +   1 * (byte_x % 2);
+
+   /*
+    * Errata for Gen5:
+    *
+    * An additional offset is needed which is not documented in the PRM.
+    *
+    * if ((byte_x / 8) % 2 == 1) {
+    *    if ((byte_y / 8) % 2 == 0) {
+    *       u += 64;
+    *    } else {
+    *       u -= 64;
+    *    }
+    * }
+    *
+    * The offset is expressed more tersely as
+    * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1));
+    */
+
+   return u;
 }
 
 #define WRITE_STENCIL(x, y, src)  buf[intel_offset_S8(stride, x, y)] = src;
-- 
cgit v1.2.3


From f0e306c3430e4d8f6c8e085537807007a488f1e2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 15:24:47 -0600
Subject: mesa: update, shorten some comments in dd.h

---
 src/mesa/main/dd.h | 51 +++++++++++++--------------------------------------
 1 file changed, 13 insertions(+), 38 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 9fe6d527f92..e1ae30fe4d4 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -189,31 +189,22 @@ struct dd_function_table {
    /*@{*/
 
    /**
-    * Choose texture format.
-    * 
-    * This is called by the \c _mesa_store_tex[sub]image[123]d() fallback
-    * functions.  The driver should examine \p internalFormat and return a
-    * gl_format value.
+    * Choose actual hardware texture format given the user-provided source
+    * image format and type and the desired internal format.  In some
+    * cases, srcFormat and srcType can be GL_NONE.
+    * Called by glTexImage(), etc.
     */
    GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
                                      GLenum srcFormat, GLenum srcType );
 
    /**
-    * Called by glTexImage1D().
-    * 
-    * \param target user specified.
-    * \param format user specified.
-    * \param type user specified.
-    * \param pixels user specified.
-    * \param packing indicates the image packing of pixels.
+    * Called by glTexImage1D().  Simply copy the source texture data into the
+    * destination texture memory.  The gl_texture_image fields, etc. will be
+    * fully initialized.
+    * The parameters are the same as glTexImage1D(), plus:
+    * \param packing describes how to unpack the source data.
     * \param texObj is the target texture object.
-    * \param texImage is the target texture image.  It will have the texture \p
-    * width, \p height, \p depth, \p border and \p internalFormat information.
-    * 
-    * \p retainInternalCopy is returned by this function and indicates whether
-    * core Mesa should keep an internal copy of the texture image.
-    *
-    * Drivers should call a fallback routine from texstore.c if needed.
+    * \param texImage is the target texture image.
     */
    void (*TexImage1D)( struct gl_context *ctx, GLenum target, GLint level,
                        GLint internalFormat,
@@ -250,25 +241,9 @@ struct dd_function_table {
                        struct gl_texture_image *texImage );
 
    /**
-    * Called by glTexSubImage1D().
-    *
-    * \param target user specified.
-    * \param level user specified.
-    * \param xoffset user specified.
-    * \param yoffset user specified.
-    * \param zoffset user specified.
-    * \param width user specified.
-    * \param height user specified.
-    * \param depth user specified.
-    * \param format user specified.
-    * \param type user specified.
-    * \param pixels user specified.
-    * \param packing indicates the image packing of pixels.
-    * \param texObj is the target texture object.
-    * \param texImage is the target texture image.  It will have the texture \p
-    * width, \p height, \p border and \p internalFormat information.
-    *
-    * The driver should use a fallback routine from texstore.c if needed.
+    * Called by glTexSubImage1D().  Replace a subset of the target texture
+    * with new texel data.
+    * \sa dd_function_table::TexImage1D.
     */
    void (*TexSubImage1D)( struct gl_context *ctx, GLenum target, GLint level,
                           GLint xoffset, GLsizei width,
-- 
cgit v1.2.3


From d84791a72b33f96fab54ff2399e8053c50205454 Mon Sep 17 00:00:00 2001
From: Fredrik Höglund <fredrik@kde.org>
Date: Tue, 19 Jul 2011 15:25:32 -0600
Subject: st/mesa: fix the texture format in st_context_teximage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 1a339b6c71ebab6e1a64f05b2e133022d3bbcd15 made
st_ChooseTextureFormat map GL_RGBA with type GL_UNSIGNED_BYTE
to PIPE_FORMAT_A8B8G8R8_UNORM.

The image format for ARGB pixmaps is PIPE_FORMAT_B8G8R8A8_UNORM
however. This mismatch caused the texture to be recreated in
st_finalize_texture.

NOTE: This is a candidate for the 7.11 branch.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39209
Signed-off-by: Fredrik Höglund <fredrik@kde.org>
Reviewed-by: Stéphane Marchesin <marcheu@chromium.org>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/state_tracker/st_manager.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 7bd82aae206..d5228d387f7 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -587,7 +587,7 @@ st_context_teximage(struct st_context_iface *stctxi,
          internalFormat = GL_RGB;
 
       texFormat = st_ChooseTextureFormat(ctx, internalFormat,
-                                         GL_RGBA, GL_UNSIGNED_BYTE);
+                                         GL_BGRA, GL_UNSIGNED_BYTE);
 
       _mesa_init_teximage_fields(ctx, target, texImage,
                                  tex->width0, tex->height0, 1, 0,
-- 
cgit v1.2.3


From 5874890c26f434f54e9218b83fae4eb8175c24e9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: mesa: stop using ctx->Driver.CopyTexImage1D/2D() hooks

---
 src/mesa/main/teximage.c | 49 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 17 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 6f53686e7ff..302fd65cb27 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2797,29 +2797,43 @@ copyteximage(struct gl_context *ctx, GLuint dims,
 	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
       }
       else {
-         gl_format texFormat;
-
-         if (texImage->Data) {
-            ctx->Driver.FreeTexImageData( ctx, texImage );
-         }
+         /* choose actual hw format */
+         gl_format texFormat = _mesa_choose_texture_format(ctx, texObj,
+                                                           target, level,
+                                                           internalFormat,
+                                                           GL_NONE, GL_NONE);
 
-         ASSERT(texImage->Data == NULL);
+         if (legal_texture_size(ctx, texFormat, width, height, 1)) {
+            GLint srcX = x, srcY = y, dstX = 0, dstY = 0;
 
-         texFormat = _mesa_choose_texture_format(ctx, texObj, target, level,
-                                                 internalFormat, GL_NONE,
-                                                 GL_NONE);
+            /* Free old texture image */
+            ctx->Driver.FreeTexImageData(ctx, texImage);
 
-         if (legal_texture_size(ctx, texFormat, width, height, 1)) {
             _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1,
                                        border, internalFormat, texFormat);
 
-            ASSERT(ctx->Driver.CopyTexImage2D);
-            if (dims == 1)
-               ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat,
-                                          x, y, width, border);
-            else
-               ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat,
-                                          x, y, width, height, border);
+            /* Allocate texture memory (no pixel data yet) */
+            if (dims == 1) {
+               ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+                                      width, border, GL_NONE, GL_NONE, NULL,
+                                      &ctx->Unpack, texObj, texImage);
+            }
+            else {
+               ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                                      width, height, border, GL_NONE, GL_NONE,
+                                      NULL, &ctx->Unpack, texObj, texImage);
+            }
+
+            if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY,
+                                           &width, &height)) {
+               if (dims == 1)
+                  ctx->Driver.CopyTexSubImage1D(ctx, target, level, dstX,
+                                                srcX, srcY, width);
+                                                
+               else
+                  ctx->Driver.CopyTexSubImage2D(ctx, target, level, dstX, dstY,
+                                                srcX, srcY, width, height);
+            }
 
             check_gen_mipmap(ctx, target, texObj, level);
 
@@ -2830,6 +2844,7 @@ copyteximage(struct gl_context *ctx, GLuint dims,
             ctx->NewState |= _NEW_TEXTURE;
          }
          else {
+            /* the image is probably too large */
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
          }
       }
-- 
cgit v1.2.3


From 1da28fa959e80610ebc9b7a28bfb83e3cad3aee4 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: mesa: remove comments referring to Driver.TexImage1D/2D

---
 src/mesa/main/texstore.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 6e1e63bdfb0..c4aeaa8f16d 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -4577,8 +4577,7 @@ texture_row_stride(const struct gl_texture_image *texImage)
 
 
 /**
- * This is the software fallback for Driver.TexImage1D()
- * and Driver.CopyTexImage1D().
+ * This is the software fallback for Driver.TexImage1D().
  * \sa _mesa_store_teximage2d()
  */
 void
@@ -4629,8 +4628,7 @@ _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level,
 
 
 /**
- * This is the software fallback for Driver.TexImage2D()
- * and Driver.CopyTexImage2D().
+ * This is the software fallback for Driver.TexImage2D().
  *
  * This function is oriented toward storing images in main memory, rather
  * than VRAM.  Device driver's can easily plug in their own replacement.
@@ -4684,8 +4682,7 @@ _mesa_store_teximage2d(struct gl_context *ctx, GLenum target, GLint level,
 
 
 /**
- * This is the software fallback for Driver.TexImage3D()
- * and Driver.CopyTexImage3D().
+ * This is the software fallback for Driver.TexImage3D().
  * \sa _mesa_store_teximage2d()
  */
 void
-- 
cgit v1.2.3


From fbe6836043dff2798571b838096ed59c60ec4438 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: intel: remove intelCopyTexImage1D/2D()

---
 src/mesa/drivers/dri/intel/intel_tex_copy.c | 97 -----------------------------
 1 file changed, 97 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 1a3643da593..e89e91dee3e 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -163,101 +163,6 @@ intel_copy_texsubimage(struct intel_context *intel,
 }
 
 
-static void
-intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
-                    GLenum internalFormat,
-                    GLint x, GLint y, GLsizei width, GLint border)
-{
-   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-   int srcx, srcy, dstx, dsty, height;
-
-   if (border)
-      goto fail;
-
-   /* Setup or redefine the texture object, mipmap tree and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                          width, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-   srcx = x;
-   srcy = y;
-   dstx = 0;
-   dsty = 0;
-   height = 1;
-   if (!_mesa_clip_copytexsubimage(ctx,
-				   &dstx, &dsty,
-				   &srcx, &srcy,
-				   &width, &height))
-      return;
-
-   if (!intel_copy_texsubimage(intel_context(ctx), target,
-                               intel_texture_image(texImage),
-                               internalFormat, 0, 0, x, y, width, height))
-      goto fail;
-
-   return;
-
- fail:
-   fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
-   _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y,
-                             width, border);
-}
-
-
-static void
-intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
-                    GLenum internalFormat,
-                    GLint x, GLint y, GLsizei width, GLsizei height,
-                    GLint border)
-{
-   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-   int srcx, srcy, dstx, dsty;
-
-   if (border)
-      goto fail;
-
-   /* Setup or redefine the texture object, mipmap tree and texture
-    * image.  Don't populate yet.
-    */
-   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                          width, height, border,
-                          GL_RGBA, GL_UNSIGNED_BYTE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   srcx = x;
-   srcy = y;
-   dstx = 0;
-   dsty = 0;
-   if (!_mesa_clip_copytexsubimage(ctx,
-				   &dstx, &dsty,
-				   &srcx, &srcy,
-				   &width, &height))
-      return;
-
-   if (!intel_copy_texsubimage(intel_context(ctx), target,
-                               intel_texture_image(texImage),
-                               internalFormat, 0, 0, x, y, width, height))
-      goto fail;
-
-   return;
-
- fail:
-   fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
-   _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
-                             width, height, border);
-}
-
-
 static void
 intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
                        GLint xoffset, GLint x, GLint y, GLsizei width)
@@ -312,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level,
 void
 intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
 {
-   functions->CopyTexImage1D = intelCopyTexImage1D;
-   functions->CopyTexImage2D = intelCopyTexImage2D;
    functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
    functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
 }
-- 
cgit v1.2.3


From 9ed87c4463cf265b06566d15ba86bf20661c70de Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: radeon: remove radeonCopyTexImage2D()

---
 src/mesa/drivers/dri/r200/r200_tex.c          |  1 -
 src/mesa/drivers/dri/r300/r300_tex.c          |  1 -
 src/mesa/drivers/dri/r600/evergreen_tex.c     |  1 -
 src/mesa/drivers/dri/r600/r600_tex.c          |  1 -
 src/mesa/drivers/dri/radeon/radeon_tex.c      |  1 -
 src/mesa/drivers/dri/radeon/radeon_tex_copy.c | 55 ---------------------------
 src/mesa/drivers/dri/radeon/radeon_texture.h  |  5 ---
 7 files changed, 65 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
index d42e8f12041..91e77f9f7da 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.c
+++ b/src/mesa/drivers/dri/r200/r200_tex.c
@@ -527,7 +527,6 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu
    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
 
    if (radeon->radeonScreen->kernel_mm) {
-      functions->CopyTexImage2D = radeonCopyTexImage2D;
       functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
    }
 
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 590d9afe14a..93d8fe185ef 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun
 	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
 
 	if (radeon->radeonScreen->kernel_mm) {
-		functions->CopyTexImage2D = radeonCopyTexImage2D;
 		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c
index 33a5f277683..9784a8484f2 100644
--- a/src/mesa/drivers/dri/r600/evergreen_tex.c
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.c
@@ -1688,7 +1688,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table
 	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
 
 	if (radeon->radeonScreen->kernel_mm) {
-		functions->CopyTexImage2D = radeonCopyTexImage2D;
 		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index eb7ed30c7a3..3efa1d197fa 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun
 	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
 
 	if (radeon->radeonScreen->kernel_mm) {
-		functions->CopyTexImage2D = radeonCopyTexImage2D;
 		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
 	}
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index 25a8ddf7b6a..a0b5506ae76 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *
    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
 
    if (radeon->radeonScreen->kernel_mm) {
-      functions->CopyTexImage2D = radeonCopyTexImage2D;
       functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
    }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
index f14dfa25d40..94ff3c4a727 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -140,61 +140,6 @@ do_copy_texsubimage(struct gl_context *ctx,
                              dstx, dsty, width, height, flip_y);
 }
 
-void
-radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                     GLenum internalFormat,
-                     GLint x, GLint y, GLsizei width, GLsizei height,
-                     GLint border)
-{
-    struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-    struct gl_texture_object *texObj =
-        _mesa_select_tex_object(ctx, texUnit, target);
-    struct gl_texture_image *texImage =
-        _mesa_select_tex_image(ctx, texObj, target, level);
-    int srcx, srcy, dstx, dsty;
-
-    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-    radeon_prepare_render(radeon);
-
-    if (border)
-        goto fail;
-
-    /* Setup or redefine the texture object, mipmap tree and texture
-     * image.  Don't populate yet.
-     */
-    ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                           width, height, border,
-                           GL_RGBA, GL_UNSIGNED_BYTE, NULL,
-                           &ctx->DefaultPacking, texObj, texImage);
-
-    srcx = x;
-    srcy = y;
-    dstx = 0;
-    dsty = 0;
-    if (!_mesa_clip_copytexsubimage(ctx,
-                                    &dstx, &dsty,
-                                    &srcx, &srcy,
-                                    &width, &height)) {
-        return;
-    }
-
-    if (!do_copy_texsubimage(ctx, target, level,
-                             radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
-                             0, 0, x, y, width, height)) {
-        goto fail;
-    }
-
-    return;
-
-fail:
-    radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
-                 "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n",
-                 _mesa_lookup_enum_by_nr(internalFormat), border);
-
-    _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
-                              width, height, border);
-}
-
 void
 radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
                         GLint xoffset, GLint yoffset,
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
index 538a07fbba8..6fc06d967dd 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.h
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le
 				 struct gl_texture_object *texObj,
 				 struct gl_texture_image *texImage);
 
-void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-			GLenum internalFormat,
-			GLint x, GLint y, GLsizei width, GLsizei height,
-			GLint border);
-
 void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
 			GLint xoffset, GLint yoffset,
 			GLint x, GLint y,
-- 
cgit v1.2.3


From 0823ef84a5c3a6332ea76d0001febf6aaa440dc3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: st/mesa: remove st_CopyTexImage1D/2D()

---
 src/mesa/state_tracker/st_cb_texture.c | 55 ----------------------------------
 1 file changed, 55 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 6907cfc03cf..83e83695aae 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1609,59 +1609,6 @@ st_copy_texsubimage(struct gl_context *ctx,
 
 
-static void
-st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
-                  GLenum internalFormat,
-                  GLint x, GLint y, GLsizei width, GLint border)
-{
-   struct gl_texture_unit *texUnit =
-      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-
-   /* Setup or redefine the texture object, texture and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                          width, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   st_copy_texsubimage(ctx, target, level,
-                       0, 0, 0,  /* destX,Y,Z */
-                       x, y, width, 1);  /* src X, Y, size */
-}
-
-
-static void
-st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
-                  GLenum internalFormat,
-                  GLint x, GLint y, GLsizei width, GLsizei height,
-                  GLint border)
-{
-   struct gl_texture_unit *texUnit =
-      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-
-   /* Setup or redefine the texture object, texture and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                          width, height, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   st_copy_texsubimage(ctx, target, level,
-                       0, 0, 0,  /* destX,Y,Z */
-                       x, y, width, height);  /* src X, Y, size */
-}
-
-
 static void
 st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
                      GLint xoffset, GLint x, GLint y, GLsizei width)
@@ -1947,8 +1894,6 @@ st_init_texture_functions(struct dd_function_table *functions)
    functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D;
    functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D;
    functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D;
-   functions->CopyTexImage1D = st_CopyTexImage1D;
-   functions->CopyTexImage2D = st_CopyTexImage2D;
    functions->CopyTexSubImage1D = st_CopyTexSubImage1D;
    functions->CopyTexSubImage2D = st_CopyTexSubImage2D;
    functions->CopyTexSubImage3D = st_CopyTexSubImage3D;
-- 
cgit v1.2.3


From 774311fb5403e3da7ff0197199ffad8f34089e6a Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: meta: remove _mesa_meta_CopyTexImage1D/2D()

---
 src/mesa/drivers/common/driverfuncs.c |   2 -
 src/mesa/drivers/common/meta.c        | 113 ----------------------------------
 src/mesa/drivers/common/meta.h        |  10 ---
 3 files changed, 125 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 8ab129dd73d..76630264bf7 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -95,8 +95,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
    driver->TexSubImage2D = _mesa_store_texsubimage2d;
    driver->TexSubImage3D = _mesa_store_texsubimage3d;
    driver->GetTexImage = _mesa_get_teximage;
-   driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D;
-   driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D;
    driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D;
    driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D;
    driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D;
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 0e58aeca3f5..706239c7736 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -2677,119 +2677,6 @@ get_temp_image_type(struct gl_context *ctx, GLenum baseFormat)
 }
 
 
-/**
- * Helper for _mesa_meta_CopyTexImage1/2D() functions.
- * Have to be careful with locking and meta state for pixel transfer.
- */
-static void
-copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
-               GLenum internalFormat, GLint x, GLint y,
-               GLsizei width, GLsizei height, GLint border)
-{
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-   GLenum format, type;
-   GLint bpp;
-   void *buf;
-   struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer;
-
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   texImage = _mesa_get_tex_image(ctx, texObj, target, level);
-
-   /* Choose format/type for temporary image buffer */
-   format = _mesa_base_tex_format(ctx, internalFormat);
-
-   if (format == GL_LUMINANCE &&
-       _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) {
-      /* The glReadPixels() path will convert RGB to luminance by
-       * summing R+G+B.  glCopyTexImage() is supposed to behave as
-       * glCopyPixels, which doesn't do that change, and instead
-       * leaves it up to glTexImage which converts RGB to luminance by
-       * just taking the R channel.  To avoid glReadPixels() trashing
-       * our data, use RGBA for our temporary image.
-       */
-      format = GL_RGBA;
-   }
-
-   type = get_temp_image_type(ctx, format);
-   bpp = _mesa_bytes_per_pixel(format, type);
-   if (bpp <= 0) {
-      _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()");
-      return;
-   }
-
-   /*
-    * Alloc image buffer (XXX could use a PBO)
-    */
-   buf = malloc(width * height * bpp);
-   if (!buf) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
-      return;
-   }
-
-   _mesa_unlock_texture(ctx, texObj); /* need to unlock first */
-
-   /*
-    * Read image from framebuffer (disable pixel transfer ops)
-    */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
-   ctx->Driver.ReadPixels(ctx, x, y, width, height,
-			  format, type, &ctx->Pack, buf);
-   _mesa_meta_end(ctx);
-
-   if (texImage->Data) {
-      ctx->Driver.FreeTexImageData(ctx, texImage);
-   }
-
-   /* The texture's format was already chosen in _mesa_CopyTexImage() */
-   ASSERT(texImage->TexFormat != MESA_FORMAT_NONE);
-
-   /*
-    * Store texture data (with pixel transfer ops)
-    */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE);
-
-   _mesa_update_state(ctx); /* to update pixel transfer state */
-
-   if (target == GL_TEXTURE_1D) {
-      ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                             width, border, format, type,
-                             buf, &ctx->Unpack, texObj, texImage);
-   }
-   else {
-      ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                             width, height, border, format, type,
-                             buf, &ctx->Unpack, texObj, texImage);
-   }
-   _mesa_meta_end(ctx);
-
-   _mesa_lock_texture(ctx, texObj); /* re-lock */
-
-   free(buf);
-}
-
-
-void
-_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLint border)
-{
-   copy_tex_image(ctx, 1, target, level, internalFormat, x, y,
-                  width, 1, border);
-}
-
-
-void
-_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLsizei height, GLint border)
-{
-   copy_tex_image(ctx, 2, target, level, internalFormat, x, y,
-                  width, height, border);
-}
-
-
-
 /**
  * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions.
  * Have to be careful with locking and meta state for pixel transfer.
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index b0797d3d91a..7190dee768a 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -68,16 +68,6 @@ extern void
 _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
                           struct gl_texture_object *texObj);
 
-extern void
-_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLint border);
-
-extern void
-_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLsizei height, GLint border);
-
 extern void
 _mesa_meta_CopyTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level,
                              GLint xoffset,
-- 
cgit v1.2.3


From 1c1fc62e388534b6c0751fc9f8ab34a89e25efd0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: mesa: remove unused dd_function_table::CopyTexImage1D/2D() hooks

---
 src/mesa/main/dd.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index e1ae30fe4d4..e0c5844e193 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -289,24 +289,6 @@ struct dd_function_table {
                         struct gl_texture_object *texObj,
                         struct gl_texture_image *texImage );
 
-   /**
-    * Called by glCopyTexImage1D().
-    * 
-    * Drivers should use a fallback routine from texstore.c if needed.
-    */
-   void (*CopyTexImage1D)( struct gl_context *ctx, GLenum target, GLint level,
-                           GLenum internalFormat, GLint x, GLint y,
-                           GLsizei width, GLint border );
-
-   /**
-    * Called by glCopyTexImage2D().
-    * 
-    * Drivers should use a fallback routine from texstore.c if needed.
-    */
-   void (*CopyTexImage2D)( struct gl_context *ctx, GLenum target, GLint level,
-                           GLenum internalFormat, GLint x, GLint y,
-                           GLsizei width, GLsizei height, GLint border );
-
    /**
     * Called by glCopyTexSubImage1D().
     * 
-- 
cgit v1.2.3


From d5e32397762a3bd55fa69ad6332351512083f9c6 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: st/mesa: get rid of redundant clipping code in st_copy_texsubimage()

---
 src/mesa/state_tracker/st_cb_texture.c | 28 ----------------------------
 1 file changed, 28 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 83e83695aae..25f08aa4d09 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1466,34 +1466,6 @@ st_copy_texsubimage(struct gl_context *ctx,
       depth/stencil samples per pixel? Need some transfer clarifications. */
    assert(sample_count < 2);
 
-   if (srcX < 0) {
-      width -= -srcX;
-      destX += -srcX;
-      srcX = 0;
-   }
-
-   if (srcY < 0) {
-      height -= -srcY;
-      destY += -srcY;
-      srcY = 0;
-   }
-
-   if (destX < 0) {
-      width -= -destX;
-      srcX += -destX;
-      destX = 0;
-   }
-
-   if (destY < 0) {
-      height -= -destY;
-      srcY += -destY;
-      destY = 0;
-   }
-
-   if (width < 0 || height < 0)
-      return;
-
-
    assert(strb);
    assert(strb->surface);
    assert(stImage->pt);
-- 
cgit v1.2.3


From eee570290aebc8a339acd063033e3daefcef2bc6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 17 Jul 2011 14:53:16 -0700
Subject: meta: Add a GLSL-based _mesa_meta_Clear() variant.

This cuts out a large portion of the overhead of glClear() from
resetting the texenv state and recomputing the fixed function
programs.  It also means less use of fixed function internally in our
GLES2 drivers, which is rather bogus.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/drivers/common/meta.c | 160 ++++++++++++++++++++++++++++++++++++++++-
 src/mesa/drivers/common/meta.h |   3 +
 2 files changed, 162 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 0e58aeca3f5..887118b9417 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -62,6 +62,7 @@
 #include "main/teximage.h"
 #include "main/texparam.h"
 #include "main/texstate.h"
+#include "main/uniforms.h"
 #include "main/varray.h"
 #include "main/viewport.h"
 #include "program/program.h"
@@ -235,6 +236,8 @@ struct clear_state
 {
    GLuint ArrayObj;
    GLuint VBO;
+   GLuint ShaderProg;
+   GLint ColorLocation;
 };
 
 
@@ -1589,10 +1592,165 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
    _mesa_meta_end(ctx);
 }
 
+static void
+meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear)
+{
+   const char *vs_source =
+      "attribute vec4 position;\n"
+      "void main()\n"
+      "{\n"
+      "   gl_Position = position;\n"
+      "}\n";
+   const char *fs_source =
+      "uniform vec4 color;\n"
+      "void main()\n"
+      "{\n"
+      "   gl_FragColor = color;\n"
+      "}\n";
+   GLuint vs, fs;
+
+   if (clear->ArrayObj != 0)
+      return;
+
+   /* create vertex array object */
+   _mesa_GenVertexArrays(1, &clear->ArrayObj);
+   _mesa_BindVertexArray(clear->ArrayObj);
+
+   /* create vertex array buffer */
+   _mesa_GenBuffersARB(1, &clear->VBO);
+   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+
+   /* setup vertex arrays */
+   _mesa_VertexAttribPointerARB(0, 3, GL_FLOAT, GL_FALSE, 0, (void *)0);
+   _mesa_EnableVertexAttribArrayARB(0);
+
+   vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER);
+   _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL);
+   _mesa_CompileShaderARB(vs);
+
+   fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER);
+   _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL);
+   _mesa_CompileShaderARB(fs);
+
+   clear->ShaderProg = _mesa_CreateProgramObjectARB();
+   _mesa_AttachShader(clear->ShaderProg, fs);
+   _mesa_AttachShader(clear->ShaderProg, vs);
+   _mesa_BindAttribLocationARB(clear->ShaderProg, 0, "position");
+   _mesa_LinkProgramARB(clear->ShaderProg);
+
+   clear->ColorLocation = _mesa_GetUniformLocationARB(clear->ShaderProg,
+						      "color");
+}
+
+/**
+ * GLSL-based meta implementation of ctx->Driver.Clear() via polygon rendering.
+ */
+void
+_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
+{
+   struct clear_state *clear = &ctx->Meta->Clear;
+   GLbitfield metaSave;
+   const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const float x0 = ((float)fb->_Xmin / fb->Width)  * 2.0f - 1.0f;
+   const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f;
+   const float x1 = ((float)fb->_Xmax / fb->Width)  * 2.0f - 1.0f;
+   const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f;
+   const float z = -invert_z(ctx->Depth.Clear);
+   struct vertex {
+      GLfloat x, y, z;
+   } verts[4];
+
+   metaSave = (META_ALPHA_TEST |
+	       META_BLEND |
+	       META_DEPTH_TEST |
+	       META_RASTERIZATION |
+	       META_SHADER |
+	       META_STENCIL_TEST |
+	       META_VERTEX |
+	       META_VIEWPORT |
+	       META_CLAMP_FRAGMENT_COLOR);
+
+   if (!(buffers & BUFFER_BITS_COLOR)) {
+      /* We'll use colormask to disable color writes.  Otherwise,
+       * respect color mask
+       */
+      metaSave |= META_COLOR_MASK;
+   }
+
+   _mesa_meta_begin(ctx, metaSave);
+
+   meta_glsl_clear_init(ctx, clear);
+
+   _mesa_UseProgramObjectARB(clear->ShaderProg);
+   _mesa_Uniform4fvARB(clear->ColorLocation, 1,
+		       ctx->Color.ClearColorUnclamped);
+
+   _mesa_BindVertexArray(clear->ArrayObj);
+   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+
+   /* GL_COLOR_BUFFER_BIT */
+   if (buffers & BUFFER_BITS_COLOR) {
+      /* leave colormask, glDrawBuffer state as-is */
+
+      /* Clears never have the color clamped. */
+      _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
+   }
+   else {
+      ASSERT(metaSave & META_COLOR_MASK);
+      _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
+   }
+
+   /* GL_DEPTH_BUFFER_BIT */
+   if (buffers & BUFFER_BIT_DEPTH) {
+      _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+      _mesa_DepthFunc(GL_ALWAYS);
+      _mesa_DepthMask(GL_TRUE);
+   }
+   else {
+      assert(!ctx->Depth.Test);
+   }
+
+   /* GL_STENCIL_BUFFER_BIT */
+   if (buffers & BUFFER_BIT_STENCIL) {
+      _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE);
+      _mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
+                              GL_REPLACE, GL_REPLACE, GL_REPLACE);
+      _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
+                                ctx->Stencil.Clear & stencilMax,
+                                ctx->Stencil.WriteMask[0]);
+   }
+   else {
+      assert(!ctx->Stencil.Enabled);
+   }
+
+   /* vertex positions */
+   verts[0].x = x0;
+   verts[0].y = y0;
+   verts[0].z = z;
+   verts[1].x = x1;
+   verts[1].y = y0;
+   verts[1].z = z;
+   verts[2].x = x1;
+   verts[2].y = y1;
+   verts[2].z = z;
+   verts[3].x = x0;
+   verts[3].y = y1;
+   verts[3].z = z;
+
+   /* upload new vertex data */
+   _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts,
+		       GL_DYNAMIC_DRAW_ARB);
+
+   /* draw quad */
+   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+   _mesa_meta_end(ctx);
+}
 
 /**
  * Meta implementation of ctx->Driver.CopyPixels() in terms
- * of texture mapping and polygon rendering.
+ * of texture mapping and polygon rendering and GLSL shaders.
  */
 void
 _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY,
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index b0797d3d91a..32c789ea638 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -42,6 +42,9 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
 extern void
 _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers);
 
+extern void
+_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers);
+
 extern void
 _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
                       GLsizei width, GLsizei height,
-- 
cgit v1.2.3


From 540e66b3bebb5ae82422e386aa178147ea14a39e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 17 Jul 2011 14:55:10 -0700
Subject: intel: Use the GLSL-based meta clear when available.

Improves firefox-talos-gfx performance under GL when 3D clears are
enabled:
[  0]       gl-before     firefox-talos-gfx   20.193   20.251   0.27%    3/3
[  0]       gl-after      firefox-talos-gfx   18.013   18.040   0.19%    3/3
---
 src/mesa/drivers/dri/intel/intel_clear.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index 5ab98734cfc..81c062fba53 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -188,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
 
    if (tri_mask) {
       debug_mask("tri", tri_mask);
-      _mesa_meta_Clear(&intel->ctx, tri_mask);
+      if (ctx->Extensions.ARB_fragment_shader)
+	 _mesa_meta_glsl_Clear(&intel->ctx, tri_mask);
+      else
+	 _mesa_meta_Clear(&intel->ctx, tri_mask);
    }
 }
 
-- 
cgit v1.2.3


From dc7422405f6f3c201993251e4665bb9ec1b59db0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 18 Jul 2011 15:25:10 -0700
Subject: i965: Avoid kernel BUG_ON if we happen to wait on the pipe_control
 w/a BO.

For this and occlusion queries, we're trying to avoid setting
I915_GEM_DOMAIN_RENDER for the write domain, because the data written
is definitely not going through the render cache, but we do need to
tell the kernel that the object has been written.  However, when using
I915_GEM_DOMAIN_GTT, the kernel on retiring the batchbuffer sees that
the w/a BO has a write domain of GTT, and puts it on the flushing
list.  If something tries to wait for that BO to finish rendering
(such as the AUB dumper reading the contents of BOs), we get into
wait_request (since obj->active) but with a 0 seqno (since the object
is on the flushing list, not actually on a ringbuffer), and BUG_ONs.

To avoid the kernel bug (which I'm hoping to delete soon anyway), just
use I915_GEM_DOMAIN_INSTRUCTION like occlusion queries do.  This
doesn't result in more flushing, because we invalidate INSTRUCTION on
every batchbuffer now that we're state streaming, anyway.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Tested-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_batchbuffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index b61a2ffef19..9c97ef22888 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -325,7 +325,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
    OUT_BATCH(_3DSTATE_PIPE_CONTROL);
    OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
    OUT_RELOC(intel->batch.workaround_bo,
-	     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0);
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
    OUT_BATCH(0); /* write data */
    ADVANCE_BATCH();
 
-- 
cgit v1.2.3


From 407785d0e97abd0cc51a6e360089111973748e7c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 18 Jul 2011 17:17:03 -0700
Subject: i965: Enable the PIPE_CONTROL workaround workaround out of paranoia.

There's scary stuff going on in PIPE_CONTROL internals, and if the
BSpec says to do this to make PIPE_CONTROL work, I'll go ahead and do
it because we'll probably never be able to debug it after the fact.

v2: Use stall at scoreboard instead of depth stall, as noted by Ken.
---
 src/mesa/drivers/dri/intel/intel_batchbuffer.c | 31 +++++++++++++++++++++++---
 src/mesa/drivers/dri/intel/intel_reg.h         |  1 +
 2 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 9c97ef22888..97cc219ce6d 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -308,12 +308,29 @@ emit:
  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
  *
- * XXX: There is also a workaround that would appear to apply to this
- * workaround, but it doesn't appear to be necessary so far:
+ * And the workaround for these two requires this workaround first:
  *
- * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
  * BEFORE the pipe-control with a post-sync op and no write-cache
  * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
  */
 void
 intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
@@ -321,6 +338,14 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
    if (!intel->batch.need_workaround_flush)
       return;
 
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_CS_STALL |
+	     PIPE_CONTROL_STALL_AT_SCOREBOARD);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
+
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_PIPE_CONTROL);
    OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 5aa629150cf..a98a669af21 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -75,6 +75,7 @@
 #define PIPE_CONTROL_VF_CACHE_INVALIDATE	(1 << 4)
 #define PIPE_CONTROL_CONST_CACHE_INVALIDATE	(1 << 3)
 #define PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1 << 2)
+#define PIPE_CONTROL_STALL_AT_SCOREBOARD	(1 << 1)
 #define PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1 << 0)
 #define PIPE_CONTROL_PPGTT_WRITE	(0 << 2)
 #define PIPE_CONTROL_GLOBAL_GTT_WRITE	(1 << 2)
-- 
cgit v1.2.3


From 3e5d36267d8c9536490c902f785137a7fa0637fc Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 19 Jul 2011 15:06:15 -0700
Subject: i965: Apply a homebrew workaround for GPU hang in OGLC api-texcoord.

The behavior of flushes in the hardware is a maze of twisty passages,
and strangely the VS constants appear to be loaded during a pipeline
flush instead of at the time of the packet emit according to the
simulator.  On moving the STATE_BASE_ADDRESS packet to where it really
needed to live (in order for data loads by other packets to be
correct), we sometimes no longer got a flush between those packets
where we apparently needed it.  This replicates the flushes implied by
a STATE_BASE_ADDRESS update, fixing the GPU hangs in OGLC and the
"engine" demo.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36821
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39257
Tested-by: Keith Packard <keithp@keithp.com> (bzflag and etracer fixed)
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/gen6_vs_state.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index fb4cdbaadf9..e70454416bf 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -160,6 +160,32 @@ upload_vs_state(struct brw_context *brw)
 	     GEN6_VS_STATISTICS_ENABLE |
 	     GEN6_VS_ENABLE);
    ADVANCE_BATCH();
+
+   /* Based on my reading of the simulator, the VS constants don't get
+    * pulled into the VS FF unit until an appropriate pipeline flush
+    * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
+    * references to them into a little FIFO.  The flushes are common,
+    * but don't reliably happen between this and a 3DPRIMITIVE, causing
+    * the primitive to use the wrong constants.  Then the FIFO
+    * containing the constant setup gets added to again on the next
+    * constants change, and eventually when a flush does happen the
+    * unit is overwhelmed by constant changes and dies.
+    *
+    * To avoid this, send a PIPE_CONTROL down the line that will
+    * update the unit immediately loading the constants.  The flush
+    * type bits here were those set by the STATE_BASE_ADDRESS whose
+    * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
+    * bug reports that led to this workaround, and may be more than
+    * what is strictly required to avoid the issue.
+    */
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
+	     PIPE_CONTROL_INSTRUCTION_FLUSH |
+	     PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state gen6_vs_state = {
-- 
cgit v1.2.3


From 000896c0bb99f356e52854608a29476d3ade387c Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Tue, 19 Jul 2011 03:05:07 +0200
Subject: mesa: GLES2 should return different error enums for invalid fbo
 queries

ES 2.0.25 page 127 says:

  If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then
  querying any other pname will generate INVALID_ENUM.

See also:
b9e9df78a03edb35472c2e231aef4747e09db792

NOTE: This is a candidate for the 7.10 and 7.11 branches.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/fbobject.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 84969360d92..82eb7fb718d 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -2134,10 +2134,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
 {
    const struct gl_renderbuffer_attachment *att;
    struct gl_framebuffer *buffer;
+   GLenum err;
    GET_CURRENT_CONTEXT(ctx);
 
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
+   /* The error differs in GL and GLES. */
+   err = ctx->API == API_OPENGL ? GL_INVALID_OPERATION : GL_INVALID_ENUM;
+
    buffer = get_framebuffer_target(ctx, target);
    if (!buffer) {
       _mesa_error(ctx, GL_INVALID_ENUM,
@@ -2188,7 +2192,12 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
       }
       else {
          assert(att->Type == GL_NONE);
-         *params = 0;
+         if (ctx->API == API_OPENGL) {
+            *params = 0;
+         } else {
+            _mesa_error(ctx, GL_INVALID_ENUM,
+                        "glGetFramebufferAttachmentParameterivEXT(pname)");
+         }
       }
       return;
    case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT:
@@ -2196,7 +2205,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
 	 *params = att->TextureLevel;
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2214,7 +2223,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
          }
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2232,7 +2241,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
          }
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2246,7 +2255,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2267,7 +2276,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
          return;
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2301,7 +2310,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else if (att->Texture) {
-- 
cgit v1.2.3


From 12c22cab77f35a887d9f6790e0de4a8fa4b3b575 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Thu, 7 Jul 2011 13:03:45 -0700
Subject: mesa: Add an ifndef guard around the definition of the INLINE macro

Several Mesa headers redundantly define the INLINE macro.  Adding this
guard prevents the compiler from complaining about macro redefinition.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/compiler.h | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index 743841be4ef..d736fdfc58a 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -139,26 +139,28 @@ extern "C" {
 /**
  * Function inlining
  */
-#if defined(__GNUC__)
-#  define INLINE __inline__
-#elif defined(__MSC__)
-#  define INLINE __inline
-#elif defined(_MSC_VER)
-#  define INLINE __inline
-#elif defined(__ICL)
-#  define INLINE __inline
-#elif defined(__INTEL_COMPILER)
-#  define INLINE inline
-#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-#  define INLINE __inline
-#elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
-#  define INLINE inline
-#  define __inline inline
-#  define __inline__ inline
-#elif (__STDC_VERSION__ >= 199901L) /* C99 */
-#  define INLINE inline
-#else
-#  define INLINE
+#ifndef INLINE
+#  if defined(__GNUC__)
+#    define INLINE __inline__
+#  elif defined(__MSC__)
+#    define INLINE __inline
+#  elif defined(_MSC_VER)
+#    define INLINE __inline
+#  elif defined(__ICL)
+#    define INLINE __inline
+#  elif defined(__INTEL_COMPILER)
+#    define INLINE inline
+#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+#    define INLINE __inline
+#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+#    define INLINE inline
+#    define __inline inline
+#    define __inline__ inline
+#  elif (__STDC_VERSION__ >= 199901L) /* C99 */
+#    define INLINE inline
+#  else
+#    define INLINE
+#  endif
 #endif
 
 
-- 
cgit v1.2.3


From d6e1a8f71437d4a65e65f93271b2892dd62b0d23 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 17 Jul 2011 23:15:54 -0700
Subject: ir_to_mesa: Add each relative address to the previous

This fixes many cases of accessing arrays of matrices using
non-constant indices at each level.

Fixes i965 piglit:

    vs-temp-array-mat[234]-index-col-rd
    vs-temp-array-mat[234]-index-col-row-rd
    vs-temp-array-mat[234]-index-col-wr
    vs-uniform-array-mat[234]-index-col-rd

Fixes swrast piglit:

    fs-temp-array-mat[234]-index-col-rd
    fs-temp-array-mat[234]-index-col-row-rd
    fs-temp-array-mat[234]-index-col-wr
    fs-uniform-array-mat[234]-index-col-rd
    fs-uniform-array-mat[234]-index-col-row-rd
    fs-varying-array-mat[234]-index-col-rd
    fs-varying-array-mat[234]-index-col-row-rd
    vs-temp-array-mat[234]-index-col-rd
    vs-temp-array-mat[234]-index-col-row-rd
    vs-temp-array-mat[234]-index-col-wr
    vs-uniform-array-mat[234]-index-col-rd
    vs-uniform-array-mat[234]-index-col-row-rd
    vs-varying-array-mat[234]-index-col-rd
    vs-varying-array-mat[234]-index-col-row-rd
    vs-varying-array-mat[234]-index-col-wr

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d8e5a3a9772..beb481b3a3b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1496,6 +1496,18 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir)
 	      this->result, src_reg_for_float(element_size));
       }
 
+      /* If there was already a relative address register involved, add the
+       * new and the old together to get the new offset.
+       */
+      if (src.reladdr != NULL)  {
+	 src_reg accum_reg = get_temp(glsl_type::float_type);
+
+	 emit(ir, OPCODE_ADD, dst_reg(accum_reg),
+	      index_reg, *src.reladdr);
+
+	 index_reg = accum_reg;
+      }
+
       src.reladdr = ralloc(mem_ctx, src_reg);
       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
    }
-- 
cgit v1.2.3


From f7cd9a858c043e609fcdbf9ac9dfc1ef7ad002bf Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 17 Jul 2011 23:35:26 -0700
Subject: ir_to_mesa: Copy reladdr in src_reg(dst_reg) constructor

Fixes i965 piglit:

    vs-temp-array-mat[234]-col-row-wr
    vs-temp-array-mat[234]-index-col-row-wr
    vs-temp-array-mat[234]-index-row-wr
    vs-temp-mat[234]-col-row-wr

Fixes swrast piglit:

    fs-temp-array-mat[234]-col-row-wr
    fs-temp-array-mat[234]-index-col-row-wr
    fs-temp-array-mat[234]-index-row-wr
    fs-temp-mat[234]-col-row-wr
    vs-temp-array-mat[234]-col-row-wr
    vs-temp-array-mat[234]-index-col-row-wr
    vs-temp-array-mat[234]-index-row-wr
    vs-temp-mat[234]-col-row-wr

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index beb481b3a3b..8b4a535b75f 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -134,7 +134,7 @@ src_reg::src_reg(dst_reg reg)
    this->index = reg.index;
    this->swizzle = SWIZZLE_XYZW;
    this->negate = 0;
-   this->reladdr = NULL;
+   this->reladdr = reg.reladdr;
 }
 
 dst_reg::dst_reg(src_reg reg)
-- 
cgit v1.2.3


From fbeb68e880318808f90c779cd3f8b8c4160eecf8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 20 Jul 2011 18:02:17 -0700
Subject: prog_optimize: Set unused regs to PROGRAM_UNDEFINED after CMP->MOV
 conversion

Leaving the unused registers with other values caused assertion
failures and other problems in places that blindly iterate over all
sources.

brw_vs_emit.c:1381: get_src_reg: Assertion `c->regs[file][index].nr !=
0' failed.

Fixes i965 piglit:

    vs-uniform-array-mat[234]-col-row-rd
    vs-uniform-array-mat[234]-index-col-row-rd
    vs-uniform-array-mat[234]-index-row-rd
    vs-uniform-mat[234]-col-row-rd

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/prog_optimize.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 8a40fa69eca..f4a7a638d5f 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -1319,6 +1319,15 @@ _mesa_simplify_cmp(struct gl_program * program)
 
          inst->Opcode = OPCODE_MOV;
          inst->SrcReg[0] = inst->SrcReg[1];
+
+	 /* Unused operands are expected to have the file set to
+	  * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
+	  * all of the sources.
+	  */
+	 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+	 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+	 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
       }
    }
    if (dbg) {
-- 
cgit v1.2.3


From 337e2dfad0bcd567755272271abd2593a1d0fd1f Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 20 Jul 2011 16:04:17 -0700
Subject: i965: When emitting a src/dst write of an output, keep the write mask

Fixes i965 piglit:

    vs-varying-array-mat[234]-col-row-wr
    vs-varying-array-mat[234]-index-col-row-wr
    vs-varying-array-mat[234]-index-row-wr
    vs-varying-array-mat[234]-row-wr
    vs-varying-mat[234]-col-row-wr
    vs-varying-mat[234]-row-wr

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 9d733344a26..5ef8b0720ba 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1993,7 +1993,11 @@ void brw_vs_emit(struct brw_vs_compile *c )
       index = inst->DstReg.Index;
       file = inst->DstReg.File;
       if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
-	  dst = c->output_regs[index].reg;
+	 /* Can't just make get_dst "do the right thing" here because other
+	  * callers of get_dst don't expect any special behavior for the
+	  * c->output_regs[index].used_in_src case.
+	  */
+	 dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask);
       else
 	  dst = get_dst(c, inst->DstReg);
 
-- 
cgit v1.2.3


From 1d3f09f15998c60326bf6c53a8d32c82496264ae Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 20 Jul 2011 18:07:50 -0700
Subject: i965: When emitting a src/dst read of an output, keep the swizzle and
 neg

Fixes i965 piglit vs-varying-array-mat[234]-row-rd.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 5ef8b0720ba..d8cb0f7cb69 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1980,9 +1980,22 @@ void brw_vs_emit(struct brw_vs_compile *c )
 	      const struct prog_src_register *src = &inst->SrcReg[i];
 	      index = src->Index;
 	      file = src->File;	
-	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
-		  args[i] = c->output_regs[index].reg;
-	      else
+	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) {
+		 /* Can't just make get_arg "do the right thing" here because
+		  * other callers of get_arg and get_src_reg don't expect any
+		  * special behavior for the c->output_regs[index].used_in_src
+		  * case.
+		  */
+		 args[i] = c->output_regs[index].reg;
+		 args[i].dw1.bits.swizzle =
+		    BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+				 GET_SWZ(src->Swizzle, 1),
+				 GET_SWZ(src->Swizzle, 2),
+				 GET_SWZ(src->Swizzle, 3));
+
+		 /* Note this is ok for non-swizzle ARB_vp instructions */
+		 args[i].negate = src->Negate ? 1 : 0;
+	      } else
                   args[i] = get_arg(c, inst, i);
 	  }
 
-- 
cgit v1.2.3


From 156cef0fbacf242e8fc67e39ab964e5f8f3739cb Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 21 Jul 2011 21:17:10 -0700
Subject: i965/fs: Clear result before visiting shadow comparitor and LOD info.

Commit 53c89c67f33639afef951e178f93f4e29acc5d53 ("i965: Avoid generating
MOVs for assignments of expressions.") added the line "this->result =
reg_undef" all over the code.  Unfortunately, since Eric developed his
patch before I landed Ivybridge support, he missed adding it to
fs_visitor::emit_texture_gen7() after rebasing.

Furthermore, since I developed TXD support before Eric's patch, I
neglected to add it to the gradient handling when I rebased.

Neglecting to set this causes the visitor to use this->result as storage
rather than generating a new temporary.  These missing statements
resulted in the same register being used to store several different
values.

Fixes the following piglit tests on Ivybridge:
- glsl-fs-shadow2dproj.shader_test
- glsl-fs-shadow2dproj-bias.shader_test

NOTE: This is a candidate for the 7.11 branch.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index cbe5cf428c5..9632aae64b0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -603,9 +603,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
       mlen += 3;
    } else if (ir->op == ir_txd) {
+      this->result = reg_undef;
       ir->lod_info.grad.dPdx->accept(this);
       fs_reg dPdx = this->result;
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdy->accept(this);
       fs_reg dPdy = this->result;
 
@@ -786,9 +788,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       inst = emit(FS_OPCODE_TXL, dst);
       break;
    case ir_txd: {
+      this->result = reg_undef;
       ir->lod_info.grad.dPdx->accept(this);
       fs_reg dPdx = this->result;
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdy->accept(this);
       fs_reg dPdy = this->result;
 
@@ -850,6 +854,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    }
 
    if (ir->shadow_comparitor && ir->op != ir_txd) {
+      this->result = reg_undef;
       ir->shadow_comparitor->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
@@ -860,11 +865,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    case ir_tex:
       break;
    case ir_txb:
+      this->result = reg_undef;
       ir->lod_info.bias->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
       break;
    case ir_txl:
+      this->result = reg_undef;
       ir->lod_info.lod->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
@@ -873,9 +880,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       if (c->dispatch_width == 16)
 	 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdx->accept(this);
       fs_reg dPdx = this->result;
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdy->accept(this);
       fs_reg dPdy = this->result;
 
@@ -1070,6 +1079,7 @@ fs_visitor::visit(ir_texture *ir)
       if (hw_compare_supported) {
 	 inst->shadow_compare = true;
       } else {
+	 this->result = reg_undef;
 	 ir->shadow_comparitor->accept(this);
 	 fs_reg ref = this->result;
 
-- 
cgit v1.2.3


From 572f6318954f31fcf3d396ac5df8e9eff3f37c74 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 12 May 2011 04:02:32 -0700
Subject: i965/gen7: Fix shadow sampling in the old brw_wm_emit backend.

On Ivybridge, the shadow comparitor goes in the first slot, rather than
at the end.  It's not necessary to send u, v, and r.

Fixes tests texturing/texdepth and glean/fbo.

NOTE: This is a candidate for the 7.11 branch.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_wm_emit.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index f61757a8cac..6ea4a7d6e50 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c,
    if (intel->gen < 5 && c->dispatch_width == 8)
       nr_texcoords = 3;
 
-   /* For shadow comparisons, we have to supply u,v,r. */
-   if (shadow)
-      nr_texcoords = 3;
+   if (shadow) {
+      if (intel->gen < 7) {
+	 /* For shadow comparisons, we have to supply u,v,r. */
+	 nr_texcoords = 3;
+      } else {
+	 /* On Ivybridge, the shadow comparitor comes first. Just load it. */
+	 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+	 cur_mrf += mrf_per_channel;
+      }
+   }
 
    /* Emit the texcoords. */
    for (i = 0; i < nr_texcoords; i++) {
@@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c,
    }
 
    /* Fill in the shadow comparison reference value. */
-   if (shadow) {
+   if (shadow && intel->gen < 7) {
       if (intel->gen >= 5) {
 	 /* Fill in the cube map array index value. */
 	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
-- 
cgit v1.2.3


From d92463d5dc42aca09a54588c322fc60582cf9131 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Fri, 22 Jul 2011 14:05:52 -0700
Subject: i965: vs optimization fix: Check val.{negate,abs} in
 accumulator_contains()

When emitting a MAC instruction in a vertex shader, brw_vs_emit()
calls accumulator_contains() to determine whether the accumulator
already contains the appropriate addend; if it does, then we can avoid
emitting an unnecessary MOV instruction.

However, accumulator_contains() wasn't checking the val.negate or
val.abs flags.  As a result, if the desired value was the negation, or
the absolute value, of what was already in the accumulator, we would
generate an incorrect shader.

Fixes piglit test vs-refract-vec4-vec4-float.

Tested on Gen5 and Gen6.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index d8cb0f7cb69..674a994bace 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1821,6 +1821,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val)
    if (val.address_mode != BRW_ADDRESS_DIRECT)
       return GL_FALSE;
 
+   if (val.negate || val.abs)
+      return GL_FALSE;
+
    switch (prev_insn->header.opcode) {
    case BRW_OPCODE_MOV:
    case BRW_OPCODE_MAC:
-- 
cgit v1.2.3


From 185868c9c2e6a31a7313df2dbe29490547b65f61 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 11:50:27 -0700
Subject: i965: Emit texture cache flushes on gen6 along with render cache
 flushes.

It turns out that internally the texture cache gets flushed in a
couple of cases, particularly around 2D operations mixed with 3D.  In
almost all cases one of those happens between rendering to an
FBO-attached texture and rendering from that texture.  However, as of
the next patch, glean tfbo (and the new fbo-flushing-2 test) would
manage to get stale texture values because one of those flushes didn't
occur.  The intention of this code was always to get the render cache
cleared and ready to be used from the sampler cache (and it does on <=
gen4), so this just catches gen5 up.

This patch was also tested to fix fbo-flushing on gen7.
---
 src/mesa/drivers/dri/intel/intel_batchbuffer.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 97cc219ce6d..db4343be10c 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -390,6 +390,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
 	 OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
 		   PIPE_CONTROL_WRITE_FLUSH |
 		   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		   PIPE_CONTROL_TC_FLUSH |
 		   PIPE_CONTROL_NO_WRITE);
 	 OUT_BATCH(0); /* write address */
 	 OUT_BATCH(0); /* write data */
-- 
cgit v1.2.3


From 808024689247561d3de225856fb6ef17430fd39e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 21 Jul 2011 09:15:05 -0700
Subject: meta: Also save/restore clip planes for GLSL.

Fixes user-clip on 965 with 3D clears enabled.  I created a separate
flag because I wanted to avoid the overhead of the matrix operations
in this path.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/drivers/common/meta.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index fa78674e4eb..26c89519679 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -90,13 +90,14 @@
 #define META_SCISSOR         0x100
 #define META_SHADER          0x200
 #define META_STENCIL_TEST    0x400
-#define META_TRANSFORM       0x800 /**< modelview, projection, clip planes */
+#define META_TRANSFORM       0x800 /**< modelview/projection matrix state */
 #define META_TEXTURE        0x1000
 #define META_VERTEX         0x2000
 #define META_VIEWPORT       0x4000
 #define META_CLAMP_FRAGMENT_COLOR 0x8000
 #define META_CLAMP_VERTEX_COLOR 0x10000
 #define META_CONDITIONAL_RENDER 0x20000
+#define META_CLIP          0x40000
 /*@}*/
 
 
@@ -165,6 +166,8 @@ struct save_state
    GLfloat ModelviewMatrix[16];
    GLfloat ProjectionMatrix[16];
    GLfloat TextureMatrix[16];
+
+   /** META_CLIP */
    GLbitfield ClipPlanesEnabled;
 
    /** META_TEXTURE */
@@ -547,6 +550,9 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_Ortho(0.0, ctx->DrawBuffer->Width,
                   0.0, ctx->DrawBuffer->Height,
                   -1.0, 1.0);
+   }
+
+   if (state & META_CLIP) {
       save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled;
       if (ctx->Transform.ClipPlanesEnabled) {
          GLuint i;
@@ -846,7 +852,9 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_LoadMatrixf(save->ProjectionMatrix);
 
       _mesa_MatrixMode(save->MatrixMode);
+   }
 
+   if (state & META_CLIP) {
       if (save->ClipPlanesEnabled) {
          GLuint i;
          for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
@@ -1669,6 +1677,7 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
 	       META_STENCIL_TEST |
 	       META_VERTEX |
 	       META_VIEWPORT |
+	       META_CLIP |
 	       META_CLAMP_FRAGMENT_COLOR);
 
    if (!(buffers & BUFFER_BITS_COLOR)) {
@@ -1783,6 +1792,7 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY,
                           META_SHADER |
                           META_TEXTURE |
                           META_TRANSFORM |
+                          META_CLIP |
                           META_VERTEX |
                           META_VIEWPORT));
 
@@ -2104,6 +2114,7 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
                           META_SHADER |
                           META_TEXTURE |
                           META_TRANSFORM |
+                          META_CLIP |
                           META_VERTEX |
                           META_VIEWPORT |
 			  META_CLAMP_FRAGMENT_COLOR |
@@ -2313,6 +2324,7 @@ _mesa_meta_Bitmap(struct gl_context *ctx,
                           META_SHADER |
                           META_TEXTURE |
                           META_TRANSFORM |
+                          META_CLIP |
                           META_VERTEX |
                           META_VIEWPORT));
 
-- 
cgit v1.2.3


From a0e5affb22da50aeb30262f5ba0912b059d858ea Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 19 May 2011 11:02:14 -0700
Subject: i965: Use 3D clears on gen6+ to avoid inter-ring synchronization.

Improves firefox-talos-gfx around 5%.
---
 src/mesa/drivers/dri/intel/intel_clear.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index 81c062fba53..76d33f9b37e 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -116,13 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
    }
 
    /* HW color buffers (front, back, aux, generic FBO, etc) */
-   if (colorMask == ~0) {
+   if (intel->gen < 6 && colorMask == ~0) {
       /* clear all R,G,B,A */
       blit_mask |= (mask & BUFFER_BITS_COLOR);
    }
    else {
       /* glColorMask in effect */
-      tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
+      tri_mask |= (mask & BUFFER_BITS_COLOR);
    }
 
    /* Make sure we have up to date buffers before we start looking at
-- 
cgit v1.2.3


From 818db3848bfaa002d0e7cf6b9b615a31eb82ba25 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 10:56:10 -0700
Subject: i965: Fix many of the trivial WebGL demos that broke due to IB
 optimization.

The index buffer state emit only occurred if there was an IB in place
and we were in either a new batch or a new IB state.  But because we
only flagged new IB state if IB state changed from the last IB state
we calculated, we could simply never emit IB state after batchbuffer
wraps if the first draw didn't use the IB and we didn't actually
change the IB.

Fixes piglit glx-multi-context-ib-1.
---
 src/mesa/drivers/dri/i965/brw_vtbl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 55dbd4fa8b0..40360b23fff 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -213,6 +213,7 @@ static void brw_new_batch( struct intel_context *intel )
    brw->state_batch_count = 0;
 
    brw->vb.nr_current_buffers = 0;
+   brw->ib.type = -1;
 
    /* Mark that the current program cache BO has been used by the GPU.
     * It will be reallocated if we need to put new programs in for the
-- 
cgit v1.2.3


From 84f8548dfcc7de55e162359e2e39af2614903cbe Mon Sep 17 00:00:00 2001
From: Tobias Droste <tdroste@gmx.de>
Date: Mon, 18 Jul 2011 07:14:06 +0200
Subject: r300/compiler: simplify code in peephole_add_presub_add
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Tobias Droste <tdroste@gmx.de>
Signed-off-by: Marek Olšák <maraeo@gmail.com>
---
 .../drivers/dri/r300/compiler/radeon_optimize.c    | 35 +++++++++++-----------
 1 file changed, 18 insertions(+), 17 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index b24274259f4..39dcb21d4f4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -561,28 +561,29 @@ static int peephole_add_presub_add(
 	struct rc_instruction * inst_add)
 {
 	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
-	struct rc_src_register * src1 = NULL;
-	unsigned int i;
-
-	if (!is_presub_candidate(c, inst_add))
-		return 0;
+        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
+        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
 
 	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
 		return 0;
 
-	/* XXX This isn't fully implemented, is it? */
-	/*   src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
-	for (i = 0; i < 2; i++) {
-		if (inst_add->U.I.SrcReg[i].Abs)
-			return 0;
+	/* src0 and src1 can't have absolute values */
+	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
+	        return 0;
 
-		/* XXX This looks weird, but it's basically what was here before this commit (see git blame): */
-		if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) {
-			src1 = &inst_add->U.I.SrcReg[i];
-		}
-	}
+	/* presub_replace_add() assumes only one is negative */
+	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
+	        return 0;
+
+        /* if src0 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
+	        return 0;
 
-	if (!src1)
+        /* if src1 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
+	        return 0;
+
+	if (!is_presub_candidate(c, inst_add))
 		return 0;
 
 	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
@@ -615,7 +616,7 @@ static void presub_replace_inv(
  * of the add instruction must have the constatnt 1 swizzle.  This function
  * does not check const registers to see if their value is 1.0, so it should
  * be called after the constant_folding optimization.
- * @return 
+ * @return
  * 	0 if the ADD instruction is still part of the program.
  * 	1 if the ADD instruction is no longer part of the program.
  */
-- 
cgit v1.2.3


From 3daa2d97eb13f41de4cbab9301a167be85d48642 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 15:39:03 -0700
Subject: i965/fs: Fix MRT drawing since the m0->m2 move for shader debug.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 1d89b8f1d11..eecfc92eb5b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst)
 
 	 if (inst->target > 0) {
 	    /* Set the render target index for choosing BLEND_STATE. */
-	    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
+	    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+					   inst->base_mrf, 2),
 			      BRW_REGISTER_TYPE_UD),
 		    brw_imm_ud(inst->target));
 	 }
-- 
cgit v1.2.3


From 09916e877fc14723d7950f892e181df9f7d7f36f Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 22 Jul 2011 15:25:55 -0700
Subject: mesa: Add utility function to get base format from a GL compressed
 format

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/texcompress.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/texcompress.h |  3 ++
 2 files changed, 91 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index d820ae92747..040be943e82 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -39,6 +39,94 @@
 #include "texcompress.h"
 
 
+/**
+ * Get the GL base format of a specified GL compressed texture format
+ *
+ * From page 232 of the OpenGL 3.3 (Compatibility Profile) spec:
+ *
+ *     "Compressed Internal Format      Base Internal Format    Type
+ *     ---------------------------     --------------------    ---------
+ *     COMPRESSED_ALPHA                ALPHA                   Generic
+ *     COMPRESSED_LUMINANCE            LUMINANCE               Generic
+ *     COMPRESSED_LUMINANCE_ALPHA      LUMINANCE_ALPHA         Generic
+ *     COMPRESSED_INTENSITY            INTENSITY               Generic
+ *     COMPRESSED_RED                  RED                     Generic
+ *     COMPRESSED_RG                   RG                      Generic
+ *     COMPRESSED_RGB                  RGB                     Generic
+ *     COMPRESSED_RGBA                 RGBA                    Generic
+ *     COMPRESSED_SRGB                 RGB                     Generic
+ *     COMPRESSED_SRGB_ALPHA           RGBA                    Generic
+ *     COMPRESSED_SLUMINANCE           LUMINANCE               Generic
+ *     COMPRESSED_SLUMINANCE_ALPHA     LUMINANCE_ALPHA         Generic
+ *     COMPRESSED_RED_RGTC1            RED                     Specific
+ *     COMPRESSED_SIGNED_RED_RGTC1     RED                     Specific
+ *     COMPRESSED_RG_RGTC2             RG                      Specific
+ *     COMPRESSED_SIGNED_RG_RGTC2      RG                      Specific"
+ *
+ * \return
+ * The base format of \c format if \c format is a compressed format (either
+ * generic or specific).  Otherwise 0 is returned.
+ */
+GLenum
+_mesa_gl_compressed_format_base_format(GLenum format)
+{
+   switch (format) {
+   case GL_COMPRESSED_RED:
+   case GL_COMPRESSED_RED_RGTC1:
+   case GL_COMPRESSED_SIGNED_RED_RGTC1:
+      return GL_RED;
+
+   case GL_COMPRESSED_RG:
+   case GL_COMPRESSED_RG_RGTC2:
+   case GL_COMPRESSED_SIGNED_RG_RGTC2:
+      return GL_RG;
+
+   case GL_COMPRESSED_RGB:
+   case GL_COMPRESSED_SRGB:
+   case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB:
+   case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+      return GL_RGB;
+
+   case GL_COMPRESSED_RGBA:
+   case GL_COMPRESSED_SRGB_ALPHA:
+   case GL_COMPRESSED_RGBA_BPTC_UNORM_ARB:
+   case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB:
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+      return GL_RGBA;
+
+   case GL_COMPRESSED_ALPHA:
+      return GL_ALPHA;
+
+   case GL_COMPRESSED_LUMINANCE:
+   case GL_COMPRESSED_SLUMINANCE:
+   case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
+   case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
+      return GL_LUMINANCE;
+
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+   case GL_COMPRESSED_SLUMINANCE_ALPHA:
+   case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
+   case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
+   case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
+      return GL_LUMINANCE_ALPHA;
+
+   case GL_COMPRESSED_INTENSITY:
+      return GL_INTENSITY;
+
+   default:
+      return 0;
+   }
+}
+
 /**
  * Return list of (and count of) all specific texture compression
  * formats that are supported.
diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h
index 19b08bbadf6..922da00912d 100644
--- a/src/mesa/main/texcompress.h
+++ b/src/mesa/main/texcompress.h
@@ -33,6 +33,9 @@ struct gl_context;
 
 #if _HAVE_FULL_GL
 
+extern GLenum
+_mesa_gl_compressed_format_base_format(GLenum format);
+
 extern GLuint
 _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all);
 
-- 
cgit v1.2.3


From 143b65f7612c255f29d08392192098b1c2bf4b62 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 22 Jul 2011 15:26:24 -0700
Subject: mesa: Return the correct internal fmt when a generic compressed fmt
 was used

If an application requests a generic compressed format for a texture
and the driver does not pick a specific compressed format, return the
generic base format (e.g., GL_RGBA) for the GL_TEXTURE_INTERNAL_FORMAT
query.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=3165
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/texparam.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 4b9dcb5d3b5..c4ec29533e2 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -915,9 +915,23 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
             *params = _mesa_compressed_format_to_glenum(ctx, texFormat);
          }
          else {
-            /* return the user's requested internal format */
-            *params = img->InternalFormat;
-         }
+	    /* If the true internal format is not compressed but the user
+	     * requested a generic compressed format, we have to return the
+	     * generic base format that matches.
+	     *
+	     * From page 119 (page 129 of the PDF) of the OpenGL 1.3 spec:
+	     *
+	     *     "If no specific compressed format is available,
+	     *     internalformat is instead replaced by the corresponding base
+	     *     internal format."
+	     *
+	     * Otherwise just return the user's requested internal format
+	     */
+	    const GLenum f =
+	       _mesa_gl_compressed_format_base_format(img->InternalFormat);
+
+	    *params = (f != 0) ? f : img->InternalFormat;
+	 }
          break;
       case GL_TEXTURE_BORDER:
          *params = img->Border;
-- 
cgit v1.2.3


From b189d1635d89cd7d900e8f9a5eed88d7dc0b46cb Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 22 Jul 2011 16:45:50 -0700
Subject: mesa: Make _mesa_get_compressed_formats match the texture compression
 specs

The implementation deviated slightly from the GL_EXT_texture_sRGB spec
and from other implementations.  A giant comment block was added to
justify the somewhat odd behavior of this function.

In addition, the interface had unnecessary cruft.  The 'all' parameter
was false at all callers, so it has been removed.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/get.c         |   4 +-
 src/mesa/main/texcompress.c | 117 ++++++++++++++++++++++++++++++++++----------
 src/mesa/main/texcompress.h |   2 +-
 3 files changed, 93 insertions(+), 30 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 0492e1585c3..d32c68a53a4 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1569,11 +1569,11 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
       break;
 
    case GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB:
-      v->value_int = _mesa_get_compressed_formats(ctx, NULL, GL_FALSE);
+      v->value_int = _mesa_get_compressed_formats(ctx, NULL);
       break;
    case GL_COMPRESSED_TEXTURE_FORMATS_ARB:
       v->value_int_n.n = 
-	 _mesa_get_compressed_formats(ctx, v->value_int_n.ints, GL_FALSE);
+	 _mesa_get_compressed_formats(ctx, v->value_int_n.ints);
       ASSERT(v->value_int_n.n <= 100);
       break;
 
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 040be943e82..42bd1eee5ca 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -131,16 +131,101 @@ _mesa_gl_compressed_format_base_format(GLenum format)
  * Return list of (and count of) all specific texture compression
  * formats that are supported.
  *
+ * Some formats are \b not returned by this function.  The
+ * \c GL_COMPRESSED_TEXTURE_FORMATS query only returns formats that are
+ * "suitable for general-purpose usage."  All texture compression extensions
+ * have taken this to mean either linear RGB or linear RGBA.
+ *
+ * The GL_ARB_texture_compress_rgtc spec says:
+ *
+ *    "19) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS queries return the RGTC formats?
+ *
+ *        RESOLVED:  No.
+ *
+ *        The OpenGL 2.1 specification says "The only values returned
+ *        by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those
+ *        corresponding to formats suitable for general-purpose usage.
+ *        The renderer will not enumerate formats with restrictions that
+ *        need to be specifically understood prior to use."
+ *
+ *        Compressed textures with just red or red-green components are
+ *        not general-purpose so should not be returned by these queries
+ *        because they have restrictions.
+ *
+ *        Applications that seek to use the RGTC formats should do so
+ *        by looking for this extension's name in the string returned by
+ *        glGetString(GL_EXTENSIONS) rather than
+ *        what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS return."
+ *
+ * There is nearly identical wording in the GL_EXT_texture_compression_rgtc
+ * spec.
+ *
+ * The GL_EXT_texture_sRGB spec says:
+ *
+ *    "22) Should the new COMPRESSED_SRGB_* formats be listed in an
+ *        implementation's GL_COMPRESSED_TEXTURE_FORMATS list?
+ *
+ *        RESOLVED:  No.  Section 3.8.1 says formats listed by
+ *        GL_COMPRESSED_TEXTURE_FORMATS are "suitable for general-purpose
+ *        usage."  The non-linear distribution of red, green, and
+ *        blue for these sRGB compressed formats makes them not really
+ *        general-purpose."
+ *
+ * The GL_EXT_texture_compression_latc spec says:
+ *
+ *    "16) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS queries return the LATC formats?
+ *
+ *        RESOLVED:  No.
+ *
+ *        The OpenGL 2.1 specification says "The only values returned
+ *        by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those
+ *        corresponding to formats suitable for general-purpose usage.
+ *        The renderer will not enumerate formats with restrictions that
+ *        need to be specifically understood prior to use."
+ *
+ *        Historically, OpenGL implementation have advertised the RGB and
+ *        RGBA versions of the S3TC extensions compressed format tokens
+ *        through this mechanism.
+ *
+ *        The specification is not sufficiently clear about what "suitable
+ *        for general-purpose usage" means.  Historically that seems to mean
+ *        unsigned RGB or unsigned RGBA.  The DXT1 format supporting alpha
+ *        (GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) is not exposed in the list (at
+ *        least for NVIDIA drivers) because the alpha is always 1.0 expect
+ *        when it is 0.0 when RGB is required to be black.  NVIDIA's even
+ *        limits itself to true linear RGB or RGBA formats, specifically
+ *        not including EXT_texture_sRGB's sRGB S3TC compressed formats.
+ *
+ *        Adding luminance and luminance-alpha texture formats (and
+ *        certainly signed versions of luminance and luminance-alpha
+ *        formats!) invites potential comptaibility problems with old
+ *        applications using this mechanism since old applications are
+ *        unlikely to expect non-RGB or non-RGBA formats to be advertised
+ *        through this mechanism.  However no specific misinteractions
+ *        with old applications is known.
+ *
+ *        Applications that seek to use the LATC formats should do so
+ *        by looking for this extension's name in the string returned by
+ *        glGetString(GL_EXTENSIONS) rather than
+ *        what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS return."
+ *
+ * There is no formal spec for GL_ATI_texture_compression_3dc.  Since the
+ * formats added by this extension are luminance-alpha formats, it is
+ * reasonable to expect them to follow the same rules as
+ * GL_EXT_texture_compression_latc.  At the very least, Catalyst 11.6 does not
+ * expose the 3dc formats through this mechanism.
+ *
  * \param ctx  the GL context
  * \param formats  the resulting format list (may be NULL).
- * \param all  if true return all formats, even those with  some kind
- *             of restrictions/limitations (See GL_ARB_texture_compression
- *             spec for more info).
  *
  * \return number of formats.
  */
 GLuint
-_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all)
+_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats)
 {
    GLuint n = 0;
    if (ctx->Extensions.TDFX_texture_compression_FXT1) {
@@ -152,24 +237,15 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a
          n += 2;
       }
    }
-   /* don't return RGTC - ARB_texture_compression_rgtc query 19 */
+
    if (ctx->Extensions.EXT_texture_compression_s3tc) {
       if (formats) {
          formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
-         /* This format has some restrictions/limitations and so should
-          * not be returned via the GL_COMPRESSED_TEXTURE_FORMATS query.
-          * Specifically, all transparent pixels become black.  NVIDIA
-          * omits this format too.
-          */
-         if (all)
-             formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
          formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
          formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
       }
       else {
          n += 3;
-         if (all)
-             n += 1;
       }
    }
    if (ctx->Extensions.S3_s3tc) {
@@ -183,19 +259,6 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a
          n += 4;
       }
    }
-#if FEATURE_EXT_texture_sRGB
-   if (ctx->Extensions.EXT_texture_sRGB) {
-      if (formats) {
-         formats[n++] = GL_COMPRESSED_SRGB_S3TC_DXT1_EXT;
-         formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT;
-         formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT;
-         formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT;
-      }
-      else {
-         n += 4;
-      }
-   }
-#endif /* FEATURE_EXT_texture_sRGB */
    return n;
 
 #if FEATURE_ES1 || FEATURE_ES2
diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h
index 922da00912d..375cf90c8a2 100644
--- a/src/mesa/main/texcompress.h
+++ b/src/mesa/main/texcompress.h
@@ -37,7 +37,7 @@ extern GLenum
 _mesa_gl_compressed_format_base_format(GLenum format);
 
 extern GLuint
-_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all);
+_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats);
 
 extern gl_format
 _mesa_glenum_to_compressed_format(GLenum format);
-- 
cgit v1.2.3


From 95739f19ccc8d3915c437238ca057ddbecd193c6 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 25 Jul 2011 13:30:17 -0500
Subject: st/mesa: respect force_s3tc_enable environment variable

NOTE: This is a candidate for the 7.10 and 7.11 branches.
---
 src/mesa/state_tracker/st_extensions.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 99b231d9706..b5f6d356eb0 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -208,6 +208,15 @@ void st_init_limits(struct st_context *st)
 }
 
 
+static GLboolean st_get_s3tc_override(void)
+{
+   /* GL_TRUE only when force_s3tc_enable is set to exactly "true". */
+   const char *value = _mesa_getenv("force_s3tc_enable");
+
+   return (value && strcmp(value, "true") == 0) ? GL_TRUE : GL_FALSE;
+}
+
+
 /**
  * Use pipe_screen::get_param() to query PIPE_CAP_ values to determine
  * which GL extensions are supported.
@@ -426,7 +435,7 @@ void st_init_extensions(struct st_context *st)
    if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA,
                                    PIPE_TEXTURE_2D, 0,
                                    PIPE_BIND_SAMPLER_VIEW) &&
-       ctx->Mesa_DXTn) {
+       (ctx->Mesa_DXTn || st_get_s3tc_override())) {
       ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE;
       ctx->Extensions.S3_s3tc = GL_TRUE;
    }
-- 
cgit v1.2.3


From 58c04435b12a104b1996fac4d3a3d345f31bd4e7 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 27 Jul 2011 12:13:37 +0200
Subject: mesa: don't forget about sampleBuffers in framebuffer visual update

Otherwise multisample will never be enabled for multisample
renderbuffers.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/framebuffer.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/mesa')

diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index e27569a6fac..23fa1b2c11e 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -548,6 +548,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx,
             fb->Visual.rgbBits = fb->Visual.redBits
                + fb->Visual.greenBits + fb->Visual.blueBits;
             fb->Visual.samples = rb->NumSamples;
+            fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 1 : 0;
             if (_mesa_get_format_color_encoding(fmt) == GL_SRGB)
                 fb->Visual.sRGBCapable = ctx->Const.sRGBCapable;
             break;
-- 
cgit v1.2.3


From 3e1fd13f605f16e8b48f3a9b71910a3c66eb84b5 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 25 Jul 2011 14:27:07 -0700
Subject: i965/gen4: Fix message parameter loading for 1D TXD sampling.

We were neglecting to load dvdx and dvdy.  v is not optional.

Fixes glslparsertests tex-grad-0[12345].frag on Broadwater/Crestline.
(We still need an execution test using sampler1D.)

NOTE: This is a candidate for the 7.11 branch.

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 9632aae64b0..b82dfd5ead4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -622,6 +622,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        * dPdx = dudx, dvdx, drdx
        * dPdy = dudy, dvdy, drdy
        *
+       * 1-arg: Does not exist.
+       *
        * 2-arg: dudx   dvdx   dudy   dvdy
        *        dPdx.x dPdx.y dPdy.x dPdy.y
        *        m4     m5     m6     m7
@@ -633,14 +635,14 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
 	 dPdx.reg_offset++;
-	 mlen++;
       }
+      mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
 
       for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
 	 dPdy.reg_offset++;
-	 mlen++;
       }
+      mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
    } else {
       /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
        * instructions.  We'll need to do SIMD16 here.
-- 
cgit v1.2.3


From 15c0bc5eefc89bec537e412c02965f201fb1c011 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 25 Jul 2011 17:06:13 -0700
Subject: i965: Check actual tile offsets in Gen4 miptree workaround.

The purpose of the (irb->draw_offset & 4095) != 0 check was to ensure
that we don't have X/Y offsets into a tile, since Gen4 hardware doesn't
support that.  However, it's insufficient: there are cases where
draw_offset & 4095 is 0 but we still have a Y-offset.  This leads to an
assertion failure in brw_update_renderbuffer_surface with tile_y != 0.

Instead, simply call intel_renderbuffer_tile_offsets to compute the
actual X/Y offsets and check if either are non-zero.  This makes both
the workaround and the assertion check the same things.

Fixes piglit test fbo-generatemipmap-formats, and should also fix
bugs #34009 and #39487.

NOTE: This is a candidate for stable release branches.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34009
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39487
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Chad Versace <chad@chad-versace.us>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_fbo.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 35be3257ab3..7d6d9f271e6 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -647,6 +647,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
    }
 }
 
+#ifndef I915
+static bool
+need_tile_offset_workaround(struct brw_context *brw,
+			    struct intel_renderbuffer *irb)
+{
+   uint32_t x_in_tile, y_in_tile;
+
+   /* Tile offsets are only consulted if has_surface_tile_offset is unset. */
+   if (!brw->has_surface_tile_offset) {
+      intel_renderbuffer_tile_offsets(irb, &x_in_tile, &y_in_tile);
+      return (x_in_tile | y_in_tile) != 0;
+   }
+   return false;
+}
+#endif
+
 /**
  * Called by glFramebufferTexture[123]DEXT() (and other places) to
  * prepare for rendering into texture memory.  This might be called
@@ -700,8 +716,7 @@ intel_render_texture(struct gl_context * ctx,
    intel_image->used_as_render_target = GL_TRUE;
 
 #ifndef I915
-   if (!brw_context(ctx)->has_surface_tile_offset &&
-       (irb->draw_offset & 4095) != 0) {
+   if (need_tile_offset_workaround(brw_context(ctx), irb)) {
       /* Original gen4 hardware couldn't draw to a non-tile-aligned
        * destination in a miptree unless you actually setup your
        * renderbuffer as a miptree and used the fragile
-- 
cgit v1.2.3


From f73caddd3339d284556036d031ab30ce8057a510 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 25 Jul 2011 21:13:43 -0700
Subject: i965: Remove the now unused intel_renderbuffer::draw_offset field.

The previous commit removed the last use of this field.

Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_fbo.c | 1 -
 src/mesa/drivers/dri/intel/intel_fbo.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 7d6d9f271e6..e48d6ef9cbd 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -606,7 +606,6 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb,
 				  zoffset,
 				  &dst_x, &dst_y);
 
-   irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp;
    irb->draw_x = dst_x;
    irb->draw_y = dst_y;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index f7f99a4f00c..2487994fde5 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -58,7 +58,6 @@ struct intel_renderbuffer
 
    /** \} */
 
-   GLuint draw_offset; /**< Offset of drawing address within the region */
    GLuint draw_x, draw_y; /**< Offset of drawing within the region */
 };
 
-- 
cgit v1.2.3


From 58d6aa82878fc901d4dadd39e308a5d88b064997 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 27 Jul 2011 15:49:39 -0600
Subject: st/mesa: fix comment language

---
 src/mesa/state_tracker/st_atom_texture.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 800a9f1f0e0..3115a2511ce 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -221,9 +221,9 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_
 
       if ((samp->sRGBDecode == GL_SKIP_DECODE_EXT) &&
 	  (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) {
-	 /* don't do sRGB->RGB conversion.  Interpret the texture
-	  * texture data as linear values.
-	  */
+         /* Don't do sRGB->RGB conversion.  Interpret the texture data as
+          * linear values.
+          */
 	 const gl_format linearFormat =
 	    _mesa_get_srgb_format_linear(texFormat);
 	 firstImageFormat = st_mesa_format_to_pipe_format(linearFormat);
-- 
cgit v1.2.3


From 26684e0b1a857cc16a2c6f2b542e5ccf3da5acf5 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 28 Jul 2011 09:43:09 -0600
Subject: mesa: test against MESA_FORMAT_NONE in _mesa_GetTexLevelParameteriv()

---
 src/mesa/main/texparam.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index c4ec29533e2..3f771f08bc6 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -888,7 +888,7 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
    texObj = _mesa_select_tex_object(ctx, texUnit, target);
 
    img = _mesa_select_tex_image(ctx, texObj, target, level);
-   if (!img || !img->TexFormat) {
+   if (!img || img->TexFormat == MESA_FORMAT_NONE) {
       /* undefined texture image */
       if (pname == GL_TEXTURE_COMPONENTS)
          *params = 1;
-- 
cgit v1.2.3


From e4fdc95277bd323d8945e20635d3a1702a2e695d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 28 Jul 2011 09:51:30 -0600
Subject: mesa: fix format selection for meta CopyTexSubImage()

When we do a glReadPixels into the temporary buffer, we don't want to
use GL_LUMINANCE, GL_LUMINANCE_ALPHA or GL_INTENSITY since they will
compute L=R+G+B which is not what we want.

This bug has existed all along but was only exposed by the elimination
of the driver hook for glCopyTexImage() in
5874890c26f434f54e9218b83fae4eb8175c24e9.

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39604
Tested-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/common/meta.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 26c89519679..f9b4755988b 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -2869,6 +2869,16 @@ copy_tex_sub_image(struct gl_context *ctx,
 
    /* Choose format/type for temporary image buffer */
    format = _mesa_get_format_base_format(texImage->TexFormat);
+   if (format == GL_LUMINANCE ||
+       format == GL_LUMINANCE_ALPHA ||
+       format == GL_INTENSITY) {
+      /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the
+       * temp image buffer because glReadPixels will do L=R+G+B which is
+       * not what we want (should be L=R).
+       */
+      format = GL_RGBA;
+   }
+
    type = get_temp_image_type(ctx, format);
    bpp = _mesa_bytes_per_pixel(format, type);
    if (bpp <= 0) {
-- 
cgit v1.2.3


From 83f5d5e6aa58754f52c3579c27d810c497fe13a3 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 18:42:21 -0700
Subject: Add dependency generation for Mesa and GLSL dricore objects.

Reviewed-By: Christopher James Halse Rogers
	     <christopher.halse.rogers@canonical.com>
---
 src/glsl/Makefile | 1 +
 src/mesa/Makefile | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 005b51d724b..c20a6c9edd9 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -164,6 +164,7 @@ depend: $(ALL_SOURCES) Makefile
 	rm -f depend
 	touch depend
 	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null
+	$(MKDEP) $(MKDEP_OPTIONS) -a -p $(DRICORE_OBJ_DIR)/ $(INCLUDES) $(ALL_SOURCES) 2> /dev/null
 
 # Remove .o and backup files
 clean: clean-dricore
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index a903a260ac9..88f31b68695 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -12,11 +12,10 @@ DRICORE_OBJ_DIR := objs-dricore
 include sources.mak
 
 # adjust object dirs
+DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
 MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS))
 MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS))
 
-DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
-
 # define preprocessor flags
 MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES)
 
@@ -124,6 +123,8 @@ depend: $(ALL_SOURCES)
 	@ touch depend
 	@$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \
 		$(ALL_SOURCES) > /dev/null 2>/dev/null
+	@$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \
+		$(ALL_SOURCES) > /dev/null 2>/dev/null
 
 ######################################################################
 # Installation rules
-- 
cgit v1.2.3


From ef1854d09021b6601e59e39fcb71a88fb5e5efb2 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 13 Jul 2011 14:24:41 -0700
Subject: mesa: Fix ff fragment shader inputs calculation when enabling a VS.

The FF VS generation happens just after the FF FS generation in
state.c, so the ctx->VP._Current value is for the previous state
update's vertex shader, not the one that will be chosen as a result of
this state update.  The vertexShader and vertexProgram variables
should be accurately telling us whether there's going to be a
ctx->VP._Current (except on _MaintainTnlProgram drivers, where it's
always true).

The glsl-vs-statechange-1 test was created to test for this, but it
turns out that the bug is hidden by the fact that we call
_mesa_update_state() twice per draw call -- once from
_mesa_valid_to_render() and once from vbo_draw_arrays(), and the
second one was fixing up the first one.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp
index 0b53c28f7ae..dbfa6b57d4d 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -330,8 +330,7 @@ static GLbitfield get_fp_input_mask( struct gl_context *ctx )
       /* _NEW_RENDERMODE */
       fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
    }
-   else if (!(vertexProgram || vertexShader) ||
-            !ctx->VertexProgram._Current) {
+   else if (!(vertexProgram || vertexShader)) {
       /* Fixed function vertex logic */
       /* _NEW_ARRAY */
       GLbitfield varying_inputs = ctx->varying_vp_inputs;
-- 
cgit v1.2.3


From 4fdd289805d14d4f7a234f88cd375be1b3b96764 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 18:50:43 -0700
Subject: i965/fs: Respect ARB_color_buffer_float clamping.

This was done in the old codegen path, but not the new one.  Caught by
piglit fbo tests after the conversion to GLSL ff_fragment_shader.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index b82dfd5ead4..4f599fb477e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1745,6 +1745,7 @@ void
 fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
 {
    int reg_width = c->dispatch_width / 8;
+   fs_inst *inst;
 
    if (c->dispatch_width == 8 || intel->gen == 6) {
       /* SIMD8 write looks like:
@@ -1763,8 +1764,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
        * m + 6: a0
        * m + 7: a1
        */
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
-	   color);
+      inst = emit(BRW_OPCODE_MOV,
+		  fs_reg(MRF, first_color_mrf + index * reg_width),
+		  color);
+      inst->saturate = c->key.clamp_fragment_color;
    } else {
       /* pre-gen6 SIMD16 single source DP write looks like:
        * m + 0: r0
@@ -1782,16 +1785,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
 	  * usual destination + 1 for the second half we get
 	  * destination + 4.
 	  */
-	 emit(BRW_OPCODE_MOV,
-	      fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color);
+	 inst = emit(BRW_OPCODE_MOV,
+		     fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index),
+		     color);
+	 inst->saturate = c->key.clamp_fragment_color;
       } else {
 	 push_force_uncompressed();
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+	 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index),
+		     color);
+	 inst->saturate = c->key.clamp_fragment_color;
 	 pop_force_uncompressed();
 
 	 push_force_sechalf();
 	 color.sechalf = true;
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+	 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4),
+		     color);
+	 inst->saturate = c->key.clamp_fragment_color;
 	 pop_force_sechalf();
 	 color.sechalf = false;
       }
-- 
cgit v1.2.3


From 44ffb4ae207e48f78fae55925601b8708ed09c1d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 29 Jul 2011 11:52:39 -0700
Subject: i965/fs: Stop using the exec_list iterator.

The old style has gone out of favor in the project, but I kept copying
and pasting from existing iterator code.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp               | 70 ++++++++++------------
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          |  4 +-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  | 16 ++---
 .../dri/i965/brw_fs_schedule_instructions.cpp      | 16 ++---
 .../drivers/dri/i965/brw_fs_vector_splitting.cpp   | 16 ++---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       | 16 ++---
 6 files changed, 67 insertions(+), 71 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b5ea943387d..15475fbae2f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -621,8 +621,8 @@ fs_visitor::assign_curb_setup()
    }
 
    /* Map the offsets in the UNIFORM file to fixed HW regs. */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == UNIFORM) {
@@ -684,8 +684,8 @@ fs_visitor::assign_urb_setup()
    /* Offset all the urb_setup[] index by the actual position of the
     * setup regs, now that the location of the constants has been chosen.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode == FS_OPCODE_LINTERP) {
 	 assert(inst->src[2].file == FIXED_HW_REG);
@@ -739,8 +739,8 @@ fs_visitor::split_virtual_grfs()
       split_grf[this->delta_x.reg] = false;
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       /* Texturing produces 4 contiguous registers, so no splitting. */
       if (inst->is_tex()) {
@@ -763,8 +763,8 @@ fs_visitor::split_virtual_grfs()
       }
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->dst.file == GRF &&
 	  split_grf[inst->dst.reg] &&
@@ -815,8 +815,8 @@ fs_visitor::setup_pull_constants()
    int pull_uniform_base = max_uniform_components;
    int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (int i = 0; i < 3; i++) {
 	 if (inst->src[i].file != UNIFORM)
@@ -871,8 +871,8 @@ fs_visitor::calculate_live_intervals()
    }
 
    int ip = 0;
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode == BRW_OPCODE_DO) {
 	 if (loop_depth++ == 0)
@@ -945,8 +945,8 @@ fs_visitor::propagate_constants()
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode != BRW_OPCODE_MOV ||
 	  inst->predicated ||
@@ -965,11 +965,9 @@ fs_visitor::propagate_constants()
       /* Found a move of a constant to a GRF.  Find anything else using the GRF
        * before it's written, and replace it with the constant if we can.
        */
-      exec_list_iterator scan_iter = iter;
-      scan_iter.next();
-      for (; scan_iter.has_next(); scan_iter.next()) {
-	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+      for (fs_inst *scan_inst = (fs_inst *)inst->next;
+	   !scan_inst->is_tail_sentinel();
+	   scan_inst = (fs_inst *)scan_inst->next) {
 	 if (scan_inst->opcode == BRW_OPCODE_DO ||
 	     scan_inst->opcode == BRW_OPCODE_WHILE ||
 	     scan_inst->opcode == BRW_OPCODE_ELSE ||
@@ -1077,8 +1075,8 @@ fs_visitor::dead_code_eliminate()
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
 	 inst->remove();
@@ -1101,8 +1099,8 @@ fs_visitor::register_coalesce()
    int if_depth = 0;
    int loop_depth = 0;
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       /* Make sure that we dominate the instructions we're going to
        * scan for interfering with our coalescing, or we won't have
@@ -1141,11 +1139,10 @@ fs_visitor::register_coalesce()
        * program.
        */
       bool interfered = false;
-      exec_list_iterator scan_iter = iter;
-      scan_iter.next();
-      for (; scan_iter.has_next(); scan_iter.next()) {
-	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
 
+      for (fs_inst *scan_inst = (fs_inst *)inst->next;
+	   !scan_inst->is_tail_sentinel();
+	   scan_inst = (fs_inst *)scan_inst->next) {
 	 if (scan_inst->dst.file == GRF) {
 	    if (scan_inst->dst.reg == inst->dst.reg &&
 		(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
@@ -1176,10 +1173,9 @@ fs_visitor::register_coalesce()
       /* Rewrite the later usage to point at the source of the move to
        * be removed.
        */
-      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
-	   scan_iter.next()) {
-	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+      for (fs_inst *scan_inst = inst;
+	   !scan_inst->is_tail_sentinel();
+	   scan_inst = (fs_inst *)scan_inst->next) {
 	 for (int i = 0; i < 3; i++) {
 	    if (scan_inst->src[i].file == GRF &&
 		scan_inst->src[i].reg == inst->dst.reg &&
@@ -1212,8 +1208,8 @@ fs_visitor::compute_to_mrf()
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       int ip = next_ip;
       next_ip++;
@@ -1392,8 +1388,8 @@ fs_visitor::remove_duplicate_mrf_writes()
 
    memset(last_mrf_move, 0, sizeof(last_mrf_move));
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       switch (inst->opcode) {
       case BRW_OPCODE_DO:
@@ -1527,8 +1523,8 @@ fs_visitor::run()
       /* Generate FS IR for main().  (the visitor only descends into
        * functions called "main").
        */
-      foreach_iter(exec_list_iterator, iter, *shader->ir) {
-	 ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &*shader->ir) {
+	 ir_instruction *ir = (ir_instruction *)node;
 	 base_ir = ir;
 	 this->result = reg_undef;
 	 ir->accept(this);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index eecfc92eb5b..9fb0153d1f8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -609,8 +609,8 @@ fs_visitor::generate_code()
 	     prog->Name, c->dispatch_width);
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
       struct brw_reg src[3], dst;
 
       if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index b4689d2c293..78daa491156 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -75,8 +75,8 @@ fs_visitor::assign_regs_trivial()
    last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] *
 				       reg_width);
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       assign_reg(hw_reg_mapping, &inst->dst, reg_width);
       assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
@@ -283,8 +283,8 @@ fs_visitor::assign_regs()
 			    reg_width);
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       assign_reg(hw_reg_mapping, &inst->dst, reg_width);
       assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
@@ -336,8 +336,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
     * spill/unspill we'll have to do, and guess that the insides of
     * loops run 10 times.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF) {
@@ -394,8 +394,8 @@ fs_visitor::spill_reg(int spill_reg)
     * virtual grf of the same size.  For most instructions, though, we
     * could just spill/unspill the GRF being accessed.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF &&
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index d8218c26edb..9ec3f502764 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -283,8 +283,8 @@ instruction_scheduler::calculate_deps()
    memset(last_mrf_write, 0, sizeof(last_mrf_write));
 
    /* top-to-bottom dependencies: RAW and WAW. */
-   foreach_iter(exec_list_iterator, iter, instructions) {
-      schedule_node *n = (schedule_node *)iter.get();
+   foreach_list(node, &instructions) {
+      schedule_node *n = (schedule_node *)node;
       fs_inst *inst = n->inst;
 
       /* read-after-write deps. */
@@ -437,8 +437,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
    int time = 0;
 
    /* Remove non-DAG heads from the list. */
-   foreach_iter(exec_list_iterator, iter, instructions) {
-      schedule_node *n = (schedule_node *)iter.get();
+   foreach_list_safe(node, &instructions) {
+      schedule_node *n = (schedule_node *)node;
       if (n->parent_count != 0)
 	 n->remove();
    }
@@ -447,8 +447,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
       schedule_node *chosen = NULL;
       int chosen_time = 0;
 
-      foreach_iter(exec_list_iterator, iter, instructions) {
-	 schedule_node *n = (schedule_node *)iter.get();
+      foreach_list(node, &instructions) {
+	 schedule_node *n = (schedule_node *)node;
 
 	 if (!chosen || n->unblocked_time < chosen_time) {
 	    chosen = n;
@@ -490,8 +490,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
        * progress until the first is done.
        */
       if (chosen->inst->is_math()) {
-	 foreach_iter(exec_list_iterator, iter, instructions) {
-	    schedule_node *n = (schedule_node *)iter.get();
+	 foreach_list(node, &instructions) {
+	    schedule_node *n = (schedule_node *)node;
 
 	    if (n->inst->is_math())
 	       n->unblocked_time = MAX2(n->unblocked_time,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 530ffa26580..a9a60c2fd8a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
       break;
    }
 
-   foreach_iter(exec_list_iterator, iter, this->variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list(node, &this->variable_list) {
+      variable_entry *entry = (variable_entry *)node;
       if (entry->var == var)
 	 return entry;
    }
@@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
    if (!var->type->is_vector())
       return NULL;
 
-   foreach_iter(exec_list_iterator, iter, *this->variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list(node, &*this->variable_list) {
+      variable_entry *entry = (variable_entry *)node;
       if (entry->var == var) {
 	 return entry;
       }
@@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions)
    visit_list_elements(&refs, instructions);
 
    /* Trim out variables we can't split. */
-   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list_safe(node, &refs.variable_list) {
+      variable_entry *entry = (variable_entry *)node;
 
       if (debug) {
 	 printf("vector %s@%p: decl %d, whole_access %d\n",
@@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions)
    /* Replace the decls of the vectors to be split with their split
     * components.
     */
-   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list(node, &refs.variable_list) {
+      variable_entry *entry = (variable_entry *)node;
       const struct glsl_type *type;
       type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 4f599fb477e..2b769ccbba1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1477,8 +1477,8 @@ fs_visitor::visit(ir_if *ir)
       inst->predicated = true;
    }
 
-   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list(node, &ir->then_instructions) {
+      ir_instruction *ir = (ir_instruction *)node;
       this->base_ir = ir;
       this->result = reg_undef;
       ir->accept(this);
@@ -1487,8 +1487,8 @@ fs_visitor::visit(ir_if *ir)
    if (!ir->else_instructions.is_empty()) {
       emit(BRW_OPCODE_ELSE);
 
-      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
-	 ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &ir->else_instructions) {
+	 ir_instruction *ir = (ir_instruction *)node;
 	 this->base_ir = ir;
 	 this->result = reg_undef;
 	 ir->accept(this);
@@ -1538,8 +1538,8 @@ fs_visitor::visit(ir_loop *ir)
       inst->predicated = true;
    }
 
-   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list(node, &ir->body_instructions) {
+      ir_instruction *ir = (ir_instruction *)node;
 
       this->base_ir = ir;
       this->result = reg_undef;
@@ -1595,8 +1595,8 @@ fs_visitor::visit(ir_function *ir)
 
       assert(sig);
 
-      foreach_iter(exec_list_iterator, iter, sig->body) {
-	 ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &sig->body) {
+	 ir_instruction *ir = (ir_instruction *)node;
 	 this->base_ir = ir;
 	 this->result = reg_undef;
 	 ir->accept(this);
-- 
cgit v1.2.3


From 652ef8569c923cf8e1e254dddc160c7995d258aa Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 15:48:53 -0700
Subject: Revert "i965: Don't compute brw->wm.input_size_masks when it's
 unused."

This reverts commit 3412069e23b7fa5656262f3dd1aa86f66980594d.  We're
about to start using it in fragment shaders to handle avoiding
projection for fixed function.
---
 src/mesa/drivers/dri/i965/brw_vs_constval.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
index 9fdfebe9f76..47cc0a7da7a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw )
    /* BRW_NEW_VERTEX_PROGRAM */
    const struct brw_vertex_program *vp =
       brw_vertex_program_const(brw->vertex_program);
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
    /* BRW_NEW_INPUT_DIMENSIONS */
    struct tracker t;
    GLuint insn;
    GLuint i;
 
-   /* If we're going to go through brw_fs.cpp, we don't end up using
-    * brw->wm.input_size_masks.
-    */
-   if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
-      return;
-
    memset(&t, 0, sizeof(t));
 
    /* _NEW_LIGHT */
@@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw )
 const struct brw_tracked_state brw_wm_input_sizes = {
    .dirty = {
       .mesa  = _NEW_LIGHT,
-      .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-		BRW_NEW_VERTEX_PROGRAM |
-		BRW_NEW_INPUT_DIMENSIONS),
+      .brw   = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
       .cache = 0
    },
    .prepare = calc_wm_input_sizes
-- 
cgit v1.2.3


From eb30820f268608cf451da32de69723036dddbc62 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 15:56:46 -0700
Subject: i965/fs: Port texture projection avoidance optimization from the old
 backend.

This is part of fixing a ~1% performance regression in OpenArena when
changing the fixed function fragment shader to use the new backend.
Right now this just avoids the LINTERP of the projector, not the math
using it.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 15475fbae2f..9c3180fbc1c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -463,9 +463,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
 	 } else {
 	    /* Perspective interpolation case. */
 	    for (unsigned int k = 0; k < type->vector_elements; k++) {
-	       struct brw_reg interp = interp_reg(location, k);
-	       emit(FS_OPCODE_LINTERP, attr,
-		    this->delta_x, this->delta_y, fs_reg(interp));
+	       /* FINISHME: At some point we probably want to push
+		* this farther by giving similar treatment to the
+		* other potentially constant components of the
+		* attribute, as well as making brw_vs_constval.c
+		* handle varyings other than gl_TexCoord.
+		*/
+	       if (location >= FRAG_ATTRIB_TEX0 &&
+		   location <= FRAG_ATTRIB_TEX7 &&
+		   k == 3 && !(c->key.proj_attrib_mask & (1 << location))) {
+		  emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
+	       } else {
+		  struct brw_reg interp = interp_reg(location, k);
+		  emit(FS_OPCODE_LINTERP, attr,
+		       this->delta_x, this->delta_y, fs_reg(interp));
+	       }
 	       attr.reg_offset++;
 	    }
 
-- 
cgit v1.2.3


From 6d8d6b41b85a18685351f3023a4cd41266ba9e68 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 16:18:39 -0700
Subject: i965/fs: If we see a RCP of a constant, try to constant fold it.

---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9c3180fbc1c..351d1dd283e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1056,6 +1056,20 @@ fs_visitor::propagate_constants()
 		  progress = true;
 	       }
 	       break;
+
+	    case FS_OPCODE_RCP:
+	       /* The hardware doesn't do math on immediate values
+		* (because why are you doing that, seriously?), but
+		* the correct answer is to just constant fold it
+		* anyway.
+		*/
+	       assert(i == 0);
+	       if (inst->src[0].imm.f != 0.0f) {
+		  scan_inst->opcode = BRW_OPCODE_MOV;
+		  scan_inst->src[0] = inst->src[0];
+		  progress = true;
+	       }
+	       break;
 	    }
 	 }
 
-- 
cgit v1.2.3


From a8b86459a1bb74cfdf0d63572a9fe194b2b5b53f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 16:45:15 -0700
Subject: i965/fs: Optimize a * 1.0 -> a.

This appears in our instruction stream as a result of the
brw_vs_constval.c handling.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 43 ++++++++++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
 2 files changed, 44 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 351d1dd283e..a9617c56e12 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1067,6 +1067,7 @@ fs_visitor::propagate_constants()
 	       if (inst->src[0].imm.f != 0.0f) {
 		  scan_inst->opcode = BRW_OPCODE_MOV;
 		  scan_inst->src[0] = inst->src[0];
+		  scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
 		  progress = true;
 	       }
 	       break;
@@ -1087,6 +1088,47 @@ fs_visitor::propagate_constants()
 
    return progress;
 }
+
+
+/**
+ * Performs simple algebraic simplifications on the instruction
+ * stream.
+ *
+ * Currently this only rewrites a float multiply by an immediate 1.0
+ * into a plain MOV of the other operand (a * 1.0 -> a).
+ */
+
+bool
+fs_visitor::opt_algebraic()
+{
+   bool progress = false;
+
+   calculate_live_intervals();
+
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MUL:
+	 if (inst->src[1].file != IMM)
+	    continue;
+
+	 /* a * 1.0 = a */
+	 if (inst->src[1].type == BRW_REGISTER_TYPE_F &&
+	     inst->src[1].imm.f == 1.0) {
+	    inst->opcode = BRW_OPCODE_MOV;
+	    inst->src[1] = reg_undef;
+	    progress = true;
+	    break;
+	 }
+
+	 break;
+      }
+   }
+
+   return progress;
+}
+
 /**
  * Must be called after calculate_live_intervales() to remove unused
  * writes to registers -- register allocation will fail otherwise
@@ -1572,6 +1614,7 @@ fs_visitor::run()
 	 progress = remove_duplicate_mrf_writes() || progress;
 
 	 progress = propagate_constants() || progress;
+	 progress = opt_algebraic() || progress;
 	 progress = register_coalesce() || progress;
 	 progress = compute_to_mrf() || progress;
 	 progress = dead_code_eliminate() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 2bf850e5dea..89d6cda7e4f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -485,6 +485,7 @@ public:
    void setup_pull_constants();
    void calculate_live_intervals();
    bool propagate_constants();
+   bool opt_algebraic();
    bool register_coalesce();
    bool compute_to_mrf();
    bool dead_code_eliminate();
-- 
cgit v1.2.3


From f710b8c7501f29f5f8941e757ea1066cbeb03305 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 16:52:54 -0700
Subject: i965/fs: Allow register coalescing where the source is a uniform.

Removes 0.8% of the fragment shader instructions on Unigine Tropics.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a9617c56e12..e07798cebc1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1196,7 +1196,8 @@ fs_visitor::register_coalesce()
       if (inst->opcode != BRW_OPCODE_MOV ||
 	  inst->predicated ||
 	  inst->saturate ||
-	  inst->dst.file != GRF || inst->src[0].file != GRF ||
+	  inst->dst.file != GRF || (inst->src[0].file != GRF &&
+				    inst->src[0].file != UNIFORM)||
 	  inst->dst.type != inst->src[0].type)
 	 continue;
 
@@ -1218,7 +1219,8 @@ fs_visitor::register_coalesce()
 	       interfered = true;
 	       break;
 	    }
-	    if (scan_inst->dst.reg == inst->src[0].reg &&
+	    if (inst->src[0].file == GRF &&
+		scan_inst->dst.reg == inst->src[0].reg &&
 		(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
 		 scan_inst->is_tex())) {
 	       interfered = true;
@@ -1226,10 +1228,13 @@ fs_visitor::register_coalesce()
 	    }
 	 }
 
-	 /* The gen6 MATH instruction can't handle source modifiers, so avoid
-	  * coalescing those for now.  We should do something more specific.
+	 /* The gen6 MATH instruction can't handle source modifiers or
+	  * unusual register regions, so avoid coalescing those for
+	  * now.  We should do something more specific.
 	  */
-	 if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) {
+	 if (intel->gen >= 6 &&
+	     scan_inst->is_math() &&
+	     (has_source_modifiers || inst->src[0].file == UNIFORM)) {
 	    interfered = true;
 	    break;
 	 }
@@ -1248,11 +1253,10 @@ fs_visitor::register_coalesce()
 	    if (scan_inst->src[i].file == GRF &&
 		scan_inst->src[i].reg == inst->dst.reg &&
 		scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
-	       scan_inst->src[i].reg = inst->src[0].reg;
-	       scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
-	       scan_inst->src[i].abs |= inst->src[0].abs;
-	       scan_inst->src[i].negate ^= inst->src[0].negate;
-	       scan_inst->src[i].smear = inst->src[0].smear;
+	       fs_reg new_src = inst->src[0];
+	       new_src.negate ^= scan_inst->src[i].negate;
+	       new_src.abs |= scan_inst->src[i].abs;
+	       scan_inst->src[i] = new_src;
 	    }
 	 }
       }
-- 
cgit v1.2.3


From dc1f32deae1ab7366792fe5c7d654e02757985c0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 29 Jul 2011 16:49:55 -0600
Subject: mesa: add missing breaks for GL_TEXTURE_CUBE_MAP_SEAMLESS queries

And fix indentation.

NOTE: This is a candidate for the 7.11 branch.
---
 src/mesa/main/texparam.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 3f771f08bc6..134f15346e8 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1266,12 +1266,13 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-      if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
-         *params = (GLfloat) obj->Sampler.CubeMapSeamless;
-      }
-      else {
-         error = GL_TRUE;
-      }
+         if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
+            *params = (GLfloat) obj->Sampler.CubeMapSeamless;
+         }
+         else {
+            error = GL_TRUE;
+         }
+         break;
 
       default:
 	 error = GL_TRUE;
@@ -1441,6 +1442,7 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
          else {
             error = GL_TRUE;
          }
+         break;
 
       default:
          ; /* silence warnings */
-- 
cgit v1.2.3


From 120d71a45cfda1edfa8cd6b1732e209eb98b53d8 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 29 Jul 2011 16:49:55 -0600
Subject: mesa: minor comment changes in teximage.c

---
 src/mesa/main/teximage.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 302fd65cb27..27717cfb0f5 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1,6 +1,5 @@
 /*
- * mesa 3-D graphics library
- * Version:  7.6
+ * Mesa 3-D graphics library
  *
  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
  * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
@@ -556,8 +555,6 @@ _mesa_tex_target_to_face(GLenum target)
  * \param target texture target.
  * \param level image level.
  * \param texImage texture image.
- * 
- * This was basically prompted by the introduction of cube maps.
  */
 void
 _mesa_set_tex_image(struct gl_texture_object *tObj,
@@ -709,15 +706,13 @@ get_proxy_target(GLenum target)
 
 /**
  * Get the texture object that corresponds to the target of the given
- * texture unit.
+ * texture unit.  The target should have already been checked for validity.
  *
  * \param ctx GL context.
  * \param texUnit texture unit.
  * \param target texture target.
  *
  * \return pointer to the texture object on success, or NULL on failure.
- * 
- * \sa gl_texture_unit.
  */
 struct gl_texture_object *
 _mesa_select_tex_object(struct gl_context *ctx,
-- 
cgit v1.2.3


From f379d8f73063a4c4d6cf379318c6b37118d46bfa Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 25 Apr 2011 23:37:47 -0500
Subject: st/mesa: Add a GLSL IR to TGSI translator.

It is still a work in progress at this point, but it produces working and
reasonably well-optimized code.

Originally based on ir_to_mesa and st_mesa_to_tgsi, but does not directly use
Mesa IR instructions in TGSI generation, instead generating TGSI from the
intermediate class glsl_to_tgsi_instruction.  It also has new optimization
passes to replace _mesa_optimize_program.
---
 src/mesa/sources.mak                       |    3 +-
 src/mesa/state_tracker/st_cb_program.c     |   14 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4431 ++++++++++++++++++++++++++++
 src/mesa/state_tracker/st_glsl_to_tgsi.h   |   66 +
 src/mesa/state_tracker/st_mesa_to_tgsi.c   |    4 +-
 src/mesa/state_tracker/st_mesa_to_tgsi.h   |    6 +
 src/mesa/state_tracker/st_program.c        |  399 +--
 src/mesa/state_tracker/st_program.h        |   27 +
 8 files changed, 4767 insertions(+), 183 deletions(-)
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.h

(limited to 'src/mesa')

diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index 4b2ec08bbb0..ed008f8813e 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -336,7 +336,8 @@ MESA_GALLIUM_SOURCES = \
 
 MESA_GALLIUM_CXX_SOURCES = \
 	$(MAIN_CXX_SOURCES) \
-	$(SHADER_CXX_SOURCES)
+	$(SHADER_CXX_SOURCES) \
+	state_tracker/st_glsl_to_tgsi.cpp
 
 # All the core C sources, for dependency checking
 ALL_SOURCES = \
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 32694975d17..2abb4d8f082 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -44,6 +44,7 @@
 #include "st_program.h"
 #include "st_mesa_to_tgsi.h"
 #include "st_cb_program.h"
+#include "st_glsl_to_tgsi.h"
 
 
@@ -129,6 +130,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
       {
          struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
          st_release_vp_variants( st, stvp );
+         
+         if (stvp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
       }
       break;
    case MESA_GEOMETRY_PROGRAM:
@@ -137,6 +141,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
             (struct st_geometry_program *) prog;
 
          st_release_gp_variants(st, stgp);
+         
+         if (stgp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi);
 
          if (stgp->tgsi.tokens) {
             st_free_tokens((void *) stgp->tgsi.tokens);
@@ -151,6 +158,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
 
          st_release_fp_variants(st, stfp);
          
+         if (stfp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
+         
          if (stfp->tgsi.tokens) {
             st_free_tokens(stfp->tgsi.tokens);
             stfp->tgsi.tokens = NULL;
@@ -242,4 +252,8 @@ st_init_program_functions(struct dd_function_table *functions)
    functions->DeleteProgram = st_delete_program;
    functions->IsProgramNative = st_is_program_native;
    functions->ProgramStringNotify = st_program_string_notify;
+   
+   functions->NewShader = st_new_shader;
+   functions->NewShaderProgram = st_new_shader_program;
+   functions->LinkShader = st_link_shader;
 }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
new file mode 100644
index 00000000000..e1102503ee0
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -0,0 +1,4431 @@
+/*
+ * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file st_glsl_to_tgsi.cpp
+ *
+ * Translate GLSL IR to Mesa's gl_program representation and to TGSI.
+ */
+
+#include <stdio.h>
+#include "main/compiler.h"
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_print_visitor.h"
+#include "ir_expression_flattening.h"
+#include "glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "../glsl/program.h"
+#include "ir_optimization.h"
+#include "ast.h"
+
+extern "C" {
+#include "main/mtypes.h"
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "program/hash_table.h"
+#include "program/prog_instruction.h"
+#include "program/prog_optimize.h"
+#include "program/prog_print.h"
+#include "program/program.h"
+#include "program/prog_uniform.h"
+#include "program/prog_parameter.h"
+#include "program/sampler.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_dump.h"
+#include "st_context.h"
+#include "st_program.h"
+#include "st_glsl_to_tgsi.h"
+#include "st_mesa_to_tgsi.h"
+
+#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
+                           (1 << PROGRAM_ENV_PARAM) |    \
+                           (1 << PROGRAM_STATE_VAR) |    \
+                           (1 << PROGRAM_NAMED_PARAM) |  \
+                           (1 << PROGRAM_CONSTANT) |     \
+                           (1 << PROGRAM_UNIFORM))
+}
+
+class st_src_reg;
+class st_dst_reg;
+
+static int swizzle_for_size(int size);
+
+/**
+ * This struct is a corresponding struct to Mesa prog_src_register, with
+ * wider fields.
+ */
+class st_src_reg {
+public:
+   st_src_reg(gl_register_file file, int index, const glsl_type *type)
+   {
+      this->file = file;
+      this->index = index;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+         this->swizzle = swizzle_for_size(type->vector_elements);
+      else
+         this->swizzle = SWIZZLE_XYZW;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   st_src_reg()
+   {
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->swizzle = 0;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   explicit st_src_reg(st_dst_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
+   int negate; /**< NEGATE_XYZW mask from mesa */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+class st_dst_reg {
+public:
+   st_dst_reg(gl_register_file file, int writemask)
+   {
+      this->file = file;
+      this->index = 0;
+      this->writemask = writemask;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+   }
+
+   st_dst_reg()
+   {
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->writemask = 0;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+   }
+
+   explicit st_dst_reg(st_src_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+   GLuint cond_mask:4;
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+st_src_reg::st_src_reg(st_dst_reg reg)
+{
+   this->file = reg.file;
+   this->index = reg.index;
+   this->swizzle = SWIZZLE_XYZW;
+   this->negate = 0;
+   this->reladdr = NULL;
+}
+
+st_dst_reg::st_dst_reg(st_src_reg reg)
+{
+   this->file = reg.file;
+   this->index = reg.index;
+   this->writemask = WRITEMASK_XYZW;
+   this->cond_mask = COND_TR;
+   this->reladdr = reg.reladdr;
+}
+
+class glsl_to_tgsi_instruction : public exec_node {
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   enum prog_opcode op;
+   st_dst_reg dst;
+   st_src_reg src[3];
+   /** Pointer to the ir source this tree came from for debugging */
+   ir_instruction *ir;
+   GLboolean cond_update;
+   bool saturate;
+   int sampler; /**< sampler index */
+   int tex_target; /**< One of TEXTURE_*_INDEX */
+   GLboolean tex_shadow;
+
+   class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
+};
+
+class variable_storage : public exec_node {
+public:
+   variable_storage(ir_variable *var, gl_register_file file, int index)
+      : file(file), index(index), var(var)
+   {
+      /* empty */
+   }
+
+   gl_register_file file;
+   int index;
+   ir_variable *var; /* variable that maps to this, if any */
+};
+
+class function_entry : public exec_node {
+public:
+   ir_function_signature *sig;
+
+   /**
+    * identifier of this function signature used by the program.
+    *
+    * At the point that Mesa instructions for function calls are
+    * generated, we don't know the address of the first instruction of
+    * the function body.  So we make the BranchTarget that is called a
+    * small integer and rewrite them during set_branchtargets().
+    */
+   int sig_id;
+
+   /**
+    * Pointer to first instruction of the function body.
+    *
+    * Set during function body emits after main() is processed.
+    */
+   glsl_to_tgsi_instruction *bgn_inst;
+
+   /**
+    * Index of the first instruction of the function body in actual
+    * Mesa IR.
+    *
+    * Set after conversion from glsl_to_tgsi_instruction to prog_instruction.
+    */
+   int inst;
+
+   /** Storage for the return value. */
+   st_src_reg return_reg;
+};
+
+class glsl_to_tgsi_visitor : public ir_visitor {
+public:
+   glsl_to_tgsi_visitor();
+   ~glsl_to_tgsi_visitor();
+
+   function_entry *current_function;
+
+   struct gl_context *ctx;
+   struct gl_program *prog;
+   struct gl_shader_program *shader_program;
+   struct gl_shader_compiler_options *options;
+
+   int next_temp;
+   
+   int num_address_regs;
+   bool indirect_addr_temps;
+   bool indirect_addr_consts;
+
+   variable_storage *find_variable_storage(ir_variable *var);
+
+   function_entry *get_function_signature(ir_function_signature *sig);
+
+   st_src_reg get_temp(const glsl_type *type);
+   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
+
+   st_src_reg st_src_reg_for_float(float val);
+
+   /**
+    * \name Visit methods
+    *
+    * As typical for the visitor pattern, there must be one \c visit method for
+    * each concrete subclass of \c ir_instruction.  Virtual base classes within
+    * the hierarchy should not have \c visit methods.
+    */
+   /*@{*/
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_dereference_variable  *);
+   virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_if *);
+   /*@}*/
+
+   st_src_reg result;
+
+   /** List of variable_storage */
+   exec_list variables;
+
+   /** List of function_entry */
+   exec_list function_signatures;
+   int next_signature_id;
+
+   /** List of glsl_to_tgsi_instruction */
+   exec_list instructions;
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst, st_src_reg src0);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst,
+        		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
+
+   /**
+    * Emit the correct dot-product instruction for the type of arguments
+    */
+   void emit_dp(ir_instruction *ir,
+                st_dst_reg dst,
+                st_src_reg src0,
+                st_src_reg src1,
+                unsigned elements);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+        	    st_dst_reg dst, st_src_reg src0);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+        	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   void emit_scs(ir_instruction *ir, enum prog_opcode op,
+        	 st_dst_reg dst, const st_src_reg &src);
+
+   GLboolean try_emit_mad(ir_expression *ir,
+        		  int mul_operand);
+   GLboolean try_emit_sat(ir_expression *ir);
+
+   void emit_swz(ir_expression *ir);
+
+   bool process_move_condition(ir_rvalue *ir);
+
+   void rename_temp_register(int index, int new_index);
+   int get_first_temp_read(int index);
+   int get_first_temp_write(int index);
+   int get_last_temp_read(int index);
+   int get_last_temp_write(int index);
+
+   void copy_propagate(void);
+   void eliminate_dead_code(void);
+   void merge_registers(void);
+   void renumber_registers(void);
+
+   void *mem_ctx;
+};
+
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
+
+static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
+
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list args;
+   va_start(args, fmt);
+   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
+   va_end(args);
+
+   prog->LinkStatus = GL_FALSE;
+}
+
+static int
+swizzle_for_size(int size)
+{
+   int size_swizzles[4] = {
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+
+   assert((size >= 1) && (size <= 4));
+   return size_swizzles[size - 1];
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+        		 st_dst_reg dst,
+        		 st_src_reg src0, st_src_reg src1, st_src_reg src2)
+{
+   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
+   int num_reladdr = 0, i;
+
+   /* If we have to do relative addressing, we want to load the ARL
+    * reg directly for one of the regs, and preload the other reladdr
+    * sources into temps.
+    */
+   num_reladdr += dst.reladdr != NULL;
+   num_reladdr += src0.reladdr != NULL;
+   num_reladdr += src1.reladdr != NULL;
+   num_reladdr += src2.reladdr != NULL;
+
+   reladdr_to_temp(ir, &src2, &num_reladdr);
+   reladdr_to_temp(ir, &src1, &num_reladdr);
+   reladdr_to_temp(ir, &src0, &num_reladdr);
+
+   if (dst.reladdr) {
+      emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
+      num_reladdr--;
+   }
+   assert(num_reladdr == 0);
+
+   inst->op = op;
+   inst->dst = dst;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
+   inst->ir = ir;
+
+   inst->function = NULL;
+   
+   if (op == OPCODE_ARL)
+      this->num_address_regs = 1;
+   
+   /* Record which register files are indirectly addressed (used by TGSI) */
+   if (dst.reladdr) {
+      switch(dst.file) {
+      case PROGRAM_TEMPORARY:
+         this->indirect_addr_temps = true;
+         break;
+      case PROGRAM_LOCAL_PARAM:
+      case PROGRAM_ENV_PARAM:
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_NAMED_PARAM:
+      case PROGRAM_CONSTANT:
+      case PROGRAM_UNIFORM:
+         this->indirect_addr_consts = true;
+         break;
+      default:
+         break;
+      }
+   }
+   else {
+      for (i=0; i<3; i++) {
+         if(inst->src[i].reladdr) {
+            switch(inst->src[i].file) { /* the source's file, not dst's */
+            case PROGRAM_TEMPORARY:
+               this->indirect_addr_temps = true;
+               break;
+            case PROGRAM_LOCAL_PARAM:
+            case PROGRAM_ENV_PARAM:
+            case PROGRAM_STATE_VAR:
+            case PROGRAM_NAMED_PARAM:
+            case PROGRAM_CONSTANT:
+            case PROGRAM_UNIFORM:
+               this->indirect_addr_consts = true;
+               break;
+            default:
+               break;
+            }
+         }
+      }
+   }
+
+   this->instructions.push_tail(inst);
+   
+   return inst;
+}
+
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+        		 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
+{
+   return emit(ir, op, dst, src0, src1, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+        		 st_dst_reg dst, st_src_reg src0)
+{
+   assert(dst.writemask != 0);
+   return emit(ir, op, dst, src0, undef_src, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op)
+{
+   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
+}
+
+void
+glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
+        		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
+        		    unsigned elements)
+{
+   static const gl_inst_opcode dot_opcodes[] = {
+      OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
+   };
+
+   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+/**
+ * Emits Mesa scalar opcodes to produce unique answers across channels.
+ *
+ * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
+ * channel determines the result across all channels.  So to do a vec4
+ * of this operation, we want to emit a scalar per source channel used
+ * to produce dest channels.
+ */
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst,
+        			st_src_reg orig_src0, st_src_reg orig_src1)
+{
+   int i, j;
+   int done_mask = ~dst.writemask;
+
+   /* Mesa RCP is a scalar operation splatting results to all channels,
+    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
+    * dst channels.
+    */
+   for (i = 0; i < 4; i++) {
+      GLuint this_mask = (1 << i);
+      glsl_to_tgsi_instruction *inst;
+      st_src_reg src0 = orig_src0;
+      st_src_reg src1 = orig_src1;
+
+      if (done_mask & this_mask)
+         continue;
+
+      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
+      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
+      for (j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src0.swizzle, j) == src0_swiz &&
+             GET_SWZ(src1.swizzle, j) == src1_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+        			   src0_swiz, src0_swiz);
+      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
+        			  src1_swiz, src1_swiz);
+
+      inst = emit(ir, op, dst, src0, src1);
+      inst->dst.writemask = this_mask;
+      done_mask |= this_mask;
+   }
+}
+
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg undef = undef_src;
+
+   undef.swizzle = SWIZZLE_XXXX;
+
+   emit_scalar(ir, op, dst, src0, undef);
+}
+
+/**
+ * Emit an OPCODE_SCS instruction
+ *
+ * The \c SCS opcode functions a bit differently than the other Mesa (or
+ * ARB_fragment_program) opcodes.  Instead of splatting its result across all
+ * four components of the destination, it writes one value to the \c x
+ * component and another value to the \c y component.
+ *
+ * \param ir        IR instruction being processed
+ * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
+ *                  value is desired.
+ * \param dst       Destination register
+ * \param src       Source register
+ */
+void
+glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
+        		     st_dst_reg dst,
+        		     const st_src_reg &src)
+{
+   /* Vertex programs cannot use the SCS opcode.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      emit_scalar(ir, op, dst, src);
+      return;
+   }
+
+   const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
+   const unsigned scs_mask = (1U << component);
+   int done_mask = ~dst.writemask;
+   st_src_reg tmp;
+
+   assert(op == OPCODE_SIN || op == OPCODE_COS);
+
+   /* If there are components in the destination that differ from the component
+    * that will be written by the SCS instruction, we'll need a temporary.
+    */
+   if (scs_mask != unsigned(dst.writemask)) {
+      tmp = get_temp(glsl_type::vec4_type);
+   }
+
+   for (unsigned i = 0; i < 4; i++) {
+      unsigned this_mask = (1U << i);
+      st_src_reg src0 = src;
+
+      if ((done_mask & this_mask) != 0)
+         continue;
+
+      /* The source swizzle specified which component of the source generates
+       * sine / cosine for the current component in the destination.  The SCS
+       * instruction requires that this value be swizzled to the X component.
+       * Replace the current swizzle with a swizzle that puts the source in
+       * the X component.
+       */
+      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
+
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+        			   src0_swiz, src0_swiz);
+      for (unsigned j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.  Compare the original swizzle; src0's is already a splat.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src.swizzle, j) == src0_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+
+      if (this_mask != scs_mask) {
+         glsl_to_tgsi_instruction *inst;
+         st_dst_reg tmp_dst = st_dst_reg(tmp);
+
+         /* Emit the SCS instruction.
+          */
+         inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
+         inst->dst.writemask = scs_mask;
+
+         /* Move the result of the SCS instruction to the desired location in
+          * the destination.
+          */
+         tmp.swizzle = MAKE_SWIZZLE4(component, component,
+        			     component, component);
+         inst = emit(ir, OPCODE_MOV, dst, tmp);
+         inst->dst.writemask = this_mask;
+      } else {
+         /* Emit the SCS instruction to write directly to the destination.
+          */
+         glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
+         inst->dst.writemask = scs_mask;
+      }
+
+      done_mask |= this_mask;
+   }
+}
+
+struct st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
+{
+   st_src_reg src(PROGRAM_CONSTANT, -1, NULL);
+
+   src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
+        				  &val, 1, &src.swizzle);
+
+   return src;
+}
+
+static int
+type_size(const struct glsl_type *type)
+{
+   unsigned int i;
+   int size;
+
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (type->is_matrix()) {
+         return type->matrix_columns;
+      } else {
+         /* Regardless of size of vector, it gets a vec4. This is bad
+          * packing for things like floats, but otherwise arrays become a
+          * mess.  Hopefully a later pass over the code can pack scalars
+          * down if appropriate.
+          */
+         return 1;
+      }
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0);
+      return type_size(type->fields.array) * type->length;
+   case GLSL_TYPE_STRUCT:
+      size = 0;
+      for (i = 0; i < type->length; i++) {
+         size += type_size(type->fields.structure[i].type);
+      }
+      return size;
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+/**
+ * In the initial pass of codegen, we assign temporary numbers to
+ * intermediate results.  (not SSA -- variable assignments will reuse
+ * storage).  Actual register allocation for the Mesa VM occurs in a
+ * pass over the Mesa IR later.
+ */
+st_src_reg
+glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
+{
+   st_src_reg src;
+   int swizzle[4];
+   int i;
+
+   src.file = PROGRAM_TEMPORARY;
+   src.index = next_temp;
+   src.reladdr = NULL;
+   next_temp += type_size(type);
+
+   if (type->is_array() || type->is_record()) {
+      src.swizzle = SWIZZLE_NOOP;
+   } else {
+      for (i = 0; i < type->vector_elements; i++)
+         swizzle[i] = i;
+      for (; i < 4; i++)
+         swizzle[i] = type->vector_elements - 1;
+      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
+        			  swizzle[2], swizzle[3]);
+   }
+   src.negate = 0;
+
+   return src;
+}
+
+variable_storage *
+glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
+{
+   
+   variable_storage *entry;
+
+   foreach_iter(exec_list_iterator, iter, this->variables) {
+      entry = (variable_storage *)iter.get();
+
+      if (entry->var == var)
+         return entry;
+   }
+
+   return NULL;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_variable *ir)
+{
+   if (strcmp(ir->name, "gl_FragCoord") == 0) {
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+
+      fp->OriginUpperLeft = ir->origin_upper_left;
+      fp->PixelCenterInteger = ir->pixel_center_integer;
+
+   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+      switch (ir->depth_layout) {
+      case ir_depth_layout_none:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
+         break;
+      case ir_depth_layout_any:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
+         break;
+      case ir_depth_layout_greater:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
+         break;
+      case ir_depth_layout_less:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
+         break;
+      case ir_depth_layout_unchanged:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+   }
+
+   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
+      unsigned int i;
+      const ir_state_slot *const slots = ir->state_slots;
+      assert(ir->state_slots != NULL);
+
+      /* Check if this statevar's setup in the STATE file exactly
+       * matches how we'll want to reference it as a
+       * struct/array/whatever.  If not, then we need to move it into
+       * temporary storage and hope that it'll get copy-propagated
+       * out.
+       */
+      for (i = 0; i < ir->num_state_slots; i++) {
+         if (slots[i].swizzle != SWIZZLE_XYZW) {
+            break;
+         }
+      }
+
+      struct variable_storage *storage;
+      st_dst_reg dst;
+      if (i == ir->num_state_slots) {
+         /* We'll set the index later. */
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
+         this->variables.push_tail(storage);
+
+         dst = undef_dst;
+      } else {
+         /* The variable_storage constructor allocates slots based on the size
+          * of the type.  However, this had better match the number of state
+          * elements that we're going to copy into the new temporary.
+          */
+         assert((int) ir->num_state_slots == type_size(ir->type));
+
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
+        					 this->next_temp);
+         this->variables.push_tail(storage);
+         this->next_temp += type_size(ir->type);
+
+         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
+      }
+
+
+      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+         int index = _mesa_add_state_reference(this->prog->Parameters,
+        				       (gl_state_index *)slots[i].tokens);
+
+         if (storage->file == PROGRAM_STATE_VAR) {
+            if (storage->index == -1) {
+               storage->index = index;
+            } else {
+               assert(index == storage->index + (int)i);
+            }
+         } else {
+            st_src_reg src(PROGRAM_STATE_VAR, index, NULL);
+            src.swizzle = slots[i].swizzle;
+            emit(ir, OPCODE_MOV, dst, src);
+            /* even a float takes up a whole vec4 reg in a struct/array. */
+            dst.index++;
+         }
+      }
+
+      if (storage->file == PROGRAM_TEMPORARY &&
+          dst.index != storage->index + (int) ir->num_state_slots) {
+         fail_link(this->shader_program,
+        	   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
+        	   ir->name, dst.index - storage->index,
+        	   type_size(ir->type));
+      }
+   }
+}
+
+/**
+ * Emit a loop as a BGNLOOP ... ENDLOOP pair around the visited body.
+ *
+ * If the loop carries counter information, the initial assignment
+ * (ir->from) is emitted before BGNLOOP, the exit test (ir->cmp of the
+ * counter against ir->to) is emitted as a conditional break at the top of
+ * the loop, and the increment is emitted after the body.  The IR nodes
+ * built here for those pieces are only scaffolding and are deleted as soon
+ * as they have been visited.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_loop *ir)
+{
+   ir_dereference_variable *counter = NULL;
+
+   if (ir->counter != NULL)
+      counter = new(ir) ir_dereference_variable(ir->counter);
+
+   if (ir->from != NULL) {
+      assert(ir->counter != NULL);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
+
+      a->accept(this);
+      delete a;
+   }
+
+   emit(NULL, OPCODE_BGNLOOP);
+
+   if (ir->to) {
+      /* The exit test reads the counter, so one has to exist. */
+      assert(ir->counter != NULL);
+
+      ir_expression *e =
+         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
+                               counter, ir->to);
+      ir_if *if_stmt =  new(ir) ir_if(e);
+
+      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+
+      if_stmt->then_instructions.push_tail(brk);
+
+      if_stmt->accept(this);
+
+      delete if_stmt;
+      delete e;
+      delete brk;
+   }
+
+   visit_exec_list(&ir->body_instructions, this);
+
+   if (ir->increment) {
+      /* counter->type is dereferenced below; fail loudly in debug builds
+       * instead of crashing on a NULL counter.
+       */
+      assert(ir->counter != NULL);
+
+      ir_expression *e =
+         new(ir) ir_expression(ir_binop_add, counter->type,
+                               counter, ir->increment);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
+
+      a->accept(this);
+      delete a;
+      delete e;
+   }
+
+   emit(NULL, OPCODE_ENDLOOP);
+}
+
+/**
+ * Translate a loop jump: break becomes OPCODE_BRK, continue becomes
+ * OPCODE_CONT.  Any other mode emits nothing.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
+{
+   if (ir->mode == ir_loop_jump::jump_break) {
+      emit(NULL, OPCODE_BRK);
+   } else if (ir->mode == ir_loop_jump::jump_continue) {
+      emit(NULL, OPCODE_CONT);
+   }
+}
+
+
+/**
+ * Not expected to be reached: visit(ir_function) walks the body of main()
+ * itself, so a signature being visited directly trips the assertion.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
+{
+   /* Silence the unused-parameter warning. */
+   (void)ir;
+   assert(0);
+}
+
+/**
+ * Translate the body of main(); every other function is ignored.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_function *ir)
+{
+   /* Function bodies other than main() are skipped -- calls to them should
+    * all have been inlined before glsl_to_tgsi runs, so we should never
+    * need them.
+    */
+   if (strcmp(ir->name, "main") != 0)
+      return;
+
+   /* main() takes no arguments, so match against an empty parameter list. */
+   exec_list no_params;
+   const ir_function_signature *main_sig = ir->matching_signature(&no_params);
+
+   assert(main_sig);
+
+   foreach_iter(exec_list_iterator, iter, main_sig->body) {
+      ir_instruction *inst = (ir_instruction *)iter.get();
+
+      inst->accept(this);
+   }
+}
+
+/**
+ * Try to emit ir as a single OPCODE_MAD.
+ *
+ * \param ir           expression to translate
+ * \param mul_operand  which operand of ir (0 or 1) to test for being a
+ *                     multiply; the other operand becomes the addend
+ * \return true if a MAD was emitted and this->result holds its
+ *         destination, false if the chosen operand is not an ir_binop_mul
+ *         expression (nothing is emitted in that case).
+ */
+GLboolean
+glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
+{
+   ir_expression *mul = ir->operands[mul_operand]->as_expression();
+
+   if (mul == NULL || mul->operation != ir_binop_mul)
+      return false;
+
+   const int addend_operand = 1 - mul_operand;
+   st_src_reg factor0, factor1, addend;
+
+   /* Operands are visited in the order: both factors, then the addend. */
+   mul->operands[0]->accept(this);
+   factor0 = this->result;
+
+   mul->operands[1]->accept(this);
+   factor1 = this->result;
+
+   ir->operands[addend_operand]->accept(this);
+   addend = this->result;
+
+   this->result = get_temp(ir->type);
+   emit(ir, OPCODE_MAD, st_dst_reg(this->result), factor0, factor1, addend);
+
+   return true;
+}
+
+/**
+ * Try to emit ir as a saturating MOV.
+ *
+ * \return true if ir->as_rvalue_to_saturate() found a value to saturate
+ *         and a MOV with the saturate flag was emitted into a fresh
+ *         temporary (left in this->result); false otherwise, including
+ *         always for vertex programs.
+ */
+GLboolean
+glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
+{
+   /* Saturates were only introduced to vertex programs in
+    * NV_vertex_program3, so don't give them to drivers in the VP.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
+      return false;
+
+   ir_rvalue *unclamped = ir->as_rvalue_to_saturate();
+   if (unclamped == NULL)
+      return false;
+
+   unclamped->accept(this);
+   st_src_reg value = this->result;
+
+   this->result = get_temp(ir->type);
+
+   glsl_to_tgsi_instruction *mov =
+      emit(ir, OPCODE_MOV, st_dst_reg(this->result), value);
+   mov->saturate = true;
+
+   return true;
+}
+
+/**
+ * Load the relative address of a source operand into the address register.
+ *
+ * Does nothing when *reg has no relative address.  Otherwise an ARL of the
+ * operand's reladdr is emitted and, unless *num_reladdr is 1, the operand
+ * is copied into a temporary which then replaces *reg.  *num_reladdr is
+ * decremented for every operand that had a relative address.
+ */
+void
+glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
+                                      st_src_reg *reg, int *num_reladdr)
+{
+   if (reg->reladdr == NULL)
+      return;
+
+   emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
+
+   if (*num_reladdr != 1) {
+      st_src_reg copy = get_temp(glsl_type::vec4_type);
+
+      emit(ir, OPCODE_MOV, st_dst_reg(copy), *reg);
+      *reg = copy;
+   }
+
+   *num_reladdr -= 1;
+}
+
+/**
+ * Emit an ir_quadop_vector expression as a single OPCODE_SWZ.
+ *
+ * Each operand is walked down through swizzle and negation nodes until it
+ * ends in either a constant (-1, 0 or 1) or a dereference of the one
+ * variable shared by all operands.  The channel selectors and negate bits
+ * collected on the way become the swizzle and negate fields of the SWZ
+ * source.  The result is written to a fresh temporary, which is left in
+ * this->result.
+ */
+void
+glsl_to_tgsi_visitor::emit_swz(ir_expression *ir)
+{
+   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
+    * This means that each of the operands is either an immediate value of -1,
+    * 0, or 1, or is a component from one source register (possibly with
+    * negation).
+    */
+   uint8_t components[4] = { 0 };
+   bool negate[4] = { false };
+   ir_variable *var = NULL;
+
+   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+      ir_rvalue *op = ir->operands[i];
+
+      assert(op->type->is_scalar());
+
+      while (op != NULL) {
+         switch (op->ir_type) {
+         case ir_type_constant: {
+
+            assert(op->type->is_scalar());
+
+            const ir_constant *const c = op->as_constant();
+            if (c->is_one()) {
+               components[i] = SWIZZLE_ONE;
+            } else if (c->is_zero()) {
+               components[i] = SWIZZLE_ZERO;
+            } else if (c->is_negative_one()) {
+               /* -1 is encoded as a negated 1.  Toggle rather than set so
+                * that an enclosing negation (-(-1)) cancels correctly.
+                */
+               components[i] = SWIZZLE_ONE;
+               negate[i] = !negate[i];
+            } else {
+               assert(!"SWZ constant must be 0.0 or 1.0.");
+            }
+
+            op = NULL;
+            break;
+         }
+
+         case ir_type_dereference_variable: {
+            ir_dereference_variable *const deref =
+               (ir_dereference_variable *) op;
+
+            /* All operands must come from the same variable. */
+            assert((var == NULL) || (deref->var == var));
+            components[i] = SWIZZLE_X;
+            var = deref->var;
+            op = NULL;
+            break;
+         }
+
+         case ir_type_expression: {
+            ir_expression *const expr = (ir_expression *) op;
+
+            assert(expr->operation == ir_unop_neg);
+            /* Each negation flips the sign, so nested negations cancel. */
+            negate[i] = !negate[i];
+
+            op = expr->operands[0];
+            break;
+         }
+
+         case ir_type_swizzle: {
+            ir_swizzle *const swiz = (ir_swizzle *) op;
+
+            components[i] = swiz->mask.x;
+            op = swiz->val;
+            break;
+         }
+
+         default:
+            assert(!"Should not get here.");
+            return;
+         }
+      }
+   }
+
+   assert(var != NULL);
+
+   ir_dereference_variable *const deref =
+      new(mem_ctx) ir_dereference_variable(var);
+
+   this->result.file = PROGRAM_UNDEFINED;
+   deref->accept(this);
+   if (this->result.file == PROGRAM_UNDEFINED) {
+      ir_print_visitor v;
+      printf("Failed to get tree for expression operand:\n");
+      deref->accept(&v);
+      exit(1);
+   }
+
+   st_src_reg src;
+
+   src = this->result;
+   src.swizzle = MAKE_SWIZZLE4(components[0],
+                               components[1],
+                               components[2],
+                               components[3]);
+   src.negate = ((unsigned(negate[0]) << 0)
+                 | (unsigned(negate[1]) << 1)
+                 | (unsigned(negate[2]) << 2)
+                 | (unsigned(negate[3]) << 3));
+
+   /* Storage for our result.  Ideally for an assignment we'd be using the
+    * actual storage for the result here, instead.
+    */
+   const st_src_reg result_src = get_temp(ir->type);
+   st_dst_reg result_dst = st_dst_reg(result_src);
+
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   emit(ir, OPCODE_SWZ, result_dst, src);
+   this->result = result_src;
+}
+
+/**
+ * Translate an expression and leave its value in this->result.
+ *
+ * The MAD and saturate peepholes are tried first, and ir_quadop_vector is
+ * handed to emit_swz().  Otherwise every operand is visited, a temporary
+ * is reserved for the result, and the operation is emitted into it.  A few
+ * operations (negation, the int/bool/float conversions) emit nothing and
+ * simply forward the (possibly sign-flipped) operand as the result.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   st_src_reg op[Elements(ir->operands)];
+   st_src_reg result_src;
+   st_dst_reg result_dst;
+
+   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
+    */
+   if (ir->operation == ir_binop_add) {
+      if (try_emit_mad(ir, 1))
+         return;
+      if (try_emit_mad(ir, 0))
+         return;
+   }
+   if (try_emit_sat(ir))
+      return;
+
+   if (ir->operation == ir_quadop_vector) {
+      this->emit_swz(ir);
+      return;
+   }
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      this->result.file = PROGRAM_UNDEFINED;
+      ir->operands[operand]->accept(this);
+      if (this->result.file == PROGRAM_UNDEFINED) {
+         ir_print_visitor v;
+         printf("Failed to get tree for expression operand:\n");
+         ir->operands[operand]->accept(&v);
+         exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   /* Widest of the first two operands; used by the dot-product based
+    * vector comparisons below.
+    */
+   int vector_elements = ir->operands[0]->type->vector_elements;
+   if (ir->operands[1]) {
+      vector_elements = MAX2(vector_elements,
+                             ir->operands[1]->type->vector_elements);
+   }
+
+   this->result.file = PROGRAM_UNDEFINED;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = get_temp(ir->type);
+   /* convenience for the emit functions below. */
+   result_dst = st_dst_reg(result_src);
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0));
+      break;
+   case ir_unop_neg:
+      /* No instruction needed: flip the negate bits on the operand. */
+      op[0].negate = ~op[0].negate;
+      result_src = op[0];
+      break;
+   case ir_unop_abs:
+      emit(ir, OPCODE_ABS, result_dst, op[0]);
+      break;
+   case ir_unop_sign:
+      emit(ir, OPCODE_SSG, result_dst, op[0]);
+      break;
+   case ir_unop_rcp:
+      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
+      break;
+
+   case ir_unop_exp2:
+      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
+      break;
+   case ir_unop_log2:
+      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
+      break;
+   case ir_unop_sin:
+      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
+      break;
+   case ir_unop_sin_reduced:
+      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos_reduced:
+      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
+      break;
+
+   case ir_unop_dFdx:
+      emit(ir, OPCODE_DDX, result_dst, op[0]);
+      break;
+   case ir_unop_dFdy:
+      emit(ir, OPCODE_DDY, result_dst, op[0]);
+      break;
+
+   case ir_unop_noise: {
+      /* Pick NOISE1..NOISE4 from the operand's component count. */
+      const enum prog_opcode opcode =
+         prog_opcode(OPCODE_NOISE1
+                     + (ir->operands[0]->type->vector_elements) - 1);
+      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
+
+      emit(ir, opcode, result_dst, op[0]);
+      break;
+   }
+
+   case ir_binop_add:
+      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_mul:
+      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_div:
+      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      break;
+   case ir_binop_mod:
+      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      break;
+
+   case ir_binop_less:
+      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_greater:
+      emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_lequal:
+      emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_gequal:
+      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_equal:
+      emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_nequal:
+      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_all_equal:
+      /* "==" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         /* Per-channel "differs" flags, summed via a dot product with
+          * itself; the vectors are equal when that sum is zero.
+          */
+         st_src_reg temp = get_temp(glsl_type::vec4_type);
+         emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit_dp(ir, result_dst, temp, temp, vector_elements);
+         emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
+      } else {
+         emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
+      }
+      break;
+   case ir_binop_any_nequal:
+      /* "!=" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         st_src_reg temp = get_temp(glsl_type::vec4_type);
+         emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit_dp(ir, result_dst, temp, temp, vector_elements);
+         emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      } else {
+         emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      }
+      break;
+
+   case ir_unop_any:
+      assert(ir->operands[0]->type->is_vector());
+      emit_dp(ir, result_dst, op[0], op[0],
+              ir->operands[0]->type->vector_elements);
+      emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      break;
+
+   case ir_binop_logic_xor:
+      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_or:
+      /* This could be a saturated add and skip the SNE. */
+      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      break;
+
+   case ir_binop_logic_and:
+      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
+      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      emit_dp(ir, result_dst, op[0], op[1],
+              ir->operands[0]->type->vector_elements);
+      break;
+
+   case ir_unop_sqrt:
+      /* sqrt(x) = x * rsq(x). */
+      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
+      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
+      /* For incoming channels <= 0, set the result to 0. */
+      op[0].negate = ~op[0].negate;
+      emit(ir, OPCODE_CMP, result_dst,
+           op[0], result_src, st_src_reg_for_float(0.0));
+      break;
+   case ir_unop_rsq:
+      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
+      break;
+   case ir_unop_i2f:
+   case ir_unop_b2f:
+   case ir_unop_b2i:
+      /* Mesa IR lacks types, ints are stored as truncated floats. */
+      result_src = op[0];
+      break;
+   case ir_unop_f2i:
+      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_f2b:
+   case ir_unop_i2b:
+      emit(ir, OPCODE_SNE, result_dst,
+           op[0], st_src_reg_for_float(0.0));
+      break;
+   case ir_unop_trunc:
+      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_ceil:
+      /* ceil(x) = -floor(-x) */
+      op[0].negate = ~op[0].negate;
+      emit(ir, OPCODE_FLR, result_dst, op[0]);
+      result_src.negate = ~result_src.negate;
+      break;
+   case ir_unop_floor:
+      emit(ir, OPCODE_FLR, result_dst, op[0]);
+      break;
+   case ir_unop_fract:
+      emit(ir, OPCODE_FRC, result_dst, op[0]);
+      break;
+
+   case ir_binop_min:
+      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_max:
+      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_pow:
+      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
+      break;
+
+   case ir_unop_bit_not:
+   case ir_unop_u2f:
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+   case ir_binop_bit_and:
+   case ir_binop_bit_xor:
+   case ir_binop_bit_or:
+   case ir_unop_round_even:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+
+   case ir_quadop_vector:
+      /* This operation should have already been handled.
+       */
+      assert(!"Should not get here.");
+      break;
+   }
+
+   this->result = result_src;
+}
+
+
+/**
+ * Apply a swizzle to the value it wraps by composing it with whatever
+ * swizzle the source register already carries.  No instruction is emitted.
+ *
+ * Note that this only covers swizzles in expressions, not those on the
+ * left hand side of an assignment, which do write masking.  See
+ * ir_assignment for that.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
+{
+   ir->val->accept(this);
+
+   st_src_reg src = this->result;
+   assert(src.file != PROGRAM_UNDEFINED);
+
+   const unsigned selected[4] = {
+      ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w
+   };
+   const int count = ir->type->vector_elements;
+   int swizzle[4];
+
+   /* Channels the result type actually has: look the selected channel up
+    * in the source's existing swizzle.
+    */
+   for (int chan = 0; chan < 4 && chan < count; chan++)
+      swizzle[chan] = GET_SWZ(src.swizzle, selected[chan]);
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   for (int chan = count; chan < 4; chan++)
+      swizzle[chan] = swizzle[count - 1];
+
+   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+   this->result = src;
+}
+
+/**
+ * Resolve a variable dereference to a source register in this->result.
+ *
+ * Storage is looked up with find_variable_storage(); if none exists yet it
+ * is created according to the variable's mode.  Only uniform and
+ * auto/temporary storage is appended to this->variables here.
+ * NOTE(review): input, output and system-value storage is not appended,
+ * so it is presumably re-created on each dereference -- confirm intended.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
+{
+   ir_variable *const var = ir->var;
+   variable_storage *storage = find_variable_storage(var);
+
+   if (storage == NULL) {
+      switch (var->mode) {
+      case ir_var_uniform:
+         storage = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
+                                                 var->location);
+         this->variables.push_tail(storage);
+         break;
+
+      case ir_var_in:
+      case ir_var_inout:
+         /* The linker assigns locations for varyings and attributes,
+          * including deprecated builtins (like gl_Color), user-assign
+          * generic attributes (glBindVertexLocation), and
+          * user-defined varyings.
+          *
+          * FINISHME: We would hit this path for function arguments.  Fix!
+          */
+         assert(var->location != -1);
+         storage = new(mem_ctx) variable_storage(var, PROGRAM_INPUT,
+                                                 var->location);
+
+         /* Generic vertex attributes are also recorded in the program's
+          * attribute list.
+          */
+         if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
+             var->location >= VERT_ATTRIB_GENERIC0) {
+            _mesa_add_attribute(this->prog->Attributes,
+                                var->name,
+                                _mesa_sizeof_glsl_type(var->type->gl_type),
+                                var->type->gl_type,
+                                var->location - VERT_ATTRIB_GENERIC0);
+         }
+         break;
+
+      case ir_var_out:
+         assert(var->location != -1);
+         storage = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT,
+                                                 var->location);
+         break;
+
+      case ir_var_system_value:
+         storage = new(mem_ctx) variable_storage(var, PROGRAM_SYSTEM_VALUE,
+                                                 var->location);
+         break;
+
+      case ir_var_auto:
+      case ir_var_temporary:
+         /* Locals get the next free run of temporaries. */
+         storage = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
+                                                 this->next_temp);
+         this->variables.push_tail(storage);
+
+         next_temp += type_size(var->type);
+         break;
+      }
+
+      /* Any mode not handled above leaves us without storage. */
+      if (storage == NULL) {
+         printf("Failed to make storage for %s\n", var->name);
+         exit(1);
+      }
+   }
+
+   this->result = st_src_reg(storage->file, storage->index, var->type);
+}
+
+/**
+ * Resolve an array element dereference to a source register.
+ *
+ * A constant index is folded straight into the register index.  A
+ * variable index is visited, scaled by the element size when that is more
+ * than one register, and attached to the source as its relative address.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
+{
+   ir_constant *index;
+   st_src_reg src;
+   int element_size = type_size(ir->type);
+
+   index = ir->array_index->constant_expression_value();
+
+   ir->array->accept(this);
+   src = this->result;
+
+   if (index) {
+      src.index += index->value.i[0] * element_size;
+   } else {
+      /* Variable index array dereference.  It eats the "vec4" of the
+       * base of the array and an index that offsets the Mesa register
+       * index.
+       */
+      ir->array_index->accept(this);
+
+      st_src_reg index_reg;
+
+      if (element_size == 1) {
+         index_reg = this->result;
+      } else {
+         /* Convert the element index into a register offset. */
+         index_reg = get_temp(glsl_type::float_type);
+
+         emit(ir, OPCODE_MUL, st_dst_reg(index_reg),
+              this->result, st_src_reg_for_float(element_size));
+      }
+
+      /* reladdr is a pointer, so the index register is copied into
+       * storage allocated from mem_ctx.
+       */
+      src.reladdr = ralloc(mem_ctx, st_src_reg);
+      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      src.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      src.swizzle = SWIZZLE_NOOP;
+
+   this->result = src;
+}
+
+/**
+ * Resolve a structure field dereference.
+ *
+ * The record is visited first; the result register is then advanced by
+ * the combined type_size() of every field declared before the requested
+ * one, and its swizzle is set for the field's type.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
+{
+   const glsl_type *struct_type = ir->record->type;
+
+   ir->record->accept(this);
+
+   /* Sum the sizes of the fields in front of the one being accessed. */
+   int offset = 0;
+   unsigned int field = 0;
+   while (field < struct_type->length &&
+          strcmp(struct_type->fields.structure[field].name, ir->field) != 0) {
+      offset += type_size(struct_type->fields.structure[field].type);
+      field++;
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   const bool fits_one_reg = ir->type->is_scalar() || ir->type->is_vector();
+   if (fits_one_reg)
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      this->result.swizzle = SWIZZLE_NOOP;
+
+   this->result.index += offset;
+}
+
+/**
+ * Produce the destination register for the left-hand side of an
+ * assignment.
+ *
+ * We want to be careful in assignment setup to hit the actual storage
+ * instead of potentially using a temporary like we might with the
+ * ir_dereference handler.
+ *
+ * \param ir  left-hand side of the assignment
+ * \param v   visitor used to resolve the dereference
+ * \return the visited dereference converted to a destination register
+ */
+static st_dst_reg
+get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
+{
+   /* The LHS must be a dereference.  If the LHS is a variable indexed array
+    * access of a vector, it must be separated into a series conditional moves
+    * before reaching this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+
+   if (ir_dereference_array *array_deref = ir->as_dereference_array()) {
+      assert(!array_deref->array->type->is_vector());
+      (void) array_deref;
+   }
+
+   /* Use the rvalue deref handler for the most part.  We'll ignore
+    * swizzles in it and write swizzles using writemask, though.
+    */
+   ir->accept(v);
+
+   return st_dst_reg(v->result);
+}
+
+/**
+ * Process the condition of a conditional assignment
+ *
+ * Examines the condition of a conditional assignment to generate the optimal
+ * first operand of a \c CMP instruction.  If the condition is a relational
+ * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
+ * used as the source for the \c CMP instruction.  Otherwise the comparison
+ * is processed to a boolean result, and the boolean result is used as the
+ * operand to the CMP instruction.
+ *
+ * The chosen operand is left in this->result.
+ *
+ * \return true if the caller must swap the two value operands of the
+ *         \c CMP instruction, false otherwise.
+ */
+bool
+glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
+{
+   /* Defaults cover the generic case: visit the whole condition and flip
+    * the sign of its 0.0/1.0 result.
+    */
+   ir_rvalue *cmp_src = ir;
+   bool flip_sign = true;
+   bool swap_operands = false;
+
+   ir_expression *const expr = ir->as_expression();
+   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
+      bool zero_on_left = false;
+
+      /* Pick the operand that is being compared against zero, if any. */
+      if (expr->operands[0]->is_zero()) {
+         zero_on_left = true;
+         cmp_src = expr->operands[1];
+      } else if (expr->operands[1]->is_zero()) {
+         zero_on_left = false;
+         cmp_src = expr->operands[0];
+      }
+
+      /*      a is -  0  +            -  0  +
+       * (a <  0)  T  F  F  ( a < 0)  T  F  F
+       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
+       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (a >  0)  F  F  T  (-a < 0)  F  F  T
+       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
+       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       *
+       * Note that exchanging the order of 0 and 'a' in the comparison simply
+       * means that the value of 'a' should be negated.
+       */
+      if (cmp_src != ir) {
+         switch (expr->operation) {
+         case ir_binop_less:
+            flip_sign = zero_on_left;
+            swap_operands = false;
+            break;
+
+         case ir_binop_greater:
+            flip_sign = !zero_on_left;
+            swap_operands = false;
+            break;
+
+         case ir_binop_lequal:
+            flip_sign = !zero_on_left;
+            swap_operands = true;
+            break;
+
+         case ir_binop_gequal:
+            flip_sign = zero_on_left;
+            swap_operands = true;
+            break;
+
+         default:
+            /* This isn't the right kind of comparison afterall, so make sure
+             * the whole condition is visited.
+             */
+            cmp_src = ir;
+            break;
+         }
+      }
+   }
+
+   cmp_src->accept(this);
+
+   /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
+    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
+    * choose which value OPCODE_CMP produces without an extra instruction
+    * computing the condition.
+    */
+   if (flip_sign)
+      this->result.negate = ~this->result.negate;
+
+   return swap_operands;
+}
+
+/**
+ * Emit an assignment as one MOV (or, for a conditional assignment, one
+ * CMP) per register occupied by the left-hand side type.
+ *
+ * The right-hand side is visited before the left-hand side.  For vector
+ * destinations the source swizzle is rearranged so that the RHS channels
+ * line up with the destination channels enabled by the write mask.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_assignment *ir)
+{
+   ir->rhs->accept(this);
+   st_src_reg r = this->result;
+
+   st_dst_reg l = get_assignment_lhs(ir->lhs, this);
+
+   /* FINISHME: This should really set to the correct maximal writemask for each
+    * FINISHME: component written (in the loops below).  This case can only
+    * FINISHME: occur for matrices, arrays, and structures.
+    */
+   if (ir->write_mask == 0) {
+      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
+      l.writemask = WRITEMASK_XYZW;
+   } else if (ir->lhs->type->is_scalar()) {
+      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
+       * FINISHME: W component of fragment shader output zero, work correctly.
+       */
+      l.writemask = WRITEMASK_XYZW;
+   } else {
+      assert(ir->lhs->type->is_vector());
+      l.writemask = ir->write_mask;
+
+      /* Source channel feeding the first written channel; it is reused as
+       * filler for the channels that are not written.
+       */
+      int filler = 0;
+      for (int chan = 0; chan < 4; chan++) {
+         if (l.writemask & (1 << chan)) {
+            filler = GET_SWZ(r.swizzle, chan);
+            break;
+         }
+      }
+
+      /* Swizzle a small RHS vector into the channels being written.
+       *
+       * glsl ir treats write_mask as dictating how many channels are
+       * present on the RHS while Mesa IR treats write_mask as just
+       * showing which channels of the vec4 RHS get written.
+       */
+      int packed[4];
+      int next_rhs_chan = 0;
+      for (int chan = 0; chan < 4; chan++) {
+         if (l.writemask & (1 << chan))
+            packed[chan] = GET_SWZ(r.swizzle, next_rhs_chan++);
+         else
+            packed[chan] = filler;
+      }
+      r.swizzle = MAKE_SWIZZLE4(packed[0], packed[1],
+                                packed[2], packed[3]);
+   }
+
+   assert(l.file != PROGRAM_UNDEFINED);
+   assert(r.file != PROGRAM_UNDEFINED);
+
+   if (ir->condition) {
+      const bool switch_order = this->process_move_condition(ir->condition);
+      st_src_reg condition = this->result;
+
+      for (int reg = 0; reg < type_size(ir->lhs->type); reg++) {
+         /* Keep the old destination value wherever the condition does not
+          * select the new one.
+          */
+         if (switch_order) {
+            emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r);
+         } else {
+            emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l));
+         }
+
+         l.index++;
+         r.index++;
+      }
+   } else {
+      for (int reg = 0; reg < type_size(ir->lhs->type); reg++) {
+         emit(ir, OPCODE_MOV, l, r);
+         l.index++;
+         r.index++;
+      }
+   }
+}
+
+
+/**
+ * Materialize a constant and leave its register in this->result.
+ *
+ * Structures, arrays and matrices are built up in a temporary with one MOV
+ * per register.  Scalars and vectors are converted to floats and added to
+ * the program's parameter list as an unnamed constant.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_constant *ir)
+{
+   st_src_reg src;
+   GLfloat stack_vals[4] = { 0 };
+   GLfloat *values = stack_vals;
+   unsigned int i;
+
+   /* Unfortunately, 4 floats is all we can get into
+    * _mesa_add_unnamed_constant.  So, make a temp to store an
+    * aggregate constant and move each constant value into it.  If we
+    * get lucky, copy propagation will eliminate the extra moves.
+    */
+
+   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+      st_src_reg temp_base = get_temp(ir->type);
+      st_dst_reg temp = st_dst_reg(temp_base);
+
+      /* Fields are laid out back to back in the temporary. */
+      foreach_iter(exec_list_iterator, iter, ir->components) {
+         ir_constant *field_value = (ir_constant *)iter.get();
+         int size = type_size(field_value->type);
+
+         assert(size > 0);
+
+         field_value->accept(this);
+         src = this->result;
+
+         for (i = 0; i < (unsigned int)size; i++) {
+            emit(ir, OPCODE_MOV, temp, src);
+
+            src.index++;
+            temp.index++;
+         }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_array()) {
+      st_src_reg temp_base = get_temp(ir->type);
+      st_dst_reg temp = st_dst_reg(temp_base);
+      int size = type_size(ir->type->fields.array);
+
+      assert(size > 0);
+
+      for (i = 0; i < ir->type->length; i++) {
+         ir->array_elements[i]->accept(this);
+         src = this->result;
+         for (int j = 0; j < size; j++) {
+            emit(ir, OPCODE_MOV, temp, src);
+
+            src.index++;
+            temp.index++;
+         }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_matrix()) {
+      st_src_reg mat = get_temp(ir->type);
+      st_dst_reg mat_column = st_dst_reg(mat);
+
+      /* One unnamed constant and one MOV per column. */
+      for (i = 0; i < ir->type->matrix_columns; i++) {
+         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+         values = &ir->value.f[i * ir->type->vector_elements];
+
+         src = st_src_reg(PROGRAM_CONSTANT, -1, NULL);
+         src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
+                                                values,
+                                                ir->type->vector_elements,
+                                                &src.swizzle);
+         emit(ir, OPCODE_MOV, mat_column, src);
+
+         mat_column.index++;
+      }
+
+      this->result = mat;
+      return;
+   }
+
+   /* Scalar or vector: floats are used in place, every other base type is
+    * converted to float into stack_vals.
+    */
+   switch (ir->type->base_type) {
+   case GLSL_TYPE_FLOAT:
+      values = &ir->value.f[0];
+      break;
+   case GLSL_TYPE_UINT:
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i] = ir->value.u[i];
+      }
+      break;
+   case GLSL_TYPE_INT:
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i] = ir->value.i[i];
+      }
+      break;
+   case GLSL_TYPE_BOOL:
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i] = ir->value.b[i];
+      }
+      break;
+   default:
+      assert(!"Non-float/uint/int/bool constant");
+      break;
+   }
+
+   this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
+   this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
+                                                   values,
+                                                   ir->type->vector_elements,
+                                                   &this->result.swizzle);
+}
+
+/**
+ * Return the function_entry that tracks a signature, creating it on first
+ * use.
+ *
+ * A new entry gets the next signature id, temporary storage for each of
+ * the signature's parameters and, for non-void functions, a temporary for
+ * the return value.
+ */
+function_entry *
+glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
+{
+   /* Reuse the entry if this signature has been seen before. */
+   foreach_iter(exec_list_iterator, iter, this->function_signatures) {
+      function_entry *known = (function_entry *)iter.get();
+
+      if (known->sig == sig)
+         return known;
+   }
+
+   function_entry *entry = ralloc(mem_ctx, function_entry);
+   entry->sig = sig;
+   entry->sig_id = this->next_signature_id++;
+   entry->bgn_inst = NULL;
+
+   /* Allocate storage for all the parameters. */
+   foreach_iter(exec_list_iterator, iter, sig->parameters) {
+      ir_variable *param = (ir_variable *)iter.get();
+
+      /* Parameters must not have storage assigned already. */
+      assert(!find_variable_storage(param));
+
+      variable_storage *storage =
+         new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
+                                       this->next_temp);
+      this->variables.push_tail(storage);
+
+      this->next_temp += type_size(param->type);
+   }
+
+   if (sig->return_type->is_void())
+      entry->return_reg = undef_src;
+   else
+      entry->return_reg = get_temp(sig->return_type);
+
+   this->function_signatures.push_tail(entry);
+   return entry;
+}
+
+/**
+ * Emit a function call.
+ *
+ * In and inout arguments are copied into the callee's parameter storage,
+ * OPCODE_CAL is emitted, and out and inout parameters are then copied
+ * back into the caller's arguments.  this->result is set to the callee's
+ * return register.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_call *ir)
+{
+   ir_function_signature *sig = ir->get_callee();
+   function_entry *entry = get_function_signature(sig);
+
+   /* Process in parameters. */
+   exec_list_iterator sig_iter = sig->parameters.iterator();
+   foreach_iter(exec_list_iterator, iter, *ir) {
+      ir_rvalue *actual = (ir_rvalue *)iter.get();
+      ir_variable *formal = (ir_variable *)sig_iter.get();
+
+      if (formal->mode == ir_var_in ||
+          formal->mode == ir_var_inout) {
+         variable_storage *storage = find_variable_storage(formal);
+         assert(storage);
+
+         actual->accept(this);
+         st_src_reg from = this->result;
+
+         /* Destination is the parameter's own storage, all channels. */
+         st_dst_reg to;
+         to.file = storage->file;
+         to.index = storage->index;
+         to.reladdr = NULL;
+         to.writemask = WRITEMASK_XYZW;
+         to.cond_mask = COND_TR;
+
+         for (int reg = 0; reg < type_size(formal->type); reg++) {
+            emit(ir, OPCODE_MOV, to, from);
+            to.index++;
+            from.index++;
+         }
+      }
+
+      sig_iter.next();
+   }
+   /* The argument list and the parameter list must be the same length. */
+   assert(!sig_iter.has_next());
+
+   /* Emit call instruction */
+   glsl_to_tgsi_instruction *call_inst = emit(ir, OPCODE_CAL);
+   call_inst->function = entry;
+
+   /* Process out parameters. */
+   sig_iter = sig->parameters.iterator();
+   foreach_iter(exec_list_iterator, iter, *ir) {
+      ir_rvalue *actual = (ir_rvalue *)iter.get();
+      ir_variable *formal = (ir_variable *)sig_iter.get();
+
+      if (formal->mode == ir_var_out ||
+          formal->mode == ir_var_inout) {
+         variable_storage *storage = find_variable_storage(formal);
+         assert(storage);
+
+         /* Source is the parameter's own storage, unswizzled. */
+         st_src_reg from;
+         from.file = storage->file;
+         from.index = storage->index;
+         from.reladdr = NULL;
+         from.swizzle = SWIZZLE_NOOP;
+         from.negate = 0;
+
+         actual->accept(this);
+         st_dst_reg to = st_dst_reg(this->result);
+
+         for (int reg = 0; reg < type_size(formal->type); reg++) {
+            emit(ir, OPCODE_MOV, to, from);
+            to.index++;
+            from.index++;
+         }
+      }
+
+      sig_iter.next();
+   }
+   assert(!sig_iter.has_next());
+
+   /* Process return value. */
+   this->result = entry->return_reg;
+}
+
+/**
+ * Emit a texture instruction for \p ir.
+ *
+ * The coordinate is copied into a vec4 temporary so that the projector,
+ * the shadow comparitor and the LOD/bias value can be written into its
+ * channels.  The sampled value is written to a second temporary, which
+ * becomes this->result.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_texture *ir)
+{
+   st_src_reg result_src, coord, lod_info, projector, dx, dy;
+   st_dst_reg result_dst, coord_dst;
+   glsl_to_tgsi_instruction *inst = NULL;
+   prog_opcode opcode = OPCODE_NOP;
+
+   ir->coordinate->accept(this);
+
+   /* Put our coords in a temp.  We'll need to modify them for shadow,
+    * projection, or LOD, so the only case we'd use it as is is if
+    * we're doing plain old texturing.  Mesa IR optimization should
+    * handle cleaning up our mess in that case.
+    */
+   coord = get_temp(glsl_type::vec4_type);
+   coord_dst = st_dst_reg(coord);
+   emit(ir, OPCODE_MOV, coord_dst, this->result);
+
+   if (ir->projector) {
+      ir->projector->accept(this);
+      projector = this->result;
+   }
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = get_temp(glsl_type::vec4_type);
+   result_dst = st_dst_reg(result_src);
+
+   /* Pick the opcode and evaluate the extra operand(s) that go with it. */
+   switch (ir->op) {
+   case ir_tex:
+      opcode = OPCODE_TEX;
+      break;
+   case ir_txb:
+      opcode = OPCODE_TXB;
+      ir->lod_info.bias->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txl:
+      opcode = OPCODE_TXL;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txd:
+      opcode = OPCODE_TXD;
+      ir->lod_info.grad.dPdx->accept(this);
+      dx = this->result;
+      ir->lod_info.grad.dPdy->accept(this);
+      dy = this->result;
+      break;
+   case ir_txf: // TODO: use TGSI_OPCODE_TXF here
+      /* Only asserts; opcode stays OPCODE_NOP if asserts are compiled out. */
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+   }
+
+   if (ir->projector) {
+      if (opcode == OPCODE_TEX) {
+         /* Slot the projector in as the last component of the coord. */
+         coord_dst.writemask = WRITEMASK_W;
+         emit(ir, OPCODE_MOV, coord_dst, projector);
+         coord_dst.writemask = WRITEMASK_XYZW;
+         opcode = OPCODE_TXP;
+      } else {
+         st_src_reg coord_w = coord;
+         coord_w.swizzle = SWIZZLE_WWWW;
+
+         /* For the other TEX opcodes there's no projective version
+          * since the last slot is taken up by lod info.  Do the
+          * projective divide now.
+          */
+         coord_dst.writemask = WRITEMASK_W;
+         emit(ir, OPCODE_RCP, coord_dst, projector);
+
+         /* In the case where we have to project the coordinates "by hand,"
+          * the shadow comparitor value must also be projected.
+          */
+         st_src_reg tmp_src = coord;
+         if (ir->shadow_comparitor) {
+            /* Slot the shadow value in as the second to last component of the
+             * coord.
+             */
+            ir->shadow_comparitor->accept(this);
+
+            tmp_src = get_temp(glsl_type::vec4_type);
+            st_dst_reg tmp_dst = st_dst_reg(tmp_src);
+
+            tmp_dst.writemask = WRITEMASK_Z;
+            emit(ir, OPCODE_MOV, tmp_dst, this->result);
+
+            tmp_dst.writemask = WRITEMASK_XY;
+            emit(ir, OPCODE_MOV, tmp_dst, coord);
+         }
+
+         /* Multiply x, y, z by the reciprocal stored in coord.w above. */
+         coord_dst.writemask = WRITEMASK_XYZ;
+         emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
+
+         coord_dst.writemask = WRITEMASK_XYZW;
+         coord.swizzle = SWIZZLE_XYZW;
+      }
+   }
+
+   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
+    * comparitor was put in the correct place (and projected) by the code,
+    * above, that handles by-hand projection.
+    */
+   if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
+      /* Slot the shadow value in as the second to last component of the
+       * coord.
+       */
+      ir->shadow_comparitor->accept(this);
+      coord_dst.writemask = WRITEMASK_Z;
+      emit(ir, OPCODE_MOV, coord_dst, this->result);
+      coord_dst.writemask = WRITEMASK_XYZW;
+   }
+
+   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
+      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
+      coord_dst.writemask = WRITEMASK_W;
+      emit(ir, OPCODE_MOV, coord_dst, lod_info);
+      coord_dst.writemask = WRITEMASK_XYZW;
+   }
+
+   /* TXD takes the two derivative operands in addition to the coordinate. */
+   if (opcode == OPCODE_TXD)
+      inst = emit(ir, opcode, result_dst, coord, dx, dy);
+   else
+      inst = emit(ir, opcode, result_dst, coord);
+
+   if (ir->shadow_comparitor)
+      inst->tex_shadow = GL_TRUE;
+
+   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
+        					   this->shader_program,
+        					   this->prog);
+
+   const glsl_type *sampler_type = ir->sampler->type;
+
+   /* Translate the GLSL sampler dimensionality into a texture target index. */
+   switch (sampler_type->sampler_dimensionality) {
+   case GLSL_SAMPLER_DIM_1D:
+      inst->tex_target = (sampler_type->sampler_array)
+         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_2D:
+      inst->tex_target = (sampler_type->sampler_array)
+         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_3D:
+      inst->tex_target = TEXTURE_3D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_CUBE:
+      inst->tex_target = TEXTURE_CUBE_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_RECT:
+      inst->tex_target = TEXTURE_RECT_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_BUF:
+      assert(!"FINISHME: Implement ARB_texture_buffer_object");
+      break;
+   default:
+      assert(!"Should not get here.");
+   }
+
+   this->result = result_src;
+}
+
+/**
+ * Emit a return.  When a value is returned it is first copied, one
+ * register slot at a time, into the current function's return register.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_return *ir)
+{
+   if (ir->get_value()) {
+      assert(current_function);
+
+      ir->get_value()->accept(this);
+      st_src_reg value = this->result;
+      st_dst_reg ret = st_dst_reg(current_function->return_reg);
+
+      for (int slot = 0;
+           slot < type_size(current_function->sig->return_type);
+           slot++) {
+         emit(ir, OPCODE_MOV, ret, value);
+         ret.index++;
+         value.index++;
+      }
+   }
+
+   emit(ir, OPCODE_RET);
+}
+
+/**
+ * Emit a discard and record on the fragment program that it uses kill.
+ *
+ * An unconditional discard becomes KIL_NV; a conditional one evaluates the
+ * condition, flips its negate bits and feeds it to KIL.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_discard *ir)
+{
+   if (!ir->condition) {
+      emit(ir, OPCODE_KIL_NV);
+   } else {
+      ir->condition->accept(this);
+      this->result.negate = ~this->result.negate;
+      emit(ir, OPCODE_KIL, undef_dst, this->result);
+   }
+
+   ((struct gl_fragment_program *)this->prog)->UsesKill = GL_TRUE;
+}
+
+/**
+ * Emit an IF / ELSE / ENDIF construct for \p ir.
+ *
+ * With EmitCondCodes set, the instruction that produced the condition is
+ * flagged to update the condition code and the IF tests COND_NE; otherwise
+ * the IF takes the condition value as its source operand.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_if *ir)
+{
+   glsl_to_tgsi_instruction *cond_inst, *if_inst;
+   glsl_to_tgsi_instruction *prev_inst;
+
+   /* Remember the list tail so we can tell whether evaluating the
+    * condition emitted anything.
+    */
+   prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+   ir->condition->accept(this);
+   assert(this->result.file != PROGRAM_UNDEFINED);
+
+   if (this->options->EmitCondCodes) {
+      cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+      /* See if we actually generated any instruction for generating
+       * the condition.  If not, then cook up a move to a temp so we
+       * have something to set cond_update on.
+       */
+      if (cond_inst == prev_inst) {
+         st_src_reg temp = get_temp(glsl_type::bool_type);
+         cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result);
+      }
+      cond_inst->cond_update = GL_TRUE;
+
+      if_inst = emit(ir->condition, OPCODE_IF);
+      if_inst->dst.cond_mask = COND_NE;
+   } else {
+      emit(ir->condition, OPCODE_IF, undef_dst, this->result);
+   }
+
+   /* emit() has already appended the IF to this->instructions (the tail
+    * comparison above relies on that), so it must not be pushed again:
+    * re-inserting the node that is currently the list tail would relink
+    * it to itself and leave its prev pointer wrong.
+    */
+
+   visit_exec_list(&ir->then_instructions, this);
+
+   if (!ir->else_instructions.is_empty()) {
+      emit(ir->condition, OPCODE_ELSE);
+      visit_exec_list(&ir->else_instructions, this);
+   }
+
+   emit(ir->condition, OPCODE_ENDIF);
+}
+
+/**
+ * Set up an empty visitor: no result yet, temporaries and signature ids
+ * start at 1, no address registers, and a fresh ralloc context that owns
+ * the visitor's allocations.
+ */
+glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
+{
+   this->mem_ctx = ralloc_context(NULL);
+
+   this->result.file = PROGRAM_UNDEFINED;
+   this->current_function = NULL;
+
+   this->next_temp = 1;
+   this->next_signature_id = 1;
+
+   this->num_address_regs = 0;
+   this->indirect_addr_temps = false;
+   this->indirect_addr_consts = false;
+}
+
+/** Release the ralloc context created by the constructor. */
+glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
+{
+   ralloc_free(this->mem_ctx);
+}
+
+/** C-linkage entry point that destroys a visitor with C++ delete. */
+extern "C" void
+free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
+{
+   delete v;
+}
+
+/**
+ * Build a Mesa prog_src_register from an st_src_reg.
+ *
+ * File, index, swizzle and negate are copied; RelAddr is set when the
+ * source has a reladdr; Abs and all second-index fields are cleared.
+ */
+static struct prog_src_register
+mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg)
+{
+   struct prog_src_register out;
+
+   /* The index has to fit in the instruction's index bit-field. */
+   assert(reg.index < (1 << INST_INDEX_BITS));
+
+   out.File = reg.file;
+   out.Index = reg.index;
+   out.Swizzle = reg.swizzle;
+   out.Negate = reg.negate;
+   out.RelAddr = reg.reladdr != NULL;
+
+   out.Abs = 0;
+   out.HasIndex2 = GL_FALSE;
+   out.RelAddr2 = 0;
+   out.Index2 = 0;
+
+   return out;
+}
+
+/**
+ * Fill in the BranchTarget field of control-flow instructions.
+ *
+ * A first pass counts IF and BGNLOOP instructions (to size the two stacks)
+ * and marks every BRK/CONT as unresolved (-1).  A second pass walks the
+ * program, using the stacks to match IF/ELSE/ENDIF and BGNLOOP/ENDLOOP, and
+ * maps each CAL from a signature id to the matching function entry's inst.
+ *
+ * \param v                  visitor providing mem_ctx and function_signatures
+ * \param mesa_instructions  instruction array, modified in place
+ * \param num_instructions   number of entries in \p mesa_instructions
+ */
+static void
+set_branchtargets(glsl_to_tgsi_visitor *v,
+        	  struct prog_instruction *mesa_instructions,
+        	  int num_instructions)
+{
+   int if_count = 0, loop_count = 0;
+   int *if_stack, *loop_stack;
+   int if_stack_pos = 0, loop_stack_pos = 0;
+   int i, j;
+
+   /* Pass 1: count nesting constructs and flag BRK/CONT as unresolved. */
+   for (i = 0; i < num_instructions; i++) {
+      switch (mesa_instructions[i].Opcode) {
+      case OPCODE_IF:
+         if_count++;
+         break;
+      case OPCODE_BGNLOOP:
+         loop_count++;
+         break;
+      case OPCODE_BRK:
+      case OPCODE_CONT:
+         mesa_instructions[i].BranchTarget = -1;
+         break;
+      default:
+         break;
+      }
+   }
+
+   /* The stacks come from the visitor's ralloc context and are not freed
+    * in this function.
+    */
+   if_stack = rzalloc_array(v->mem_ctx, int, if_count);
+   loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
+
+   /* Pass 2: resolve the targets. */
+   for (i = 0; i < num_instructions; i++) {
+      switch (mesa_instructions[i].Opcode) {
+      case OPCODE_IF:
+         if_stack[if_stack_pos] = i;
+         if_stack_pos++;
+         break;
+      case OPCODE_ELSE:
+         /* The IF targets its ELSE; the ELSE then takes the IF's place on
+          * the stack so that the ENDIF resolves the ELSE instead.
+          */
+         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
+         if_stack[if_stack_pos - 1] = i;
+         break;
+      case OPCODE_ENDIF:
+         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
+         if_stack_pos--;
+         break;
+      case OPCODE_BGNLOOP:
+         loop_stack[loop_stack_pos] = i;
+         loop_stack_pos++;
+         break;
+      case OPCODE_ENDLOOP:
+         loop_stack_pos--;
+         /* Rewrite any breaks/conts at this nesting level (haven't
+          * already had a BranchTarget assigned) to point to the end
+          * of the loop.
+          */
+         for (j = loop_stack[loop_stack_pos]; j < i; j++) {
+            if (mesa_instructions[j].Opcode == OPCODE_BRK ||
+        	mesa_instructions[j].Opcode == OPCODE_CONT) {
+               if (mesa_instructions[j].BranchTarget == -1) {
+        	  mesa_instructions[j].BranchTarget = i;
+               }
+            }
+         }
+         /* The loop ends point at each other. */
+         mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
+         mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
+         break;
+      case OPCODE_CAL:
+         /* On entry BranchTarget is compared against sig_id; replace it
+          * with the inst value of the matching function entry.
+          */
+         foreach_iter(exec_list_iterator, iter, v->function_signatures) {
+            function_entry *entry = (function_entry *)iter.get();
+
+            if (entry->sig_id == mesa_instructions[i].BranchTarget) {
+               mesa_instructions[i].BranchTarget = entry->inst;
+               break;
+            }
+         }
+         break;
+      default:
+         break;
+      }
+   }
+}
+
+/**
+ * Print the instruction array to stdout, one numbered instruction per
+ * line, carrying the indent returned by the printer from one instruction
+ * to the next.  The annotation array is only referenced by the disabled
+ * block below.
+ */
+static void
+print_program(struct prog_instruction *mesa_instructions,
+              ir_instruction **mesa_instruction_annotation,
+              int num_instructions)
+{
+   /*ir_instruction *last_ir = NULL;*/
+   int indent = 0;
+   int i = 0;
+
+   while (i < num_instructions) {
+      struct prog_instruction *mesa_inst = &mesa_instructions[i];
+
+      fprintf(stdout, "%3d: ", i);
+
+#if 0
+/* Disable this for now, since printing GLSL IR along with its corresponding
+ * Mesa IR makes the Mesa IR unreadable. */
+      ir_instruction *ir = mesa_instruction_annotation[i];
+      if (last_ir != ir && ir) {
+         int j;
+
+         for (j = 0; j < indent; j++) {
+            fprintf(stdout, " ");
+         }
+         ir->print();
+         printf("\n");
+         last_ir = ir;
+
+         fprintf(stdout, "     "); /* line number spacing. */
+      }
+#endif
+
+      indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
+                                            PROG_PRINT_DEBUG, NULL);
+      i++;
+   }
+}
+
+
+/**
+ * Scan the program's instructions and record which samplers are used.
+ *
+ * For every texture instruction the sampler's target is stored, its bit is
+ * set in SamplersUsed, and, for shadow lookups, in ShadowSamplers as well.
+ * The shader's textures-used state is refreshed afterwards.
+ */
+static void
+count_resources(struct gl_program *prog)
+{
+   prog->SamplersUsed = 0;
+
+   for (unsigned int n = 0; n < prog->NumInstructions; n++) {
+      struct prog_instruction *inst = prog->Instructions + n;
+
+      if (!_mesa_is_tex_instruction(inst->Opcode))
+         continue;
+
+      prog->SamplerTargets[inst->TexSrcUnit] =
+         (gl_texture_index)inst->TexSrcTarget;
+      prog->SamplersUsed |= 1 << inst->TexSrcUnit;
+
+      if (inst->TexShadow)
+         prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
+   }
+
+   _mesa_update_shader_textures_used(prog);
+}
+
+
+/**
+ * Compare the program's sampler and parameter counts against the limits
+ * for its target and record a link failure for each limit exceeded.
+ * An unrecognised target is reported through _mesa_problem().
+ *
+ * XXX more checks are needed...
+ */
+static void
+check_resources(const struct gl_context *ctx,
+                struct gl_shader_program *shader_program,
+                struct gl_program *prog)
+{
+   if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      if (_mesa_bitcount(prog->SamplersUsed) >
+          ctx->Const.MaxVertexTextureImageUnits)
+         fail_link(shader_program, "Too many vertex shader texture samplers");
+
+      if (prog->Parameters->NumParameters > MAX_UNIFORMS)
+         fail_link(shader_program, "Too many vertex shader constants");
+   } else if (prog->Target == MESA_GEOMETRY_PROGRAM) {
+      if (_mesa_bitcount(prog->SamplersUsed) >
+          ctx->Const.MaxGeometryTextureImageUnits)
+         fail_link(shader_program, "Too many geometry shader texture samplers");
+
+      if (prog->Parameters->NumParameters >
+          MAX_GEOMETRY_UNIFORM_COMPONENTS / 4)
+         fail_link(shader_program, "Too many geometry shader constants");
+   } else if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+      if (_mesa_bitcount(prog->SamplersUsed) >
+          ctx->Const.MaxTextureImageUnits)
+         fail_link(shader_program, "Too many fragment shader texture samplers");
+
+      if (prog->Parameters->NumParameters > MAX_UNIFORMS)
+         fail_link(shader_program, "Too many fragment shader constants");
+   } else {
+      _mesa_problem(ctx, "unexpected program type in check_resources()");
+   }
+}
+
+
+
+/* Pairs a uniform with the parameter position used as its sort key. */
+struct uniform_sort {
+   struct gl_uniform *u;   /* the uniform to be added */
+   int pos;                /* its position for the current shader target */
+};
+
+/* qsort() comparator ordering uniform_sort entries by ascending pos.
+ *
+ * The shader_program->Uniforms list is almost sorted in increasing
+ * uniform->{Frag,Vert}Pos locations, but not quite when there are
+ * uniforms shared between targets.  We need to add parameters in
+ * increasing order for the targets.
+ */
+static int
+sort_uniforms(const void *a, const void *b)
+{
+   const struct uniform_sort *lhs = (const struct uniform_sort *)a;
+   const struct uniform_sort *rhs = (const struct uniform_sort *)b;
+
+   return lhs->pos - rhs->pos;
+}
+
+/* Add the uniforms to the parameters.  The linker chose locations
+ * in our parameters lists (which weren't created yet), which the
+ * uniforms code will use to poke values into our parameters list
+ * when uniforms are updated.
+ *
+ * Uniforms whose position for this shader's type is -1 are skipped.  The
+ * rest are sorted by position and added to prog->Parameters in that order;
+ * a link failure is recorded if a newly added parameter does not land at
+ * the position the linker chose.
+ */
+static void
+add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
+        			struct gl_shader *shader,
+        			struct gl_program *prog)
+{
+   unsigned int i;
+   unsigned int next_sampler = 0, num_uniforms = 0;
+   struct uniform_sort *sorted_uniforms;
+
+   /* Scratch array sized for the worst case; freed at the end. */
+   sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
+        			  shader_program->Uniforms->NumUniforms);
+
+   for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
+      struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
+      int parameter_index = -1;
+
+      /* Pick the position recorded for this shader stage. */
+      switch (shader->Type) {
+      case GL_VERTEX_SHADER:
+         parameter_index = uniform->VertPos;
+         break;
+      case GL_FRAGMENT_SHADER:
+         parameter_index = uniform->FragPos;
+         break;
+      case GL_GEOMETRY_SHADER:
+         parameter_index = uniform->GeomPos;
+         break;
+      }
+
+      /* Only add uniforms used in our target. */
+      if (parameter_index != -1) {
+         sorted_uniforms[num_uniforms].pos = parameter_index;
+         sorted_uniforms[num_uniforms].u = uniform;
+         num_uniforms++;
+      }
+   }
+
+   qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
+         sort_uniforms);
+
+   for (i = 0; i < num_uniforms; i++) {
+      struct gl_uniform *uniform = sorted_uniforms[i].u;
+      int parameter_index = sorted_uniforms[i].pos;
+      const glsl_type *type = uniform->Type;
+      unsigned int size;
+
+      /* Scalars and vectors use their component count; every other type
+       * uses four components per type_size() slot.
+       */
+      if (type->is_vector() ||
+          type->is_scalar()) {
+         size = type->vector_elements;
+      } else {
+         size = type_size(type) * 4;
+      }
+
+      gl_register_file file;
+      if (type->is_sampler() ||
+          (type->is_array() && type->fields.array->is_sampler())) {
+         file = PROGRAM_SAMPLER;
+      } else {
+         file = PROGRAM_UNIFORM;
+      }
+
+      GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
+        					 uniform->Name);
+
+      /* Only uniforms not already present in the list are added. */
+      if (index < 0) {
+         index = _mesa_add_parameter(prog->Parameters, file,
+        			     uniform->Name, size, type->gl_type,
+        			     NULL, NULL, 0x0);
+
+         /* Sampler uniform values are stored in prog->SamplerUnits,
+          * and the entry in that array is selected by this index we
+          * store in ParameterValues[].
+          */
+         if (file == PROGRAM_SAMPLER) {
+            for (unsigned int j = 0; j < size / 4; j++)
+               prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+         }
+
+         /* The location chosen in the Parameters list here (returned
+          * from _mesa_add_parameter) has to match what the linker chose.
+          */
+         if (index != parameter_index) {
+            fail_link(shader_program, "Allocation of uniform `%s' to target "
+        	      "failed (%d vs %d)\n",
+        	      uniform->Name, index, parameter_index);
+         }
+      }
+   }
+
+   ralloc_free(sorted_uniforms);
+}
+
+/**
+ * Upload the constant initializer \p val of the uniform called \p name.
+ *
+ * Records recurse field by field, using "<name>.<field>" as the name.
+ * Other types look up the uniform's location (recording a link failure if
+ * it is missing) and upload each array element, or the single value,
+ * through _mesa_uniform() / _mesa_uniform_matrix().
+ *
+ * \param mem_ctx  ralloc context for the temporary names and converted
+ *                 bool values allocated here
+ */
+static void
+set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
+        		struct gl_shader_program *shader_program,
+        		const char *name, const glsl_type *type,
+        		ir_constant *val)
+{
+   if (type->is_record()) {
+      ir_constant *field_constant;
+
+      field_constant = (ir_constant *)val->components.get_head();
+
+      /* Walk the struct fields and the component list in step. */
+      for (unsigned int i = 0; i < type->length; i++) {
+         const glsl_type *field_type = type->fields.structure[i].type;
+         const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
+        				    type->fields.structure[i].name);
+         set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
+        			 field_type, field_constant);
+         field_constant = (ir_constant *)field_constant->next;
+      }
+      return;
+   }
+
+   int loc = _mesa_get_uniform_location(ctx, shader_program, name);
+
+   if (loc == -1) {
+      fail_link(shader_program,
+        	"Couldn't find uniform for initializer %s\n", name);
+      return;
+   }
+
+   /* One iteration per array element, or a single one for non-arrays. */
+   for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
+      ir_constant *element;
+      const glsl_type *element_type;
+      if (type->is_array()) {
+         element = val->array_elements[i];
+         element_type = type->fields.array;
+      } else {
+         element = val;
+         element_type = type;
+      }
+
+      void *values;
+
+      /* Bool constants are copied into an int array and uploaded as the
+       * int type with the same number of vector elements.
+       */
+      if (element_type->base_type == GLSL_TYPE_BOOL) {
+         int *conv = ralloc_array(mem_ctx, int, element_type->components());
+         for (unsigned int j = 0; j < element_type->components(); j++) {
+            conv[j] = element->value.b[j];
+         }
+         values = (void *)conv;
+         element_type = glsl_type::get_instance(GLSL_TYPE_INT,
+        					element_type->vector_elements,
+        					1);
+      } else {
+         values = &element->value;
+      }
+
+      /* Advance loc past this element: by column count for matrices, by
+       * type_size() for everything else.
+       */
+      if (element_type->is_matrix()) {
+         _mesa_uniform_matrix(ctx, shader_program,
+        		      element_type->matrix_columns,
+        		      element_type->vector_elements,
+        		      loc, 1, GL_FALSE, (GLfloat *)values);
+         loc += element_type->matrix_columns;
+      } else {
+         _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
+        	       values, element_type->gl_type);
+         loc += type_size(element_type);
+      }
+   }
+}
+
+/**
+ * Apply the constant initializers of all uniform variables found in the
+ * IR of every linked shader of \p shader_program.
+ *
+ * A scratch ralloc context is created on the first initializer found and
+ * released before returning.
+ */
+static void
+set_uniform_initializers(struct gl_context *ctx,
+                         struct gl_shader_program *shader_program)
+{
+   void *mem_ctx = NULL;
+
+   for (unsigned int stage = 0; stage < MESA_SHADER_TYPES; stage++) {
+      struct gl_shader *shader = shader_program->_LinkedShaders[stage];
+
+      if (shader == NULL)
+         continue;
+
+      foreach_iter(exec_list_iterator, iter, *shader->ir) {
+         ir_instruction *ir = (ir_instruction *)iter.get();
+         ir_variable *var = ir->as_variable();
+
+         /* Only uniform variables that carry a constant value matter. */
+         const bool has_initializer = var &&
+                                      var->mode == ir_var_uniform &&
+                                      var->constant_value;
+         if (!has_initializer)
+            continue;
+
+         if (mem_ctx == NULL)
+            mem_ctx = ralloc_context(NULL);
+
+         set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
+                                 var->type, var->constant_value);
+      }
+   }
+
+   ralloc_free(mem_ctx);
+}
+
+/* Rewrites every source and destination reference to PROGRAM_TEMPORARY
+ * register `index` in the instruction list so that it uses `new_index`.
+ */
+void
+glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
+{
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      /* Source operands. */
+      for (unsigned s = 0; s < _mesa_num_inst_src_regs(inst->op); s++) {
+         st_src_reg *src = &inst->src[s];
+
+         if (src->file == PROGRAM_TEMPORARY && src->index == index)
+            src->index = new_index;
+      }
+
+      /* Destination operand. */
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
+         inst->dst.index = new_index;
+   }
+}
+
+/**
+ * Find the first instruction that reads PROGRAM_TEMPORARY \p index.
+ *
+ * Returns the instruction's position in the list, or, when the read is
+ * inside a loop, the position of the outermost enclosing BGNLOOP.
+ * Returns -1 if the temporary is never read.
+ */
+int
+glsl_to_tgsi_visitor::get_first_temp_read(int index)
+{
+   int depth = 0;       /* current loop nesting depth */
+   int loop_start = -1; /* position of the outermost open BGNLOOP, if any */
+   unsigned pos = 0;    /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const unsigned num_src = _mesa_num_inst_src_regs(inst->op);
+
+      for (unsigned s = 0; s < num_src; s++) {
+         if (inst->src[s].file != PROGRAM_TEMPORARY ||
+             inst->src[s].index != index)
+            continue;
+
+         if (depth == 0)
+            return pos;
+         return loop_start;
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         if (depth == 0)
+            loop_start = pos;
+         depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         depth--;
+         if (depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+
+      pos++;
+   }
+
+   return -1;
+}
+
+/**
+ * Find the first instruction that writes PROGRAM_TEMPORARY \p index.
+ *
+ * Returns the instruction's position in the list, or, when the write is
+ * inside a loop, the position of the outermost enclosing BGNLOOP.
+ * Returns -1 if the temporary is never written.
+ */
+int
+glsl_to_tgsi_visitor::get_first_temp_write(int index)
+{
+   int depth = 0;       /* current loop nesting depth */
+   int loop_start = -1; /* position of the outermost open BGNLOOP, if any */
+   int pos = 0;         /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         if (depth == 0)
+            return pos;
+         return loop_start;
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         if (depth == 0)
+            loop_start = pos;
+         depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         depth--;
+         if (depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+
+      pos++;
+   }
+
+   return -1;
+}
+
+/**
+ * Find the last instruction that reads PROGRAM_TEMPORARY \p index.
+ *
+ * A read inside a loop is reported as the position of the ENDLOOP that
+ * closes the outermost enclosing loop.  Returns -1 if the temporary is
+ * never read.
+ */
+int
+glsl_to_tgsi_visitor::get_last_temp_read(int index)
+{
+   int depth = 0;    /* current loop nesting depth */
+   int last = -1;    /* result so far; -2 marks "read inside an open loop" */
+   unsigned pos = 0; /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const unsigned num_src = _mesa_num_inst_src_regs(inst->op);
+
+      for (unsigned s = 0; s < num_src; s++) {
+         if (inst->src[s].file != PROGRAM_TEMPORARY ||
+             inst->src[s].index != index)
+            continue;
+
+         if (depth == 0)
+            last = pos;
+         else
+            last = -2;
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         depth--;
+         /* Leaving the outermost loop resolves a pending in-loop read. */
+         if (depth == 0 && last == -2)
+            last = pos;
+      }
+      assert(depth >= 0);
+
+      pos++;
+   }
+
+   assert(last >= -1);
+   return last;
+}
+
+/**
+ * Find the last instruction that writes PROGRAM_TEMPORARY \p index.
+ *
+ * A write inside a loop is reported as the position of the ENDLOOP that
+ * closes the outermost enclosing loop.  Returns -1 if the temporary is
+ * never written.
+ */
+int
+glsl_to_tgsi_visitor::get_last_temp_write(int index)
+{
+   int depth = 0; /* current loop nesting depth */
+   int last = -1; /* result so far; -2 marks "written inside an open loop" */
+   int pos = 0;   /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         if (depth == 0)
+            last = pos;
+         else
+            last = -2;
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         depth--;
+         /* Leaving the outermost loop resolves a pending in-loop write. */
+         if (depth == 0 && last == -2)
+            last = pos;
+      }
+      assert(depth >= 0);
+
+      pos++;
+   }
+
+   assert(last >= -1);
+   return last;
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY register
+ * channels for copy propagation and updates following instructions to
+ * use the original versions.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.  As an example, a TXP production before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
+ *
+ * and after:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * which allows for dead code elimination on TEMP[1]'s writes.
+ *
+ * Bookkeeping: acp[4 * reg + chan] is the MOV instruction that most
+ * recently wrote that temporary channel as a plain copy, or NULL when no
+ * copy is available.  acp_level[] holds the IF nesting level at which the
+ * corresponding acp entry was recorded.
+ */
+void
+glsl_to_tgsi_visitor::copy_propagate(void)
+{
+   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
+        					    glsl_to_tgsi_instruction *,
+        					    this->next_temp * 4);
+   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+
+      /* First, do any copy propagation possible into the src regs. */
+      for (int r = 0; r < 3; r++) {
+         glsl_to_tgsi_instruction *first = NULL;
+         bool good = true;
+         int acp_base = inst->src[r].index * 4;
+
+         /* Only direct reads of temporaries are candidates. */
+         if (inst->src[r].file != PROGRAM_TEMPORARY ||
+             inst->src[r].reladdr)
+            continue;
+
+         /* See if we can find entries in the ACP consisting of MOVs
+          * from the same src register for all the swizzled channels
+          * of this src register reference.
+          */
+         for (int i = 0; i < 4; i++) {
+            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
+
+            if (!copy_chan) {
+               good = false;
+               break;
+            }
+
+            assert(acp_level[acp_base + src_chan] <= level);
+
+            if (!first) {
+               first = copy_chan;
+            } else {
+               if (first->src[0].file != copy_chan->src[0].file ||
+        	   first->src[0].index != copy_chan->src[0].index) {
+        	  good = false;
+        	  break;
+               }
+            }
+         }
+
+         if (good) {
+            /* We've now validated that we can copy-propagate to
+             * replace this src register reference.  Do it.
+             */
+            inst->src[r].file = first->src[0].file;
+            inst->src[r].index = first->src[0].index;
+
+            /* Compose this reference's swizzle with the swizzle each
+             * copy applied to its own source.
+             */
+            int swizzle = 0;
+            for (int i = 0; i < 4; i++) {
+               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
+               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
+        		   (3 * i));
+            }
+            inst->src[r].swizzle = swizzle;
+         }
+      }
+
+      /* Second, invalidate ACP entries affected by this instruction. */
+      switch (inst->op) {
+      case OPCODE_BGNLOOP:
+      case OPCODE_ENDLOOP:
+         /* End of a basic block, clear the ACP entirely. */
+         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         break;
+
+      case OPCODE_IF:
+         ++level;
+         break;
+
+      case OPCODE_ENDIF:
+      case OPCODE_ELSE:
+         /* Clear all channels written inside the block from the ACP, but
+          * leaving those that were not touched.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!acp[4 * r + c])
+        	  continue;
+
+               if (acp_level[4 * r + c] >= level)
+        	  acp[4 * r + c] = NULL;
+            }
+         }
+         if (inst->op == OPCODE_ENDIF)
+            --level;
+         break;
+
+      default:
+         /* Continuing the block, clear any written channels from
+          * the ACP.
+          */
+         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
+            /* Any temporary might be written, so no copy propagation
+             * across this instruction.
+             */
+            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         } else if (inst->dst.file == PROGRAM_OUTPUT &&
+        	    inst->dst.reladdr) {
+            /* Any output might be written, so no copy propagation
+             * from outputs across this instruction.
+             */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+        	  if (!acp[4 * r + c])
+        	     continue;
+
+        	  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
+        	     acp[4 * r + c] = NULL;
+               }
+            }
+         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
+        	    inst->dst.file == PROGRAM_OUTPUT) {
+            /* Clear where it's used as dst. */
+            if (inst->dst.file == PROGRAM_TEMPORARY) {
+               for (int c = 0; c < 4; c++) {
+        	  if (inst->dst.writemask & (1 << c)) {
+        	     acp[4 * inst->dst.index + c] = NULL;
+        	  }
+               }
+            }
+
+            /* Clear where it's used as src. */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+        	  if (!acp[4 * r + c])
+        	     continue;
+
+        	  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
+
+        	  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
+        	      acp[4 * r + c]->src[0].index == inst->dst.index &&
+        	      inst->dst.writemask & (1 << src_chan))
+        	  {
+        	     acp[4 * r + c] = NULL;
+        	  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this is a copy, add it to the ACP. */
+      if (inst->op == OPCODE_MOV &&
+          inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate &&
+          !inst->src[0].reladdr &&
+          !inst->src[0].negate) {
+         for (int i = 0; i < 4; i++) {
+            if (inst->dst.writemask & (1 << i)) {
+               acp[4 * inst->dst.index + i] = inst;
+               acp_level[4 * inst->dst.index + i] = level;
+            }
+         }
+      }
+   }
+
+   ralloc_free(acp_level);
+   ralloc_free(acp);
+}
+
+/*
+ * Removes any write to a PROGRAM_TEMPORARY that follows that temp's last read.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.  As an example, a TXP production after copy propagation but
+ * before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * and after this pass:
+ *
+ * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
+ * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
+ */
+void
+glsl_to_tgsi_visitor::eliminate_dead_code(void)
+{
+   for (int temp = 0; temp < this->next_temp; temp++) {
+      const int final_read = get_last_temp_read(temp);
+      int position = 0;   /* index of the instruction currently visited */
+
+      foreach_iter(exec_list_iterator, iter, this->instructions) {
+         glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+         const bool writes_temp = inst->dst.file == PROGRAM_TEMPORARY &&
+                                  inst->dst.index == temp;
+
+         /* A write located after the final read is unlinked and destroyed.
+          * Removed instructions still advance the position counter. */
+         if (writes_temp && position > final_read) {
+            iter.remove();
+            delete inst;
+         }
+
+         position++;
+      }
+   }
+}
+
+/* Folds temporaries whose usages do not overlap into a single register so
+ * that the program needs fewer temporaries overall.
+ *
+ * Produces optimal code only after copy propagation and dead code elimination
+ * have been run. */
+void
+glsl_to_tgsi_visitor::merge_registers(void)
+{
+   int *read_end = rzalloc_array(mem_ctx, int, this->next_temp);
+   int *write_begin = rzalloc_array(mem_ctx, int, this->next_temp);
+
+   /* Cache each temp's last-read and first-write positions up front so the
+    * instruction list is not walked again for every pair examined below. */
+   for (int t = 0; t < this->next_temp; t++) {
+      read_end[t] = get_last_temp_read(t);
+      write_begin[t] = get_first_temp_write(t);
+   }
+
+   /* Look for pairs (keep, gone), keep < gone, with non-overlapping usage. */
+   for (int keep = 0; keep < this->next_temp - 1; keep++) {
+      /* A negative position marks a register that is unused. */
+      if (read_end[keep] < 0 || write_begin[keep] < 0)
+         continue;
+
+      for (int gone = keep + 1; gone < this->next_temp; gone++) {
+         if (read_end[gone] < 0 || write_begin[gone] < 0)
+            continue;
+
+         /* The lower-numbered register is expected to be written no later
+          * than the higher-numbered one. */
+         assert(write_begin[keep] <= write_begin[gone]);
+
+         /* Mergeable only if the first write to "gone" comes after, or in
+          * the same instruction as, the last read of "keep". */
+         if (read_end[keep] > write_begin[gone])
+            continue;
+
+         /* Replace all references to "gone" with "keep". */
+         rename_temp_register(gone, keep);
+
+         /* The merged register now lives until the last read of "gone";
+          * the index "gone" itself is marked unused. */
+         read_end[keep] = read_end[gone];
+         write_begin[gone] = -1;
+         read_end[gone] = -1;
+      }
+   }
+
+   ralloc_free(read_end);
+   ralloc_free(write_begin);
+}
+
+/* Pack the temporaries into a gap-free index range: each temp for which
+ * get_first_temp_read() is non-negative takes the next free index. */
+void
+glsl_to_tgsi_visitor::renumber_registers(void)
+{
+   int packed = 0;
+
+   for (int old_index = 0; old_index < this->next_temp; old_index++) {
+      if (get_first_temp_read(old_index) >= 0) {
+         if (old_index != packed)
+            rename_temp_register(old_index, packed);
+         packed++;
+      }
+   }
+
+   this->next_temp = packed;
+}
+
+/* ------------------------- TGSI conversion stuff -------------------------- */
+struct label {   /* one entry of st_translate::labels, filled in by get_label() */
+   unsigned branch_target;   /* value passed to get_label(); indexes t->insn[] at fixup */
+   unsigned token;           /* get_label() returns this field's address; later given to ureg_fixup_label() */
+};
+
+/**
+ * Intermediate state used during shader translation.
+ */
+struct st_translate {
+   struct ureg_program *ureg;   /**< Program that all declarations/instructions are emitted into */
+
+   struct ureg_dst temps[MAX_PROGRAM_TEMPS];   /**< By Mesa temp index; undef until declared */
+   struct ureg_src *constants;                 /**< By parameter index; heap array freed by the translator */
+   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];   /**< Accessed through outputMapping[] */
+   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];     /**< Accessed through inputMapping[] */
+   struct ureg_dst address[1];   /**< address[0] is the register used for indirect addressing */
+   struct ureg_src samplers[PIPE_MAX_SAMPLERS];      /**< By sampler unit (inst->sampler) */
+   struct ureg_src systemValues[SYSTEM_VALUE_MAX];   /**< By SYSTEM_VALUE_x */
+
+   /* Extra info for handling point size clamping in vertex shader */
+   struct ureg_dst pointSizeResult; /**< Actual point size output register */
+   struct ureg_src pointSizeConst;  /**< Point size range constant register */
+   GLint pointSizeOutIndex;         /**< Index in outputs[] of the temp standing in for point size, or -1 */
+   GLboolean prevInstWrotePointSize;   /**< Set by dst_register() on a vertex PSIZ write; reset per instruction */
+
+   const GLuint *inputMapping;    /**< Mesa input index -> index into inputs[] */
+   const GLuint *outputMapping;   /**< Mesa output index -> index into outputs[] */
+
+   /* For every instruction that contains a label (eg CALL), keep
+    * details so that we can go back afterwards and emit the correct
+    * tgsi instruction number for each label.
+    */
+   struct label *labels;    /* grown with realloc() by get_label() */
+   unsigned labels_size;    /* allocated entries */
+   unsigned labels_count;   /* entries in use */
+
+   /* Keep a record of the tgsi instruction number that each mesa
+    * instruction starts at, will be used to fix up labels after
+    * translation.
+    */
+   unsigned *insn;        /* grown with realloc() by set_insn_start() */
+   unsigned insn_size;    /* allocated entries */
+   unsigned insn_count;   /* entries in use */
+
+   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT/GEOMETRY */
+
+   boolean error;   /**< Set to TRUE when growing labels[] or insn[] fails */
+};
+
+/** Map Mesa's SYSTEM_VALUE_x (used as the array index) to TGSI_SEMANTIC_x */
+static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
+   TGSI_SEMANTIC_FACE,         /* entry 0 */
+   TGSI_SEMANTIC_INSTANCEID    /* entry 1 */
+};
+
+/**
+ * Record a branch to \p branch_target and return the address of its token;
+ * after all instructions are emitted the labels are walked and patched with
+ * their actual TGSI locations.  If the array cannot grow, t->error is set,
+ * t->labels is left intact and a pointer to a static dummy is returned.
+ */
+static unsigned *get_label( struct st_translate *t,
+                            unsigned branch_target )
+{
+   if (t->labels_count + 1 >= t->labels_size) {
+      unsigned grown_size = 1 << (util_logbase2(t->labels_size) + 1);
+      struct label *grown = (struct label *)realloc(t->labels,
+                                                    grown_size * sizeof t->labels[0]);
+      if (grown == NULL) {
+         static unsigned dummy;   /* keeps the returned pointer valid on failure */
+         t->error = TRUE;
+         return &dummy;
+      }
+      t->labels = grown;          /* commit only once realloc() has succeeded */
+      t->labels_size = grown_size;
+   }
+
+   unsigned i = t->labels_count++;
+   t->labels[i].branch_target = branch_target;
+   return &t->labels[i].token;
+}
+
+/**
+ * Called before emitting each Mesa instruction: stores \p start, its first
+ * TGSI instruction number, in t->insn[], growing the array when it is full.
+ * On allocation failure t->error is set and t->insn[] is left as it was.
+ */
+static void set_insn_start( struct st_translate *t,
+                            unsigned start )
+{
+   if (t->insn_count + 1 >= t->insn_size) {
+      unsigned grown_size = 1 << (util_logbase2(t->insn_size) + 1);
+      unsigned *grown = (unsigned *)realloc(t->insn, grown_size * sizeof t->insn[0]);
+      if (grown == NULL) {
+         t->error = TRUE;
+         return;
+      }
+      t->insn = grown;            /* commit only once realloc() has succeeded */
+      t->insn_size = grown_size;
+   }
+   t->insn[t->insn_count++] = start;
+}
+
+/**
+ * Map a Mesa dst register (file, index) to its TGSI ureg_dst; temporaries are
+ * declared on first use and PSIZ writes set t->prevInstWrotePointSize.
+ */
+static struct ureg_dst
+dst_register( struct st_translate *t,
+              gl_register_file file,
+              GLuint index )
+{
+   switch( file ) {
+   case PROGRAM_UNDEFINED:
+      return ureg_dst_undef();
+
+   case PROGRAM_TEMPORARY:
+      assert(index < Elements(t->temps));   /* same bound src_register() checks */
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary( t->ureg );
+      return t->temps[index];
+
+   case PROGRAM_OUTPUT:
+      if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
+         t->prevInstWrotePointSize = GL_TRUE;
+
+      if (t->procType == TGSI_PROCESSOR_VERTEX)
+         assert(index < VERT_RESULT_MAX);
+      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
+         assert(index < FRAG_RESULT_MAX);
+      else
+         assert(index < GEOM_RESULT_MAX);
+
+      assert(t->outputMapping[index] < Elements(t->outputs));
+      return t->outputs[t->outputMapping[index]];
+
+   case PROGRAM_ADDRESS:
+      return t->address[index];
+
+   default:
+      debug_assert( 0 );
+      return ureg_dst_undef();
+   }
+}
+
+/**
+ * Map a Mesa src register (file and signed index) to a TGSI ureg_src.
+ */
+static struct ureg_src
+src_register( struct st_translate *t,
+              gl_register_file file,
+              GLint index )
+{
+   switch( file ) {
+   case PROGRAM_UNDEFINED:
+      return ureg_src_undef();
+
+   case PROGRAM_TEMPORARY:
+      assert(index >= 0);
+      assert(index < (int) Elements(t->temps));
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary( t->ureg );
+      return ureg_src(t->temps[index]);
+
+   case PROGRAM_NAMED_PARAM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_UNIFORM:
+      assert(index >= 0);
+      return t->constants[index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:       /* ie, immediate */
+      /* Negative index: return constant 0; translate_src() restores Index. */
+      if (index < 0)
+         return ureg_DECL_constant( t->ureg, 0 );
+      return t->constants[index];
+
+   case PROGRAM_INPUT:
+      assert(t->inputMapping[index] < Elements(t->inputs));
+      return t->inputs[t->inputMapping[index]];
+
+   case PROGRAM_OUTPUT:
+      assert(t->outputMapping[index] < Elements(t->outputs));
+      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
+
+   case PROGRAM_ADDRESS:
+      return ureg_src(t->address[index]);
+
+   case PROGRAM_SYSTEM_VALUE:
+      assert(index >= 0 && index < (int) Elements(t->systemValues));
+      return t->systemValues[index];
+
+   default:
+      debug_assert( 0 );
+      return ureg_src_undef();
+   }
+}
+
+/**
+ * Build the TGSI destination operand for a Mesa destination register: the
+ * mapped register with its writemask, saturation and indirection applied.
+ */
+static struct ureg_dst
+translate_dst( struct st_translate *t,
+               const st_dst_reg *dst_reg,
+               boolean saturate )
+{
+   struct ureg_dst mapped = dst_register( t, dst_reg->file, dst_reg->index );
+   struct ureg_dst out = ureg_writemask( mapped, dst_reg->writemask );
+
+   if (saturate) {
+      out = ureg_saturate( out );
+   }
+
+   /* Relative addressing always goes through address register 0. */
+   if (dst_reg->reladdr != NULL) {
+      out = ureg_dst_indirect( out, ureg_src(t->address[0]) );
+   }
+
+   return out;
+}
+
+/**
+ * Create a TGSI ureg_src (swizzle, negate, indirection) from a Mesa src register.
+ */
+static struct ureg_src
+translate_src( struct st_translate *t,
+               const st_src_reg *src_reg )
+{
+   struct ureg_src src = src_register( t, src_reg->file, src_reg->index );
+
+   src = ureg_swizzle( src,   /* each selector is masked down to two bits */
+                       GET_SWZ( src_reg->swizzle, 0 ) & 0x3,
+                       GET_SWZ( src_reg->swizzle, 1 ) & 0x3,
+                       GET_SWZ( src_reg->swizzle, 2 ) & 0x3,
+                       GET_SWZ( src_reg->swizzle, 3 ) & 0x3);
+
+   if ((src_reg->negate & 0xf) == NEGATE_XYZW)   /* only an all-channel negate is translated */
+      src = ureg_negate(src);
+
+#if 0
+   // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR
+   if (src_reg->abs) 
+      src = ureg_abs(src);
+#endif
+
+   if (src_reg->reladdr != NULL) {
+      /* Normally ureg_src_indirect() would be used here, but a stupid compiler 
+       * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 
+       * set the bit for src.Negate.  So we have to do the operation manually
+       * here to work around the compiler's problems. */
+      /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
+      struct ureg_src addr = ureg_src(t->address[0]);
+      src.Indirect = 1;
+      src.IndirectFile = addr.File;
+      src.IndirectIndex = addr.Index;
+      src.IndirectSwizzle = addr.SwizzleX;
+      
+      if (src_reg->file != PROGRAM_INPUT &&
+          src_reg->file != PROGRAM_OUTPUT) {
+         /* If src_reg->index was negative, it was set to zero in
+          * src_register().  Reassign it now.  But don't do this
+          * for input/output regs since they get remapped while
+          * const buffers don't.
+          */
+         src.Index = src_reg->index;
+      }
+   }
+
+   return src;
+}
+
+static void   /* Emits the TGSI for one glsl_to_tgsi_instruction into t->ureg. */
+compile_tgsi_instruction(struct st_translate *t, 
+        			     const struct glsl_to_tgsi_instruction *inst)
+{
+   struct ureg_program *ureg = t->ureg;
+   GLuint i;
+   struct ureg_dst dst[1];
+   struct ureg_src src[4];   /* the texture cases append one sampler after the sources */
+   unsigned num_dst;
+   unsigned num_src;
+
+   num_dst = _mesa_num_inst_dst_regs( inst->op );
+   num_src = _mesa_num_inst_src_regs( inst->op );
+
+   if (num_dst) 
+      dst[0] = translate_dst( t, 
+                              &inst->dst,
+                              inst->saturate); // inst->SaturateMode
+
+   for (i = 0; i < num_src; i++) 
+      src[i] = translate_src( t, &inst->src[i] );
+
+   switch( inst->op ) {
+   case OPCODE_SWZ:
+      // TODO: copy emit_swz function from st_mesa_to_tgsi.c
+      // NOTE(review): nothing is emitted for SWZ if assert() is compiled out.
+      assert(!"OPCODE_SWZ");
+      return;
+
+   case OPCODE_BGNLOOP:
+   case OPCODE_CAL:
+   case OPCODE_ELSE:
+   case OPCODE_ENDLOOP:
+   case OPCODE_IF:
+      debug_assert(num_dst == 0);   /* label target: the callee's sig_id for CAL, 0 otherwise */
+      ureg_label_insn( ureg,
+                       translate_opcode( inst->op ),
+                       src, num_src,
+                       get_label( t, 
+                                  inst->op == OPCODE_CAL ? inst->function->sig_id : 0 ));
+      return;
+
+   case OPCODE_TEX:
+   case OPCODE_TXB:
+   case OPCODE_TXD:
+   case OPCODE_TXL:
+   case OPCODE_TXP:
+      src[num_src++] = t->samplers[inst->sampler];   /* sampler goes last in the source list */
+      ureg_tex_insn( ureg,
+                     translate_opcode( inst->op ),
+                     dst, num_dst, 
+                     translate_texture_target( inst->tex_target,
+                                               inst->tex_shadow ),
+                     src, num_src );
+      return;
+
+   case OPCODE_SCS:
+      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );   /* writemask replaced with XY */
+      ureg_insn( ureg, 
+                 translate_opcode( inst->op ), 
+                 dst, num_dst, 
+                 src, num_src );
+      break;
+
+   case OPCODE_XPD:
+      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );   /* writemask replaced with XYZ */
+      ureg_insn( ureg, 
+                 translate_opcode( inst->op ), 
+                 dst, num_dst, 
+                 src, num_src );
+      break;
+
+   case OPCODE_NOISE1:
+   case OPCODE_NOISE2:
+   case OPCODE_NOISE3:
+   case OPCODE_NOISE4:
+      /* At some point, a motivated person could add a better
+       * implementation of noise.  Currently not even the nvidia
+       * binary drivers do anything more than this.  In any case, the
+       * place to do this is in the GL state tracker, not the poor
+       * driver.
+       */
+      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );   /* every noise opcode yields the constant 0.5 */
+      break;
+		 
+   case OPCODE_DDY:
+      // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c
+      assert(!"OPCODE_DDY");
+      // NOTE(review): nothing is emitted for DDY if assert() is compiled out.
+      break;
+
+   default:
+      ureg_insn( ureg,   /* every other opcode is emitted as a plain instruction */
+                 translate_opcode( inst->op ), 
+                 dst, num_dst, 
+                 src, num_src );
+      break;
+   }
+}
+
+/**
+ * Emit TGSI that shifts the fragment position by (adjX, adjY) to convert
+ * between pixel center conventions; later WPOS reads use the shifted copy.
+ */
+static void
+emit_adjusted_wpos( struct st_translate *t,
+                    const struct gl_program *program,
+                    GLfloat adjX, GLfloat adjY)
+{
+   struct ureg_dst biased = ureg_DECL_temporary(t->ureg);
+   struct ureg_src *wpos_slot = &t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
+   struct ureg_src offset = ureg_imm4f(t->ureg, adjX, adjY, 0.0f, 0.0f);
+
+   /* Only X and Y are biased; Z and W get +0 so that shaders reading
+    * gl_FragCoord.z or .w still see the original values.
+    */
+   ureg_ADD(t->ureg, biased, *wpos_slot, offset);
+
+   /* Later reads of the position input now come from the temporary. */
+   *wpos_slot = ureg_src(biased);
+}
+
+
+/**
+ * Emit the TGSI that rewrites the WPOS y coordinate from the
+ * STATE_FB_WPOS_Y_TRANSFORM constant: y * .x + .y when \p invert is set,
+ * y * .z + .w otherwise.  The other components are passed through.
+ */
+static void
+emit_wpos_inversion( struct st_translate *t,
+                     const struct gl_program *program,
+                     boolean invert)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   /* Fragment program uses fragment position input.
+    * Need to replace instances of INPUT[WPOS] with temp T
+    * where T = INPUT[WPOS] but with y transformed.
+    */
+   static const gl_state_index wposTransformState[STATE_LENGTH]
+      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 
+          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+   
+   /* XXX: note we are modifying the incoming shader here!  Need to
+    * do this before emitting the constant decls below, or this
+    * will be missed:
+    */
+   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
+                                                       wposTransformState);
+
+   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
+   struct ureg_dst wpos_temp;
+   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
+
+   /* Reuse the input in place when it already is a temporary; otherwise
+    * MOV wpos_temp, input[wpos] into a freshly declared temporary. */
+   if (wpos_input.File == TGSI_FILE_TEMPORARY)
+      wpos_temp = ureg_dst(wpos_input);
+   else {
+      wpos_temp = ureg_DECL_temporary( ureg );
+      ureg_MOV( ureg, wpos_temp, wpos_input );
+   }
+
+   if (invert) {
+      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
+       */
+      ureg_MAD( ureg,
+                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+                wpos_input,
+                ureg_scalar(wpostrans, 0),
+                ureg_scalar(wpostrans, 1));
+   } else {
+      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
+       */
+      ureg_MAD( ureg,
+                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+                wpos_input,
+                ureg_scalar(wpostrans, 2),
+                ureg_scalar(wpostrans, 3));
+   }
+
+   /* Use wpos_temp as position input from here on:
+    */
+   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
+}
+
+
+/**
+ * Emit fragment position/coordinate code (origin and pixel-center fixups).
+ */
+static void
+emit_wpos(struct st_context *st,
+          struct st_translate *t,
+          const struct gl_program *program,
+          struct ureg_program *ureg)
+{
+   const struct gl_fragment_program *fp =
+      (const struct gl_fragment_program *) program;
+   struct pipe_screen *pscreen = st->pipe->screen;
+   boolean invert = FALSE;   /* set when the driver's origin differs from the shader's */
+
+   if (fp->OriginUpperLeft) {
+      /* Fragment shader wants origin in upper-left */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
+         /* the driver supports upper-left origin */
+      }
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
+         /* the driver supports lower-left origin, need to invert Y */
+         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+         invert = TRUE;
+      }
+      else
+         assert(0);   /* neither origin cap is reported by the driver */
+   }
+   else {
+      /* Fragment shader wants origin in lower-left */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
+         /* the driver supports lower-left origin */
+         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
+         /* the driver supports upper-left origin, need to invert Y */
+         invert = TRUE;
+      else
+         assert(0);   /* neither origin cap is reported by the driver */
+   }
+   
+   if (fp->PixelCenterInteger) {
+      /* Fragment shader wants pixel center integer */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
+         /* the driver supports pixel center integer */
+         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
+         /* the driver supports pixel center half integer, need to bias X,Y */
+         emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
+      else
+         assert(0);   /* neither pixel-center cap is reported by the driver */
+   }
+   else {
+      /* Fragment shader wants pixel center half integer */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
+         /* the driver supports pixel center half integer */
+      }
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
+         /* the driver supports pixel center integer, need to bias X,Y */
+         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+         emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
+      }
+      else
+         assert(0);   /* neither pixel-center cap is reported by the driver */
+   }
+
+   /* we invert after adjustment so that we avoid the MOV to temporary,
+    * and reuse the adjustment ADD instead.  This call is unconditional:
+    * `invert` only selects which wpostrans components the MAD uses. */
+   emit_wpos_inversion(t, program, invert);
+}
+
+/**
+ * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
+ * \param program  the program to translate
+ * \param numInputs  number of input registers used
+ * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
+ *                      input indexes
+ * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
+ * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
+ *                            each input
+ * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
+ * \param numOutputs  number of output registers used
+ * \param outputMapping  maps Mesa fragment program outputs to TGSI
+ *                       generic outputs
+ * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
+ * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
+ *                             each output
+ *
+ * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
+ */
+extern "C" enum pipe_error
+st_translate_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   glsl_to_tgsi_visitor *program,
+   const struct gl_program *proginfo,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags )
+{
+   struct st_translate translate, *t;
+   unsigned i;
+   enum pipe_error ret = PIPE_OK;
+
+   assert(numInputs <= Elements(t->inputs));
+   assert(numOutputs <= Elements(t->outputs));
+
+   t = &translate;
+   memset(t, 0, sizeof *t);
+
+   t->procType = procType;
+   t->inputMapping = inputMapping;
+   t->outputMapping = outputMapping;
+   t->ureg = ureg;
+   t->pointSizeOutIndex = -1;
+   t->prevInstWrotePointSize = GL_FALSE;
+
+   /*_mesa_print_program(program);*/
+
+   /*
+    * Declare input attributes.
+    */
+   if (procType == TGSI_PROCESSOR_FRAGMENT) {
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_fs_input(ureg,
+                                           inputSemanticName[i],
+                                           inputSemanticIndex[i],
+                                           interpMode[i]);
+      }
+
+      if (proginfo->InputsRead & FRAG_BIT_WPOS) {
+         /* Must do this after setting up t->inputs, and before
+          * emitting constant references, below:
+          */
+          printf("FRAG_BIT_WPOS\n");
+          emit_wpos(st_context(ctx), t, proginfo, ureg);
+      }
+
+      if (proginfo->InputsRead & FRAG_BIT_FACE) {
+         // TODO: uncomment
+         printf("FRAG_BIT_FACE\n");
+         //emit_face_var( t, program );
+      }
+
+      /*
+       * Declare output attributes.
+       */
+      for (i = 0; i < numOutputs; i++) {
+         switch (outputSemanticName[i]) {
+         case TGSI_SEMANTIC_POSITION:
+            t->outputs[i] = ureg_DECL_output( ureg,
+                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
+                                              outputSemanticIndex[i] );
+
+            t->outputs[i] = ureg_writemask( t->outputs[i],
+                                            TGSI_WRITEMASK_Z );
+            break;
+         case TGSI_SEMANTIC_STENCIL:
+            t->outputs[i] = ureg_DECL_output( ureg,
+                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
+                                              outputSemanticIndex[i] );
+            t->outputs[i] = ureg_writemask( t->outputs[i],
+                                            TGSI_WRITEMASK_Y );
+            break;
+         case TGSI_SEMANTIC_COLOR:
+            t->outputs[i] = ureg_DECL_output( ureg,
+                                              TGSI_SEMANTIC_COLOR,
+                                              outputSemanticIndex[i] );
+            break;
+         default:
+            debug_assert(0);
+            return PIPE_ERROR_BAD_INPUT;
+         }
+      }
+   }
+   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_gs_input(ureg,
+                                           i,
+                                           inputSemanticName[i],
+                                           inputSemanticIndex[i]);
+      }
+
+      for (i = 0; i < numOutputs; i++) {
+         t->outputs[i] = ureg_DECL_output( ureg,
+                                           outputSemanticName[i],
+                                           outputSemanticIndex[i] );
+      }
+   }
+   else {
+      assert(procType == TGSI_PROCESSOR_VERTEX);
+
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
+      }
+
+      for (i = 0; i < numOutputs; i++) {
+         t->outputs[i] = ureg_DECL_output( ureg,
+                                           outputSemanticName[i],
+                                           outputSemanticIndex[i] );
+         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
+            /* Writing to the point size result register requires special
+             * handling to implement clamping.
+             */
+            static const gl_state_index pointSizeClampState[STATE_LENGTH]
+               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+               /* XXX: note we are modifying the incoming shader here!  Need to
+               * do this before emitting the constant decls below, or this
+               * will be missed.
+               * XXX: depends on "Parameters" field specific to Mesa IR
+               */
+            unsigned pointSizeClampConst =
+               _mesa_add_state_reference(proginfo->Parameters,
+                                         pointSizeClampState);
+            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
+            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
+            t->pointSizeResult = t->outputs[i];
+            t->pointSizeOutIndex = i;
+            t->outputs[i] = psizregtemp;
+         }
+      }
+      /*if (passthrough_edgeflags)
+         emit_edgeflags( t, program ); */ // TODO: uncomment
+   }
+
+   /* Declare address register.
+    */
+   if (program->num_address_regs > 0) {
+      debug_assert( program->num_address_regs == 1 );
+      t->address[0] = ureg_DECL_address( ureg );
+   }
+
+   /* Declare misc input registers
+    */
+   {
+      GLbitfield sysInputs = proginfo->SystemValuesRead;
+      unsigned numSys = 0;
+      for (i = 0; sysInputs; i++) {
+         if (sysInputs & (1 << i)) {
+            unsigned semName = mesa_sysval_to_semantic[i];
+            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
+            numSys++;
+            sysInputs &= ~(1 << i);
+         }
+      }
+   }
+
+   if (program->indirect_addr_temps) {
+      /* If temps are accessed with indirect addressing, declare temporaries
+       * in sequential order.  Else, we declare them on demand elsewhere.
+       * (Note: the number of temporaries is equal to program->next_temp)
+       */
+      for (i = 0; i < (unsigned)program->next_temp; i++) {
+         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
+         t->temps[i] = ureg_DECL_temporary( t->ureg );
+      }
+   }
+
+   /* Emit constants and immediates.  Mesa uses a single index space
+    * for these, so we put all the translated regs in t->constants.
+    * XXX: this entire if block depends on proginfo->Parameters from Mesa IR
+    */
+   if (proginfo->Parameters) {
+      t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
+      if (t->constants == NULL) {
+         ret = PIPE_ERROR_OUT_OF_MEMORY;
+         goto out;
+      }
+
+      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
+         switch (proginfo->Parameters->Parameters[i].Type) {
+         case PROGRAM_ENV_PARAM:
+         case PROGRAM_LOCAL_PARAM:
+         case PROGRAM_STATE_VAR:
+         case PROGRAM_NAMED_PARAM:
+         case PROGRAM_UNIFORM:
+            t->constants[i] = ureg_DECL_constant( ureg, i );
+            break;
+
+            /* Emit immediates only when there's no indirect addressing of
+             * the const buffer.
+             * FIXME: Be smarter and recognize param arrays:
+             * indirect addressing is only valid within the referenced
+             * array.
+             */
+         case PROGRAM_CONSTANT:
+            if (program->indirect_addr_consts)
+               t->constants[i] = ureg_DECL_constant( ureg, i );
+            else
+               t->constants[i] = 
+                  ureg_DECL_immediate( ureg,
+                                       proginfo->Parameters->ParameterValues[i],
+                                       4 );
+            break;
+         default:
+            break;
+         }
+      }
+   }
+
+   /* texture samplers */
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+      // XXX: depends on SamplersUsed property generated by conversion to Mesa IR
+      if (proginfo->SamplersUsed & (1 << i)) {
+         t->samplers[i] = ureg_DECL_sampler( ureg, i );
+      }
+   }
+
+   /* Emit each instruction in turn:
+    */
+   foreach_iter(exec_list_iterator, iter, program->instructions) {
+      set_insn_start( t, ureg_get_instruction_number( ureg ));
+      compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() );
+
+      if (t->prevInstWrotePointSize && proginfo->Id) {
+         /* The previous instruction wrote to the (fake) vertex point size
+          * result register.  Now we need to clamp that value to the min/max
+          * point size range, putting the result into the real point size
+          * register.
+          * Note that we can't do this easily at the end of program due to
+          * possible early return.
+          */
+         set_insn_start( t, ureg_get_instruction_number( ureg ));
+         ureg_MAX( t->ureg,
+                   ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
+                   ureg_src(t->outputs[t->pointSizeOutIndex]),
+                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+         ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
+                   ureg_src(t->outputs[t->pointSizeOutIndex]),
+                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+      }
+      t->prevInstWrotePointSize = GL_FALSE;
+   }
+
+   /* Fix up all emitted labels:
+    */
+   for (i = 0; i < t->labels_count; i++) {
+      ureg_fixup_label( ureg,
+                        t->labels[i].token,
+                        t->insn[t->labels[i].branch_target] );
+   }
+
+out:
+   FREE(t->insn);
+   FREE(t->labels);
+   FREE(t->constants);
+
+   if (t->error) {
+      debug_printf("%s: translate error flag set\n", __FUNCTION__);
+   }
+
+   return ret;
+}
+/* ----------------------------- End TGSI code ------------------------------ */
+
+/**
+ * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader.
+ *
+ * The GLSL IR of \p shader is visited to build a list of
+ * glsl_to_tgsi_instructions.  That list is optimized and then also lowered
+ * to Mesa IR prog_instructions, which are stored in the new program.  On
+ * success the visitor is attached to the st_*_program (glsl_to_tgsi field)
+ * so that it is still available when the program is translated to TGSI.
+ *
+ * \param ctx             GL context; supplies the driver hooks and the
+ *                        per-stage compiler options
+ * \param shader_program  program being linked; receives the link error if
+ *                        the translation fails
+ * \param shader          linked shader stage to translate
+ * \return the new program (also referenced from shader->Program), or NULL
+ *         on failure
+ */
+static struct gl_program *
+get_mesa_program(struct gl_context *ctx,
+                 struct gl_shader_program *shader_program,
+                 struct gl_shader *shader)
+{
+   glsl_to_tgsi_visitor* v;
+   struct prog_instruction *mesa_instructions, *mesa_inst;
+   ir_instruction **mesa_instruction_annotation;
+   int i;
+   struct gl_program *prog;
+   GLenum target;
+   const char *target_string;
+   GLboolean progress;
+   struct gl_shader_compiler_options *options =
+         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
+
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      target = GL_VERTEX_PROGRAM_ARB;
+      target_string = "vertex";
+      break;
+   case GL_FRAGMENT_SHADER:
+      target = GL_FRAGMENT_PROGRAM_ARB;
+      target_string = "fragment";
+      break;
+   case GL_GEOMETRY_SHADER:
+      target = GL_GEOMETRY_PROGRAM_NV;
+      target_string = "geometry";
+      break;
+   default:
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   validate_ir_tree(shader->ir);
+
+   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
+   if (!prog)
+      return NULL;
+   prog->Parameters = _mesa_new_parameter_list();
+   prog->Varying = _mesa_new_parameter_list();
+   prog->Attributes = _mesa_new_parameter_list();
+
+   /* Create the visitor only once no early return can happen before it is
+    * either handed to the program or explicitly deleted.
+    */
+   v = new glsl_to_tgsi_visitor();
+   v->ctx = ctx;
+   v->prog = prog;
+   v->shader_program = shader_program;
+   v->options = options;
+
+   add_uniforms_to_parameters_list(shader_program, shader, prog);
+
+   /* Emit Mesa IR for main(). */
+   visit_exec_list(shader->ir, v);
+   v->emit(NULL, OPCODE_END);
+
+   /* Now emit bodies for any functions that were used.  Visiting a body may
+    * register further signatures, so repeat until a full pass emits nothing.
+    */
+   do {
+      progress = GL_FALSE;
+
+      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
+         function_entry *entry = (function_entry *)iter.get();
+
+         if (!entry->bgn_inst) {
+            v->current_function = entry;
+
+            entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB);
+            entry->bgn_inst->function = entry;
+
+            visit_exec_list(&entry->sig->body, v);
+
+            /* Make sure every subroutine ends with an explicit RET. */
+            glsl_to_tgsi_instruction *last;
+            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
+            if (last->op != OPCODE_RET)
+               v->emit(NULL, OPCODE_RET);
+
+            glsl_to_tgsi_instruction *end;
+            end = v->emit(NULL, OPCODE_ENDSUB);
+            end->function = entry;
+
+            progress = GL_TRUE;
+         }
+      }
+   } while (progress);
+
+#if 0
+   /* Print out some information (for debugging purposes) used by the
+    * optimization passes. */
+   for (i=0; i < v->next_temp; i++) {
+      int fr = v->get_first_temp_read(i);
+      int fw = v->get_first_temp_write(i);
+      int lr = v->get_last_temp_read(i);
+      int lw = v->get_last_temp_write(i);
+
+      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
+      assert(fw <= fr);
+   }
+#endif
+
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
+   v->copy_propagate();
+   v->eliminate_dead_code();
+   v->merge_registers();
+   v->renumber_registers();
+
+   prog->NumTemporaries = v->next_temp;
+
+   int num_instructions = 0;
+   foreach_iter(exec_list_iterator, iter, v->instructions) {
+      num_instructions++;
+   }
+
+   mesa_instructions =
+      (struct prog_instruction *)calloc(num_instructions,
+                                        sizeof(*mesa_instructions));
+   if (!mesa_instructions) {
+      /* The conversion loop below writes through this array, so bail out
+       * now and release everything created so far.
+       */
+      fail_link(shader_program, "Out of memory\n");
+      delete v;
+      _mesa_reference_program(ctx, &prog, NULL);
+      _mesa_reference_program(ctx, &shader->Program, NULL);
+      return NULL;
+   }
+   mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *,
+                                              num_instructions);
+
+   /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions.
+    * TODO: remove
+    */
+   mesa_inst = mesa_instructions;
+   i = 0;
+   foreach_iter(exec_list_iterator, iter, v->instructions) {
+      const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      mesa_inst->Opcode = inst->op;
+      mesa_inst->CondUpdate = inst->cond_update;
+      if (inst->saturate)
+         mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
+      mesa_inst->DstReg.File = inst->dst.file;
+      mesa_inst->DstReg.Index = inst->dst.index;
+      mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
+      mesa_inst->DstReg.WriteMask = inst->dst.writemask;
+      mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
+      mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]);
+      mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]);
+      mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]);
+      mesa_inst->TexSrcUnit = inst->sampler;
+      mesa_inst->TexSrcTarget = inst->tex_target;
+      mesa_inst->TexShadow = inst->tex_shadow;
+      mesa_instruction_annotation[i] = inst->ir;
+
+      /* Set IndirectRegisterFiles. */
+      if (mesa_inst->DstReg.RelAddr)
+         prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
+
+      /* Update program's bitmask of indirectly accessed register files */
+      for (unsigned src = 0; src < 3; src++)
+         if (mesa_inst->SrcReg[src].RelAddr)
+            prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
+
+      /* An IF that survived lowering cannot be emitted for this target. */
+      if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
+         fail_link(shader_program, "Couldn't flatten if statement\n");
+      }
+
+      switch (mesa_inst->Opcode) {
+      case OPCODE_BGNSUB:
+         inst->function->inst = i;
+         mesa_inst->Comment = strdup(inst->function->sig->function_name());
+         break;
+      case OPCODE_ENDSUB:
+         mesa_inst->Comment = strdup(inst->function->sig->function_name());
+         break;
+      case OPCODE_CAL:
+         mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
+         break;
+      case OPCODE_ARL:
+         prog->NumAddressRegs = 1;
+         break;
+      default:
+         break;
+      }
+
+      mesa_inst++;
+      i++;
+
+      /* Stop converting as soon as a link error has been recorded. */
+      if (!shader_program->LinkStatus)
+         break;
+   }
+
+   if (!shader_program->LinkStatus) {
+      /* Nothing has been published yet: drop the instruction array, the
+       * visitor and the only reference to the new program, then clear any
+       * program the shader still points at.
+       */
+      free(mesa_instructions);
+      delete v;
+      _mesa_reference_program(ctx, &prog, NULL);
+      _mesa_reference_program(ctx, &shader->Program, NULL);
+      return NULL;
+   }
+
+   set_branchtargets(v, mesa_instructions, num_instructions);
+
+   if (ctx->Shader.Flags & GLSL_DUMP) {
+      printf("\n");
+      printf("GLSL IR for linked %s program %d:\n", target_string,
+             shader_program->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n");
+      printf("\n");
+      printf("Mesa IR for linked %s program %d:\n", target_string,
+             shader_program->Name);
+      print_program(mesa_instructions, mesa_instruction_annotation,
+                    num_instructions);
+   }
+
+   prog->Instructions = mesa_instructions;
+   prog->NumInstructions = num_instructions;
+
+   do_set_program_inouts(shader->ir, prog);
+   count_resources(prog);
+
+   check_resources(ctx, shader_program, prog);
+
+   _mesa_reference_program(ctx, &shader->Program, prog);
+
+   if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
+      _mesa_optimize_program(ctx, prog);
+   }
+
+   struct st_vertex_program *stvp;
+   struct st_fragment_program *stfp;
+   struct st_geometry_program *stgp;
+
+   /* Hand the visitor over to the state tracker program object. */
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      stvp = (struct st_vertex_program *)prog;
+      stvp->glsl_to_tgsi = v;
+      break;
+   case GL_FRAGMENT_SHADER:
+      stfp = (struct st_fragment_program *)prog;
+      stfp->glsl_to_tgsi = v;
+      break;
+   case GL_GEOMETRY_SHADER:
+      stgp = (struct st_geometry_program *)prog;
+      stgp->glsl_to_tgsi = v;
+      break;
+   default:
+      /* Unreachable: the first switch already rejected other types. */
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   return prog;
+}
+
+extern "C" {
+
+/**
+ * Allocate a zero-filled gl_shader, record its type and name, and run the
+ * common Mesa shader initialization on it.
+ * \return the new shader, or NULL if the allocation failed
+ */
+struct gl_shader *
+st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
+{
+   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
+          type == GL_GEOMETRY_SHADER_ARB);
+
+   struct gl_shader *sh = rzalloc(NULL, struct gl_shader);
+   if (sh == NULL)
+      return NULL;
+
+   sh->Type = type;
+   sh->Name = name;
+   _mesa_init_shader(ctx, sh);
+   return sh;
+}
+
+/**
+ * Allocate a zero-filled gl_shader_program, record its name, and run the
+ * common Mesa shader program initialization on it.
+ * \return the new shader program, or NULL if the allocation failed
+ */
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name)
+{
+   struct gl_shader_program *sp = rzalloc(NULL, struct gl_shader_program);
+
+   if (sp == NULL)
+      return NULL;
+
+   sp->Name = name;
+   _mesa_init_shader_program(ctx, sp);
+   return sp;
+}
+
+/**
+ * Link a shader.
+ * Called via ctx->Driver.LinkShader()
+ * This actually involves converting GLSL IR into Mesa gl_programs with
+ * code lowering and other optimizations.
+ *
+ * \param ctx   GL context; supplies the per-stage compiler options and the
+ *              ProgramStringNotify driver hook
+ * \param prog  shader program whose linked stages are translated; must
+ *              already have LinkStatus set
+ * \return GL_FALSE if the driver rejects one of the generated programs,
+ *         GL_TRUE otherwise
+ */
+GLboolean
+st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   assert(prog->LinkStatus);
+
+   /* Pass 1: lower and optimize the GLSL IR of every linked stage until
+    * none of the passes reports any further change.
+    */
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      bool progress;
+      exec_list *ir = prog->_LinkedShaders[i]->ir;
+      const struct gl_shader_compiler_options *options =
+            &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
+
+      do {
+         progress = false;
+
+         /* Lowering */
+         do_mat_op_to_vec(ir);
+         lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+                                 | LOG_TO_LOG2
+                                 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
+
+         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
+
+         progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+
+         progress = lower_quadop_vector(ir, true) || progress;
+
+         if (options->EmitNoIfs) {
+            progress = lower_discard(ir) || progress;
+            progress = lower_if_to_cond_assign(ir) || progress;
+         }
+
+         if (options->EmitNoNoise)
+            progress = lower_noise(ir) || progress;
+
+         /* If there are forms of indirect addressing that the driver
+          * cannot handle, perform the lowering pass.
+          */
+         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
+             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
+           progress =
+             lower_variable_index_to_cond_assign(ir,
+                                                 options->EmitNoIndirectInput,
+                                                 options->EmitNoIndirectOutput,
+                                                 options->EmitNoIndirectTemp,
+                                                 options->EmitNoIndirectUniform)
+             || progress;
+
+         progress = do_vec_index_to_cond_assign(ir) || progress;
+      } while (progress);
+
+      validate_ir_tree(ir);
+   }
+
+   /* Pass 2: build a gl_program for every linked stage, store it in the
+    * shader program and notify the driver about it.
+    */
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      struct gl_program *linked_prog;
+
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+
+      if (linked_prog) {
+         bool ok = true;
+
+         switch (prog->_LinkedShaders[i]->Type) {
+         case GL_VERTEX_SHADER:
+            _mesa_reference_vertprog(ctx, &prog->VertexProgram,
+                                     (struct gl_vertex_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_FRAGMENT_SHADER:
+            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
+                                     (struct gl_fragment_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_GEOMETRY_SHADER:
+            _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
+                                     (struct gl_geometry_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
+                                                 linked_prog);
+            break;
+         }
+         if (!ok) {
+            /* Drop the local reference on this path too; returning without
+             * it would leave the program's refcount one too high.
+             */
+            _mesa_reference_program(ctx, &linked_prog, NULL);
+            return GL_FALSE;
+         }
+      }
+
+      /* The shader program now holds its own reference (if any). */
+      _mesa_reference_program(ctx, &linked_prog, NULL);
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Link a GLSL shader program.  Called via glLinkProgram().
+ *
+ * Previous link results are cleared, the attached shaders are checked for
+ * successful compilation, and then the GLSL linker and the driver's
+ * LinkShader hook run in turn.  The outcome is left in prog->LinkStatus.
+ */
+void
+st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   _mesa_clear_shader_program_data(ctx, prog);
+
+   /* Start out optimistic; any failing step below clears the flag. */
+   prog->LinkStatus = GL_TRUE;
+
+   for (unsigned int n = 0; n < prog->NumShaders; n++) {
+      if (prog->Shaders[n]->CompileStatus)
+         continue;
+
+      fail_link(prog, "linking with uncompiled shader");
+      prog->LinkStatus = GL_FALSE;
+   }
+
+   /* Fresh varying list, and forget the programs of any earlier link. */
+   prog->Varying = _mesa_new_parameter_list();
+   _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
+   _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
+   _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
+
+   if (prog->LinkStatus)
+      link_shaders(ctx, prog);
+
+   /* Only hand the program to the driver if the GLSL linker succeeded. */
+   if (prog->LinkStatus && !ctx->Driver.LinkShader(ctx, prog))
+      prog->LinkStatus = GL_FALSE;
+
+   set_uniform_initializers(ctx, prog);
+
+   if (!(ctx->Shader.Flags & GLSL_DUMP))
+      return;
+
+   if (!prog->LinkStatus)
+      printf("GLSL shader program %d failed to link\n", prog->Name);
+
+   if (prog->InfoLog && prog->InfoLog[0] != 0) {
+      printf("GLSL shader program %d info log:\n", prog->Name);
+      printf("%s\n", prog->InfoLog);
+   }
+}
+
+} /* extern "C" */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
new file mode 100644
index 00000000000..e21c0d1e0af
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef ST_GLSL_TO_TGSI_H
+#define ST_GLSL_TO_TGSI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "main/glheader.h"
+#include "tgsi/tgsi_ureg.h"
+
+struct gl_context;
+struct gl_shader;
+struct gl_shader_program;
+struct glsl_to_tgsi_visitor;
+
+/**
+ * Translate the instructions collected by a glsl_to_tgsi_visitor into TGSI,
+ * emitting them through \p ureg.
+ * \return a pipe_error code, e.g. PIPE_ERROR_OUT_OF_MEMORY
+ */
+enum pipe_error st_translate_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   struct glsl_to_tgsi_visitor *program,
+   const struct gl_program *proginfo,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags);
+
+/* Presumably destroys a visitor from C code -- confirm in the .cpp file. */
+void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
+
+/** Allocate and initialize a gl_shader; returns NULL on failure. */
+struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
+
+/** Allocate and initialize a gl_shader_program; returns NULL on failure. */
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name);
+
+/** Link a GLSL shader program; the result is left in prog->LinkStatus. */
+void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+
+/** Lower each linked shader and build its gl_program; GL_FALSE on failure. */
+GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ST_GLSL_TO_TGSI_H */
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index a41e5b16a85..75842286ba8 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -267,7 +267,7 @@ src_register( struct st_translate *t,
 /**
  * Map mesa texture target to TGSI texture target.
  */
-static unsigned
+unsigned
 translate_texture_target( GLuint textarget,
                           GLboolean shadow )
 {
@@ -511,7 +511,7 @@ static void emit_ddy( struct st_translate *t,
 
 
-static unsigned
+unsigned
 translate_opcode( unsigned op )
 {
    switch( op ) {
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index 0615e52ef62..0dbdf5f6159 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -64,6 +64,12 @@ st_translate_mesa_program(
 void
 st_free_tokens(const struct tgsi_token *tokens);
 
+unsigned
+translate_opcode(unsigned op);
+
+unsigned
+translate_texture_target(GLuint textarget, GLboolean shadow);
+
 
 #if defined __cplusplus
 } /* extern "C" */
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 7a6d33d3fea..dd618424d66 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -174,8 +174,8 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp)
  * \param tokensOut  destination for TGSI tokens
  * \return  pointer to cached pipe_shader object.
  */
-static void
-st_prepare_vertex_program(struct st_context *st,
+void
+st_prepare_vertex_program(struct gl_context *ctx,
                             struct st_vertex_program *stvp)
 {
    GLuint attr;
@@ -184,7 +184,7 @@ st_prepare_vertex_program(struct st_context *st,
    stvp->num_outputs = 0;
 
    if (stvp->Base.IsPositionInvariant)
-      _mesa_insert_mvp_code(st->ctx, &stvp->Base);
+      _mesa_insert_mvp_code(ctx, &stvp->Base);
 
    assert(stvp->Base.Base.NumInstructions > 1);
 
@@ -292,7 +292,7 @@ st_translate_vertex_program(struct st_context *st,
    enum pipe_error error;
    unsigned num_outputs;
 
-   st_prepare_vertex_program( st, stvp );
+   st_prepare_vertex_program(st->ctx, stvp);
 
    _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
    _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
@@ -318,22 +318,41 @@ st_translate_vertex_program(struct st_context *st,
       debug_printf("\n");
    }
 
-   error = st_translate_mesa_program(st->ctx,
-                                     TGSI_PROCESSOR_VERTEX,
-                                     ureg,
-                                     &stvp->Base.Base,
-                                     /* inputs */
-                                     vpv->num_inputs,
-                                     stvp->input_to_index,
-                                     NULL, /* input semantic name */
-                                     NULL, /* input semantic index */
-                                     NULL,
-                                     /* outputs */
-                                     num_outputs,
-                                     stvp->result_to_output,
-                                     stvp->output_semantic_name,
-                                     stvp->output_semantic_index,
-                                     key->passthrough_edgeflags );
+   if (stvp->glsl_to_tgsi)
+      error = st_translate_program(st->ctx,
+                                   TGSI_PROCESSOR_VERTEX,
+                                   ureg,
+                                   stvp->glsl_to_tgsi,
+                                   &stvp->Base.Base,
+                                   /* inputs */
+                                   stvp->num_inputs,
+                                   stvp->input_to_index,
+                                   NULL, /* input semantic name */
+                                   NULL, /* input semantic index */
+                                   NULL, /* interp mode */
+                                   /* outputs */
+                                   stvp->num_outputs,
+                                   stvp->result_to_output,
+                                   stvp->output_semantic_name,
+                                   stvp->output_semantic_index,
+                                   key->passthrough_edgeflags );
+   else
+      error = st_translate_mesa_program(st->ctx,
+                                        TGSI_PROCESSOR_VERTEX,
+                                        ureg,
+                                        &stvp->Base.Base,
+                                        /* inputs */
+                                        vpv->num_inputs,
+                                        stvp->input_to_index,
+                                        NULL, /* input semantic name */
+                                        NULL, /* input semantic index */
+                                        NULL,
+                                        /* outputs */
+                                        num_outputs,
+                                        stvp->result_to_output,
+                                        stvp->output_semantic_name,
+                                        stvp->output_semantic_index,
+                                        key->passthrough_edgeflags );
 
    if (error)
       goto fail;
@@ -393,6 +412,151 @@ st_get_vp_variant(struct st_context *st,
    return vpv;
 }
 
+/**
+ * Translate Mesa fragment shader attributes to TGSI attributes.
+ *
+ * Fills in the input mapping, semantics and interpolation modes as well as
+ * the output mapping and semantics stored in \p stfp, based on the
+ * program's InputsRead and OutputsWritten masks.  The input and output
+ * counts are rebuilt from zero on every call.
+ *
+ * \param ctx   GL context (not used by this function)
+ * \param stfp  fragment program whose TGSI attribute tables are rebuilt
+ * \return GL_TRUE if color output should be written to all render targets,
+ *         GL_FALSE if not
+ */
+GLboolean
+st_prepare_fragment_program(struct gl_context *ctx,
+                            struct st_fragment_program *stfp)
+{
+   GLuint attr;
+   const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
+   GLboolean write_all = GL_FALSE;
+
+   /* Start from empty tables, as st_prepare_vertex_program() does; without
+    * this a second call would append after the slots of the previous one.
+    */
+   stfp->num_inputs = 0;
+   stfp->num_outputs = 0;
+
+   /*
+    * Convert Mesa program inputs to TGSI input register semantics.
+    */
+   for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
+      if (inputsRead & (1 << attr)) {
+         const GLuint slot = stfp->num_inputs++;
+
+         stfp->input_to_index[attr] = slot;
+
+         switch (attr) {
+         case FRAG_ATTRIB_WPOS:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            break;
+         case FRAG_ATTRIB_COL0:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            break;
+         case FRAG_ATTRIB_COL1:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            stfp->input_semantic_index[slot] = 1;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            break;
+         case FRAG_ATTRIB_FOGC:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+            break;
+         case FRAG_ATTRIB_FACE:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT;
+            break;
+            /* In most cases, there is nothing special about these
+             * inputs, so adopt a convention to use the generic
+             * semantic name and the mesa FRAG_ATTRIB_ number as the
+             * index.
+             *
+             * All that is required is that the vertex shader labels
+             * its own outputs similarly, and that the vertex shader
+             * generates at least every output required by the
+             * fragment shader plus fixed-function hardware (such as
+             * BFC).
+             *
+             * There is no requirement that semantic indexes start at
+             * zero or be restricted to a particular range -- nobody
+             * should be building tables based on semantic index.
+             */
+         case FRAG_ATTRIB_PNTC:
+         case FRAG_ATTRIB_TEX0:
+         case FRAG_ATTRIB_TEX1:
+         case FRAG_ATTRIB_TEX2:
+         case FRAG_ATTRIB_TEX3:
+         case FRAG_ATTRIB_TEX4:
+         case FRAG_ATTRIB_TEX5:
+         case FRAG_ATTRIB_TEX6:
+         case FRAG_ATTRIB_TEX7:
+         case FRAG_ATTRIB_VAR0:
+         default:
+            /* Actually, let's try and zero-base this just for
+             * readability of the generated TGSI.
+             */
+            assert(attr >= FRAG_ATTRIB_TEX0);
+            stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            if (attr == FRAG_ATTRIB_PNTC)
+               stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            else
+               stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+            break;
+         }
+      }
+      else {
+         /* Mark inputs the program does not read as unmapped. */
+         stfp->input_to_index[attr] = -1;
+      }
+   }
+
+   /*
+    * Semantics and mapping for outputs
+    */
+   {
+      uint numColors = 0;
+      GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
+
+      /* if z is written, emit that first */
+      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION;
+         stfp->output_semantic_index[stfp->num_outputs] = 0;
+         stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs;
+         stfp->num_outputs++;
+         /* Clear the bit with a 64-bit mask to match the field's width. */
+         outputsWritten &= ~BITFIELD64_BIT(FRAG_RESULT_DEPTH);
+      }
+
+      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL;
+         stfp->output_semantic_index[stfp->num_outputs] = 0;
+         stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs;
+         stfp->num_outputs++;
+         outputsWritten &= ~BITFIELD64_BIT(FRAG_RESULT_STENCIL);
+      }
+
+      /* handle remaining outputs (color) */
+      for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
+         if (outputsWritten & BITFIELD64_BIT(attr)) {
+            switch (attr) {
+            case FRAG_RESULT_DEPTH:
+            case FRAG_RESULT_STENCIL:
+               /* handled above */
+               assert(0);
+               break;
+            case FRAG_RESULT_COLOR:
+               write_all = GL_TRUE; /* fallthrough */
+            default:
+               assert(attr == FRAG_RESULT_COLOR ||
+                      (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
+               stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR;
+               stfp->output_semantic_index[stfp->num_outputs] = numColors;
+               stfp->result_to_output[attr] = stfp->num_outputs;
+               numColors++;
+               break;
+            }
+
+            stfp->num_outputs++;
+         }
+      }
+   }
+
+   return write_all;
+}
+
 
 /**
  * Translate a Mesa fragment shader into a TGSI shader using extra info in
@@ -445,155 +609,12 @@ st_translate_fragment_program(struct st_context *st,
 
    if (!stfp->tgsi.tokens) {
       /* need to translate Mesa instructions to TGSI now */
-      GLuint outputMapping[FRAG_RESULT_MAX];
-      GLuint inputMapping[FRAG_ATTRIB_MAX];
-      GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
-      GLuint attr;
       enum pipe_error error;
-      const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
       struct ureg_program *ureg;
-      GLboolean write_all = GL_FALSE;
-
-      ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
-      ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
-      uint fs_num_inputs = 0;
-
-      ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
-      ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
-      uint fs_num_outputs = 0;
-
-
+      GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp);
+      
       _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
 
-      /*
-       * Convert Mesa program inputs to TGSI input register semantics.
-       */
-      for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
-         if (inputsRead & (1 << attr)) {
-            const GLuint slot = fs_num_inputs++;
-
-            inputMapping[attr] = slot;
-
-            switch (attr) {
-            case FRAG_ATTRIB_WPOS:
-               input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               break;
-            case FRAG_ATTRIB_COL0:
-               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               break;
-            case FRAG_ATTRIB_COL1:
-               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-               input_semantic_index[slot] = 1;
-               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               break;
-            case FRAG_ATTRIB_FOGC:
-               input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-               break;
-            case FRAG_ATTRIB_FACE:
-               input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
-               break;
-               /* In most cases, there is nothing special about these
-                * inputs, so adopt a convention to use the generic
-                * semantic name and the mesa FRAG_ATTRIB_ number as the
-                * index. 
-                * 
-                * All that is required is that the vertex shader labels
-                * its own outputs similarly, and that the vertex shader
-                * generates at least every output required by the
-                * fragment shader plus fixed-function hardware (such as
-                * BFC).
-                * 
-                * There is no requirement that semantic indexes start at
-                * zero or be restricted to a particular range -- nobody
-                * should be building tables based on semantic index.
-                */
-            case FRAG_ATTRIB_PNTC:
-            case FRAG_ATTRIB_TEX0:
-            case FRAG_ATTRIB_TEX1:
-            case FRAG_ATTRIB_TEX2:
-            case FRAG_ATTRIB_TEX3:
-            case FRAG_ATTRIB_TEX4:
-            case FRAG_ATTRIB_TEX5:
-            case FRAG_ATTRIB_TEX6:
-            case FRAG_ATTRIB_TEX7:
-            case FRAG_ATTRIB_VAR0:
-            default:
-               /* Actually, let's try and zero-base this just for
-                * readability of the generated TGSI.
-                */
-               assert(attr >= FRAG_ATTRIB_TEX0);
-               input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
-               input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-               if (attr == FRAG_ATTRIB_PNTC)
-                  interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               else
-                  interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-               break;
-            }
-         }
-         else {
-            inputMapping[attr] = -1;
-         }
-      }
-
-      /*
-       * Semantics and mapping for outputs
-       */
-      {
-         uint numColors = 0;
-         GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
-
-         /* if z is written, emit that first */
-         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
-            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
-            fs_output_semantic_index[fs_num_outputs] = 0;
-            outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
-            fs_num_outputs++;
-            outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
-         }
-
-         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
-            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
-            fs_output_semantic_index[fs_num_outputs] = 0;
-            outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
-            fs_num_outputs++;
-            outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
-         }
-
-         /* handle remaning outputs (color) */
-         for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
-            if (outputsWritten & BITFIELD64_BIT(attr)) {
-               switch (attr) {
-               case FRAG_RESULT_DEPTH:
-               case FRAG_RESULT_STENCIL:
-                  /* handled above */
-                  assert(0);
-                  break;
-               case FRAG_RESULT_COLOR:
-                  write_all = GL_TRUE; /* fallthrough */
-               default:
-                  assert(attr == FRAG_RESULT_COLOR ||
-                         (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
-                  fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
-                  fs_output_semantic_index[fs_num_outputs] = numColors;
-                  outputMapping[attr] = fs_num_outputs;
-                  numColors++;
-                  break;
-               }
-
-               fs_num_outputs++;
-            }
-         }
-      }
-
       ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
       if (ureg == NULL)
          return NULL;
@@ -606,21 +627,39 @@ st_translate_fragment_program(struct st_context *st,
       if (write_all == GL_TRUE)
          ureg_property_fs_color0_writes_all_cbufs(ureg, 1);
 
-      error = st_translate_mesa_program(st->ctx,
-                                        TGSI_PROCESSOR_FRAGMENT,
-                                        ureg,
-                                        &stfp->Base.Base,
-                                        /* inputs */
-                                        fs_num_inputs,
-                                        inputMapping,
-                                        input_semantic_name,
-                                        input_semantic_index,
-                                        interpMode,
-                                        /* outputs */
-                                        fs_num_outputs,
-                                        outputMapping,
-                                        fs_output_semantic_name,
-                                        fs_output_semantic_index, FALSE );
+      if (stfp->glsl_to_tgsi)
+         error = st_translate_program(st->ctx,
+                                      TGSI_PROCESSOR_FRAGMENT,
+                                      ureg,
+                                      stfp->glsl_to_tgsi,
+                                      &stfp->Base.Base,
+                                      /* inputs */
+                                      stfp->num_inputs,
+                                      stfp->input_to_index,
+                                      stfp->input_semantic_name,
+                                      stfp->input_semantic_index,
+                                      stfp->interp_mode,
+                                      /* outputs */
+                                      stfp->num_outputs,
+                                      stfp->result_to_output,
+                                      stfp->output_semantic_name,
+                                      stfp->output_semantic_index, FALSE );
+      else
+         error = st_translate_mesa_program(st->ctx,
+                                           TGSI_PROCESSOR_FRAGMENT,
+                                           ureg,
+                                           &stfp->Base.Base,
+                                           /* inputs */
+                                           stfp->num_inputs,
+                                           stfp->input_to_index,
+                                           stfp->input_semantic_name,
+                                           stfp->input_semantic_index,
+                                           stfp->interp_mode,
+                                           /* outputs */
+                                           stfp->num_outputs,
+                                           stfp->result_to_output,
+                                           stfp->output_semantic_name,
+                                           stfp->output_semantic_index, FALSE );
 
       stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
       ureg_destroy( ureg );
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index c4244df939e..67723de6d53 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -38,6 +38,7 @@
 #include "program/program.h"
 #include "pipe/p_state.h"
 #include "st_context.h"
+#include "st_glsl_to_tgsi.h"
 
 
 /** Fragment program variant key */
@@ -83,6 +84,22 @@ struct st_fp_variant
 struct st_fragment_program
 {
    struct gl_fragment_program Base;
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
+   
+   /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */
+   GLuint input_to_index[FRAG_ATTRIB_MAX];
+   /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */
+   GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
+   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
+   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+   GLuint num_inputs;
+   GLuint interp_mode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
+
+   /** Maps FRAG_RESULT_x to slot */
+   GLuint result_to_output[FRAG_RESULT_MAX];
+   ubyte output_semantic_name[FRAG_RESULT_MAX];
+   ubyte output_semantic_index[FRAG_RESULT_MAX];
+   GLuint num_outputs;
 
    struct pipe_shader_state tgsi;
 
@@ -136,6 +153,7 @@ struct st_vp_variant
 struct st_vertex_program
 {
    struct gl_vertex_program Base;  /**< The Mesa vertex program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
    /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
    GLuint input_to_index[VERT_ATTRIB_MAX];
@@ -184,6 +202,7 @@ struct st_gp_variant
 struct st_geometry_program
 {
    struct gl_geometry_program Base;  /**< The Mesa geometry program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
    /** map GP input back to VP output */
    GLuint input_map[PIPE_MAX_SHADER_INPUTS];
@@ -276,6 +295,14 @@ st_get_gp_variant(struct st_context *st,
                   const struct st_gp_variant_key *key);
 
 
+extern void
+st_prepare_vertex_program(struct gl_context *ctx,
+                          struct st_vertex_program *stvp);
+
+extern GLboolean
+st_prepare_fragment_program(struct gl_context *ctx,
+                            struct st_fragment_program *stfp);
+
 
 extern void
 st_release_vp_variants( struct st_context *st,
-- 
cgit v1.2.3


From 1e5fd8e480b661c1ab748c2ded587650ea7f3d20 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 29 Apr 2011 19:00:24 -0500
Subject: mesa: fix segfault when no Mesa IR is generated

---
 src/mesa/program/program.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 78efca9f122..224446a2683 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
    if (prog->String)
       free(prog->String);
 
-   _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
-
+   if (prog->Instructions) {
+      _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
+   }
    if (prog->Parameters) {
       _mesa_free_parameter_list(prog->Parameters);
    }
-- 
cgit v1.2.3


From 44867da3543ca54ef245695cef72a6e305451d93 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 29 Apr 2011 19:24:57 -0500
Subject: glsl_to_tgsi: stop generating Mesa IR

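Previously, glsl_to_tgsi still generated Mesa IR as a remnant of ir_to_mesa.
That IR was otherwise unused, but the TGSI translation still depended on some
of the information derived from it (e.g. SamplersUsed).  Track that
information in the glsl_to_tgsi visitor instead and stop generating Mesa IR.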
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 290 +++--------------------------
 src/mesa/state_tracker/st_program.c        |  13 +-
 2 files changed, 33 insertions(+), 270 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e1102503ee0..c562abc96c9 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -254,8 +254,9 @@ public:
    struct gl_shader_compiler_options *options;
 
    int next_temp;
-   
+
    int num_address_regs;
+   int samplers_used;
    bool indirect_addr_temps;
    bool indirect_addr_consts;
 
@@ -2310,170 +2311,23 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
    delete v;
 }
 
-static struct prog_src_register
-mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg)
-{
-   struct prog_src_register mesa_reg;
-
-   mesa_reg.File = reg.file;
-   assert(reg.index < (1 << INST_INDEX_BITS));
-   mesa_reg.Index = reg.index;
-   mesa_reg.Swizzle = reg.swizzle;
-   mesa_reg.RelAddr = reg.reladdr != NULL;
-   mesa_reg.Negate = reg.negate;
-   mesa_reg.Abs = 0;
-   mesa_reg.HasIndex2 = GL_FALSE;
-   mesa_reg.RelAddr2 = 0;
-   mesa_reg.Index2 = 0;
-
-   return mesa_reg;
-}
-
-static void
-set_branchtargets(glsl_to_tgsi_visitor *v,
-        	  struct prog_instruction *mesa_instructions,
-        	  int num_instructions)
-{
-   int if_count = 0, loop_count = 0;
-   int *if_stack, *loop_stack;
-   int if_stack_pos = 0, loop_stack_pos = 0;
-   int i, j;
-
-   for (i = 0; i < num_instructions; i++) {
-      switch (mesa_instructions[i].Opcode) {
-      case OPCODE_IF:
-         if_count++;
-         break;
-      case OPCODE_BGNLOOP:
-         loop_count++;
-         break;
-      case OPCODE_BRK:
-      case OPCODE_CONT:
-         mesa_instructions[i].BranchTarget = -1;
-         break;
-      default:
-         break;
-      }
-   }
-
-   if_stack = rzalloc_array(v->mem_ctx, int, if_count);
-   loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
-
-   for (i = 0; i < num_instructions; i++) {
-      switch (mesa_instructions[i].Opcode) {
-      case OPCODE_IF:
-         if_stack[if_stack_pos] = i;
-         if_stack_pos++;
-         break;
-      case OPCODE_ELSE:
-         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
-         if_stack[if_stack_pos - 1] = i;
-         break;
-      case OPCODE_ENDIF:
-         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
-         if_stack_pos--;
-         break;
-      case OPCODE_BGNLOOP:
-         loop_stack[loop_stack_pos] = i;
-         loop_stack_pos++;
-         break;
-      case OPCODE_ENDLOOP:
-         loop_stack_pos--;
-         /* Rewrite any breaks/conts at this nesting level (haven't
-          * already had a BranchTarget assigned) to point to the end
-          * of the loop.
-          */
-         for (j = loop_stack[loop_stack_pos]; j < i; j++) {
-            if (mesa_instructions[j].Opcode == OPCODE_BRK ||
-        	mesa_instructions[j].Opcode == OPCODE_CONT) {
-               if (mesa_instructions[j].BranchTarget == -1) {
-        	  mesa_instructions[j].BranchTarget = i;
-               }
-            }
-         }
-         /* The loop ends point at each other. */
-         mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
-         mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
-         break;
-      case OPCODE_CAL:
-         foreach_iter(exec_list_iterator, iter, v->function_signatures) {
-            function_entry *entry = (function_entry *)iter.get();
-
-            if (entry->sig_id == mesa_instructions[i].BranchTarget) {
-               mesa_instructions[i].BranchTarget = entry->inst;
-               break;
-            }
-         }
-         break;
-      default:
-         break;
-      }
-   }
-}
-
-static void
-print_program(struct prog_instruction *mesa_instructions,
-              ir_instruction **mesa_instruction_annotation,
-              int num_instructions)
-{
-   /*ir_instruction *last_ir = NULL;*/
-   int i;
-   int indent = 0;
-
-   for (i = 0; i < num_instructions; i++) {
-      struct prog_instruction *mesa_inst = mesa_instructions + i;
-
-      fprintf(stdout, "%3d: ", i);
-
-#if 0
-/* Disable this for now, since printing GLSL IR along with its corresponding 
- * Mesa IR makes the Mesa IR unreadable. */
-      ir_instruction *ir = mesa_instruction_annotation[i];
-      if (last_ir != ir && ir) {
-         int j;
-
-         for (j = 0; j < indent; j++) {
-            fprintf(stdout, " ");
-         }
-         ir->print();
-         printf("\n");
-         last_ir = ir;
-
-         fprintf(stdout, "     "); /* line number spacing. */
-      }
-#endif
-
-      indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
-        				    PROG_PRINT_DEBUG, NULL);
-   }
-}
-
 
 /**
  * Count resources used by the given gpu program (number of texture
  * samplers, etc).
  */
 static void
-count_resources(struct gl_program *prog)
+count_resources(glsl_to_tgsi_visitor *v)
 {
-   unsigned int i;
+   v->samplers_used = 0;
 
-   prog->SamplersUsed = 0;
-
-   for (i = 0; i < prog->NumInstructions; i++) {
-      struct prog_instruction *inst = &prog->Instructions[i];
+   foreach_iter(exec_list_iterator, iter, v->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
 
-      if (_mesa_is_tex_instruction(inst->Opcode)) {
-         prog->SamplerTargets[inst->TexSrcUnit] =
-            (gl_texture_index)inst->TexSrcTarget;
-         prog->SamplersUsed |= 1 << inst->TexSrcUnit;
-         if (inst->TexShadow) {
-            prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
-         }
+      if (_mesa_is_tex_instruction(inst->op)) {
+         v->samplers_used |= 1 << inst->sampler;
       }
    }
-
-   _mesa_update_shader_textures_used(prog);
 }
 
 
@@ -2487,34 +2341,35 @@ count_resources(struct gl_program *prog)
 static void
 check_resources(const struct gl_context *ctx,
                 struct gl_shader_program *shader_program,
-                struct gl_program *prog)
+                glsl_to_tgsi_visitor *prog,
+                struct gl_program *proginfo)
 {
-   switch (prog->Target) {
+   switch (proginfo->Target) {
    case GL_VERTEX_PROGRAM_ARB:
-      if (_mesa_bitcount(prog->SamplersUsed) >
+      if (_mesa_bitcount(prog->samplers_used) >
           ctx->Const.MaxVertexTextureImageUnits) {
          fail_link(shader_program, "Too many vertex shader texture samplers");
       }
-      if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
          fail_link(shader_program, "Too many vertex shader constants");
       }
       break;
    case MESA_GEOMETRY_PROGRAM:
-      if (_mesa_bitcount(prog->SamplersUsed) >
+      if (_mesa_bitcount(prog->samplers_used) >
           ctx->Const.MaxGeometryTextureImageUnits) {
          fail_link(shader_program, "Too many geometry shader texture samplers");
       }
-      if (prog->Parameters->NumParameters >
+      if (proginfo->Parameters->NumParameters >
           MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
          fail_link(shader_program, "Too many geometry shader constants");
       }
       break;
    case GL_FRAGMENT_PROGRAM_ARB:
-      if (_mesa_bitcount(prog->SamplersUsed) >
+      if (_mesa_bitcount(prog->samplers_used) >
           ctx->Const.MaxTextureImageUnits) {
          fail_link(shader_program, "Too many fragment shader texture samplers");
       }
-      if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
          fail_link(shader_program, "Too many fragment shader constants");
       }
       break;
@@ -3767,8 +3622,6 @@ st_translate_program(
    t->pointSizeOutIndex = -1;
    t->prevInstWrotePointSize = GL_FALSE;
 
-   /*_mesa_print_program(program);*/
-
    /*
     * Declare input attributes.
     */
@@ -3952,8 +3805,7 @@ st_translate_program(
 
    /* texture samplers */
    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
-      // XXX: depends on SamplersUsed property generated by conversion to Mesa IR
-      if (proginfo->SamplersUsed & (1 << i)) {
+      if (program->samplers_used & (1 << i)) {
          t->samplers[i] = ureg_DECL_sampler( ureg, i );
       }
    }
@@ -4006,7 +3858,8 @@ out:
 /* ----------------------------- End TGSI code ------------------------------ */
 
 /**
- * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader.
+ * Convert a shader's GLSL IR into a Mesa gl_program, although without 
+ * generating Mesa IR.
  */
 static struct gl_program *
 get_mesa_program(struct gl_context *ctx,
@@ -4014,9 +3867,6 @@ get_mesa_program(struct gl_context *ctx,
         	 struct gl_shader *shader)
 {
    glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
-   struct prog_instruction *mesa_instructions, *mesa_inst;
-   ir_instruction **mesa_instruction_annotation;
-   int i;
    struct gl_program *prog;
    GLenum target;
    const char *target_string;
@@ -4110,90 +3960,6 @@ get_mesa_program(struct gl_context *ctx,
    v->merge_registers();
    v->renumber_registers();
 
-   prog->NumTemporaries = v->next_temp;
-
-   int num_instructions = 0;
-   foreach_iter(exec_list_iterator, iter, v->instructions) {
-      num_instructions++;
-   }
-
-   mesa_instructions =
-      (struct prog_instruction *)calloc(num_instructions,
-        				sizeof(*mesa_instructions));
-   mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *,
-        				      num_instructions);
-
-   /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions.
-    * TODO: remove
-    */
-   mesa_inst = mesa_instructions;
-   i = 0;
-   foreach_iter(exec_list_iterator, iter, v->instructions) {
-      const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
-
-      mesa_inst->Opcode = inst->op;
-      mesa_inst->CondUpdate = inst->cond_update;
-      if (inst->saturate)
-         mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
-      mesa_inst->DstReg.File = inst->dst.file;
-      mesa_inst->DstReg.Index = inst->dst.index;
-      mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
-      mesa_inst->DstReg.WriteMask = inst->dst.writemask;
-      mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
-      mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]);
-      mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]);
-      mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]);
-      mesa_inst->TexSrcUnit = inst->sampler;
-      mesa_inst->TexSrcTarget = inst->tex_target;
-      mesa_inst->TexShadow = inst->tex_shadow;
-      mesa_instruction_annotation[i] = inst->ir;
-
-      /* Set IndirectRegisterFiles. */
-      if (mesa_inst->DstReg.RelAddr)
-         prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
-
-      /* Update program's bitmask of indirectly accessed register files */
-      for (unsigned src = 0; src < 3; src++)
-         if (mesa_inst->SrcReg[src].RelAddr)
-            prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
-
-      if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
-         fail_link(shader_program, "Couldn't flatten if statement\n");
-      }
-
-      switch (mesa_inst->Opcode) {
-      case OPCODE_BGNSUB:
-         inst->function->inst = i;
-         mesa_inst->Comment = strdup(inst->function->sig->function_name());
-         break;
-      case OPCODE_ENDSUB:
-         mesa_inst->Comment = strdup(inst->function->sig->function_name());
-         break;
-      case OPCODE_CAL:
-         mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
-         break;
-      case OPCODE_ARL:
-         prog->NumAddressRegs = 1;
-         break;
-      default:
-         break;
-      }
-
-      mesa_inst++;
-      i++;
-
-      if (!shader_program->LinkStatus)
-         break;
-   }
-
-   if (!shader_program->LinkStatus) {
-      free(mesa_instructions);
-      _mesa_reference_program(ctx, &shader->Program, NULL);
-      return NULL;
-   }
-
-   set_branchtargets(v, mesa_instructions, num_instructions);
-
    if (ctx->Shader.Flags & GLSL_DUMP) {
       printf("\n");
       printf("GLSL IR for linked %s program %d:\n", target_string,
@@ -4201,25 +3967,17 @@ get_mesa_program(struct gl_context *ctx,
       _mesa_print_ir(shader->ir, NULL);
       printf("\n");
       printf("\n");
-      printf("Mesa IR for linked %s program %d:\n", target_string,
-             shader_program->Name);
-      print_program(mesa_instructions, mesa_instruction_annotation,
-        	    num_instructions);
    }
 
-   prog->Instructions = mesa_instructions;
-   prog->NumInstructions = num_instructions;
+   prog->Instructions = NULL;
+   prog->NumInstructions = 0;
 
    do_set_program_inouts(shader->ir, prog);
-   count_resources(prog);
+   count_resources(v);
 
-   check_resources(ctx, shader_program, prog);
+   check_resources(ctx, shader_program, v, prog);
 
    _mesa_reference_program(ctx, &shader->Program, prog);
-
-   if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
-      _mesa_optimize_program(ctx, prog);
-   }
    
    struct st_vertex_program *stvp;
    struct st_fragment_program *stfp;
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index dd618424d66..6d395128295 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -186,7 +186,8 @@ st_prepare_vertex_program(struct gl_context *ctx,
    if (stvp->Base.IsPositionInvariant)
       _mesa_insert_mvp_code(ctx, &stvp->Base);
 
-   assert(stvp->Base.Base.NumInstructions > 1);
+   if (!stvp->glsl_to_tgsi)
+      assert(stvp->Base.Base.NumInstructions > 1);
 
    /*
     * Determine number of inputs, the mappings between VERT_ATTRIB_x
@@ -294,8 +295,11 @@ st_translate_vertex_program(struct st_context *st,
 
    st_prepare_vertex_program(st->ctx, stvp);
 
-   _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
-   _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+   if (!stvp->glsl_to_tgsi)
+   {
+      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
+      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+   }
 
    ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
    if (ureg == NULL) {
@@ -613,7 +617,8 @@ st_translate_fragment_program(struct st_context *st,
       struct ureg_program *ureg;
       GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp);
       
-      _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
+      if (!stfp->glsl_to_tgsi)
+         _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
 
       ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
       if (ureg == NULL)
-- 
cgit v1.2.3


From c341d3cfd0ddbabf6274212b7f0da1a25854a673 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 30 Apr 2011 13:03:33 -0500
Subject: glsl_to_tgsi: remove reads to output registers

Fixes a regression in 0 A.D. introduced by 809a11c77073e999fd47.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 91 ++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index c562abc96c9..5ea03b4424e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -100,6 +100,15 @@ public:
       this->reladdr = NULL;
    }
 
+   st_src_reg(gl_register_file file, int index)
+   {
+      this->file = file;
+      this->index = index;
+      this->swizzle = SWIZZLE_XYZW;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
    st_src_reg()
    {
       this->file = PROGRAM_UNDEFINED;
@@ -346,6 +355,8 @@ public:
 
    bool process_move_condition(ir_rvalue *ir);
 
+   void remove_output_reads(gl_register_file type);
+
    void rename_temp_register(int index, int new_index);
    int get_first_temp_read(int index);
    int get_first_temp_write(int index);
@@ -2595,6 +2606,81 @@ set_uniform_initializers(struct gl_context *ctx,
    ralloc_free(mem_ctx);
 }
 
+/*
+ * Scan/rewrite program to remove reads of custom (output) registers.
+ * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
+ * (for vertex shaders).
+ * In GLSL shaders, varying vars can be read and written.
+ * On some hardware, trying to read an output register causes trouble.
+ * So, rewrite the program to use a temporary register in this case.
+ * 
+ * Based on _mesa_remove_output_reads from programopt.c.
+ */
+void
+glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
+{
+   GLuint i;
+   GLint outputMap[VERT_RESULT_MAX];
+   GLuint numVaryingReads = 0;
+   GLboolean usedTemps[MAX_PROGRAM_TEMPS];
+   GLuint firstTemp = 0;
+
+   _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
+                             usedTemps, MAX_PROGRAM_TEMPS);
+
+   assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
+   assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
+
+   for (i = 0; i < VERT_RESULT_MAX; i++)
+      outputMap[i] = -1;
+
+   /* look for instructions which read from varying vars */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const GLuint numSrc = _mesa_num_inst_src_regs(inst->op);
+      GLuint j;
+      for (j = 0; j < numSrc; j++) {
+         if (inst->src[j].file == type) {
+            /* replace the read with a temp reg */
+            const GLuint var = inst->src[j].index;
+            if (outputMap[var] == -1) {
+               numVaryingReads++;
+               outputMap[var] = _mesa_find_free_register(usedTemps,
+                                                         MAX_PROGRAM_TEMPS,
+                                                         firstTemp);
+               firstTemp = outputMap[var] + 1;
+            }
+            inst->src[j].file = PROGRAM_TEMPORARY;
+            inst->src[j].index = outputMap[var];
+         }
+      }
+   }
+
+   if (numVaryingReads == 0)
+      return; /* nothing to be done */
+
+   /* look for instructions which write to the varying vars identified above */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
+         /* change inst to write to the temp reg, instead of the varying */
+         inst->dst.file = PROGRAM_TEMPORARY;
+         inst->dst.index = outputMap[inst->dst.index];
+      }
+   }
+   
+   /* insert new MOV instructions at the end */
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (outputMap[i] >= 0) {
+         /* MOV VAR[i], TEMP[tmp]; */
+         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]);
+         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW);
+         dst.index = i;
+         this->emit(NULL, OPCODE_MOV, dst, src);
+      }
+   }
+}
+
 /* Replaces all references to a temporary register index with another index. */
 void
 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
@@ -3954,6 +4040,11 @@ get_mesa_program(struct gl_context *ctx,
    }
 #endif
 
+   /* Remove reads from output registers, and from varyings in vertex shaders. */
+   v->remove_output_reads(PROGRAM_OUTPUT);
+   if (target == GL_VERTEX_PROGRAM_ARB)
+      v->remove_output_reads(PROGRAM_VARYING);
+
    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
    v->copy_propagate();
    v->eliminate_dead_code();
-- 
cgit v1.2.3


From 556bd82ce1227a568d69dfa0c22841986267d39f Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 30 Apr 2011 13:44:32 -0500
Subject: glsl_to_tgsi: remove a bad assertion

It was triggered by Alien Arena.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5ea03b4424e..aa63539e5e8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3077,11 +3077,11 @@ glsl_to_tgsi_visitor::merge_registers(void)
    
    /* Start looking for registers with non-overlapping usages that can be 
     * merged together. */
-   for (i=0; i < this->next_temp - 1; i++) {
+   for (i=0; i < this->next_temp; i++) {
       /* Don't touch unused registers. */
       if (last_reads[i] < 0 || first_writes[i] < 0) continue;
       
-      for (j=i+1; j < this->next_temp; j++) {
+      for (j=0; j < this->next_temp; j++) {
          /* Don't touch unused registers. */
          if (last_reads[j] < 0 || first_writes[j] < 0) continue;
          
@@ -3089,8 +3089,9 @@ glsl_to_tgsi_visitor::merge_registers(void)
           * in the same instruction as the last read from i.  Note that the 
           * register at index i will always be used earlier or at the same time 
           * as the register at index j. */
-         assert(first_writes[i] <= first_writes[j]);
-         if (last_reads[i] <= first_writes[j]) {
+         if (first_writes[i] <= first_writes[j] && 
+             last_reads[i] <= first_writes[j])
+         {
             rename_temp_register(j, i); /* Replace all references to j with i.*/
             
             /* Update the first_writes and last_reads arrays with the new 
-- 
cgit v1.2.3


From 5768ed6429937940bd48f5de4f8383273952880a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 30 Apr 2011 21:17:38 -0500
Subject: glsl_to_tgsi: define the sampler objects used

Fixes the Nexuiz title screen and the water in 0 A.D.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index aa63539e5e8..5f3f0ba295a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2328,7 +2328,7 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
  * samplers, etc).
  */
 static void
-count_resources(glsl_to_tgsi_visitor *v)
+count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
 {
    v->samplers_used = 0;
 
@@ -2337,8 +2337,17 @@ count_resources(glsl_to_tgsi_visitor *v)
 
       if (_mesa_is_tex_instruction(inst->op)) {
          v->samplers_used |= 1 << inst->sampler;
+
+         prog->SamplerTargets[inst->sampler] =
+            (gl_texture_index)inst->tex_target;
+         if (inst->tex_shadow) {
+            prog->ShadowSamplers |= 1 << inst->sampler;
+         }
       }
    }
+   
+   prog->SamplersUsed = v->samplers_used;
+   _mesa_update_shader_textures_used(prog);
 }
 
 
@@ -4065,7 +4074,7 @@ get_mesa_program(struct gl_context *ctx,
    prog->NumInstructions = 0;
 
    do_set_program_inouts(shader->ir, prog);
-   count_resources(v);
+   count_resources(v, prog);
 
    check_resources(ctx, shader_program, v, prog);
 
-- 
cgit v1.2.3


From a6705aa5ca151278ed1e596b68a327afd1405b9e Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 30 Apr 2011 23:17:11 -0500
Subject: glsl_to_tgsi: lower noise opcodes when converting from GLSL IR, not
 when generating TGSI

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5f3f0ba295a..08c6a7b2dd3 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1275,12 +1275,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_noise: {
-      const enum prog_opcode opcode =
-         prog_opcode(OPCODE_NOISE1
-        	     + (ir->operands[0]->type->vector_elements) - 1);
-      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
-
-      emit(ir, opcode, result_dst, op[0]);
+      /* At some point, a motivated person could add a better
+       * implementation of noise.  Currently not even the nvidia
+       * binary drivers do anything more than this.  In any case, the
+       * place to do this is in the GL state tracker, not the poor
+       * driver.
+       */
+      emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
       break;
    }
 
@@ -3484,13 +3485,7 @@ compile_tgsi_instruction(struct st_translate *t,
    case OPCODE_NOISE2:
    case OPCODE_NOISE3:
    case OPCODE_NOISE4:
-      /* At some point, a motivated person could add a better
-       * implementation of noise.  Currently not even the nvidia
-       * binary drivers do anything more than this.  In any case, the
-       * place to do this is in the GL state tracker, not the poor
-       * driver.
-       */
-      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
+      assert(!"OPCODE_NOISE should have been lowered\n");
       break;
 		 
    case OPCODE_DDY:
-- 
cgit v1.2.3


From 3b0858f1aed83e2d90449f042d625c86ac7b93ed Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 1 May 2011 11:55:03 -0500
Subject: glsl_to_tgsi: support DDY (ir_unop_dFdy)

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 08c6a7b2dd3..eed9bb0819e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1271,6 +1271,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, OPCODE_DDX, result_dst, op[0]);
       break;
    case ir_unop_dFdy:
+      op[0].negate = ~op[0].negate;
       emit(ir, OPCODE_DDY, result_dst, op[0]);
       break;
 
@@ -3487,12 +3488,6 @@ compile_tgsi_instruction(struct st_translate *t,
    case OPCODE_NOISE4:
       assert(!"OPCODE_NOISE should have been lowered\n");
       break;
-		 
-   case OPCODE_DDY:
-      // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c
-      assert(!"OPCODE_DDY");
-      //emit_ddy( t, dst[0], &inst->src[0] );
-      break;
 
    default:
       ureg_insn( ureg, 
-- 
cgit v1.2.3


From 56dc2c176c3ef0d4d5abea54ff4035b062262286 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 1 May 2011 21:49:21 -0500
Subject: glsl_to_tgsi: use TGSI opcodes when converting from GLSL IR

Before, the translator used Mesa IR opcodes (a holdover from ir_to_mesa) and
converted them to TGSI opcodes during TGSI emission.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 550 ++++++++++++-----------------
 1 file changed, 217 insertions(+), 333 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index eed9bb0819e..4cb2f377e98 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -27,7 +27,7 @@
 /**
  * \file glsl_to_tgsi.cpp
  *
- * Translate GLSL IR to Mesa's gl_program representation and to TGSI.
+ * Translate GLSL IR to TGSI.
  */
 
 #include <stdio.h>
@@ -63,11 +63,12 @@ extern "C" {
 #include "pipe/p_state.h"
 #include "util/u_math.h"
 #include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
 #include "st_context.h"
 #include "st_program.h"
 #include "st_glsl_to_tgsi.h"
 #include "st_mesa_to_tgsi.h"
+}
 
 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
                            (1 << PROGRAM_ENV_PARAM) |    \
@@ -75,7 +76,6 @@ extern "C" {
                            (1 << PROGRAM_NAMED_PARAM) |  \
                            (1 << PROGRAM_CONSTANT) |     \
                            (1 << PROGRAM_UNIFORM))
-}
 
 class st_src_reg;
 class st_dst_reg;
@@ -83,8 +83,7 @@ class st_dst_reg;
 static int swizzle_for_size(int size);
 
 /**
- * This struct is a corresponding struct to Mesa prog_src_register, with
- * wider fields.
+ * This struct is a corresponding struct to TGSI ureg_src.
  */
 class st_src_reg {
 public:
@@ -190,7 +189,7 @@ public:
       return node;
    }
 
-   enum prog_opcode op;
+   unsigned op;
    st_dst_reg dst;
    st_src_reg src[3];
    /** Pointer to the ir source this tree came from for debugging */
@@ -201,7 +200,7 @@ public:
    int tex_target; /**< One of TEXTURE_*_INDEX */
    GLboolean tex_shadow;
 
-   class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
+   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
 };
 
 class variable_storage : public exec_node {
@@ -317,15 +316,15 @@ public:
    /** List of glsl_to_tgsi_instruction */
    exec_list instructions;
 
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op);
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
 
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst, st_src_reg src0);
 
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst,
         		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
 
@@ -338,13 +337,13 @@ public:
                 st_src_reg src1,
                 unsigned elements);
 
-   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+   void emit_scalar(ir_instruction *ir, unsigned op,
         	    st_dst_reg dst, st_src_reg src0);
 
-   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+   void emit_scalar(ir_instruction *ir, unsigned op,
         	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
-   void emit_scs(ir_instruction *ir, enum prog_opcode op,
+   void emit_scs(ir_instruction *ir, unsigned op,
         	 st_dst_reg dst, const st_src_reg &src);
 
    GLboolean try_emit_mad(ir_expression *ir,
@@ -405,8 +404,29 @@ swizzle_for_size(int size)
    return size_swizzles[size - 1];
 }
 
+static bool
+is_tex_instruction(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->is_tex;
+}
+
+static unsigned
+num_inst_dst_regs(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->num_dst;
+}
+
+static unsigned
+num_inst_src_regs(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->is_tex ? info->num_src - 1 : info->num_src;
+}
+
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
         		 st_dst_reg dst,
         		 st_src_reg src0, st_src_reg src1, st_src_reg src2)
 {
@@ -427,7 +447,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
    reladdr_to_temp(ir, &src0, &num_reladdr);
 
    if (dst.reladdr) {
-      emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
+      emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr);
       num_reladdr--;
    }
    assert(num_reladdr == 0);
@@ -441,7 +461,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 
    inst->function = NULL;
    
-   if (op == OPCODE_ARL)
+   if (op == TGSI_OPCODE_ARL)
       this->num_address_regs = 1;
    
    /* Update indirect addressing status used by TGSI */
@@ -491,14 +511,14 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 
 
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
         		 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
 {
    return emit(ir, op, dst, src0, src1, undef_src);
 }
 
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
         		 st_dst_reg dst, st_src_reg src0)
 {
    assert(dst.writemask != 0);
@@ -506,7 +526,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 }
 
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op)
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
 {
    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
@@ -516,30 +536,30 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
         		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
         		    unsigned elements)
 {
-   static const gl_inst_opcode dot_opcodes[] = {
-      OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
+   static const unsigned dot_opcodes[] = {
+      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
    };
 
    emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 }
 
 /**
- * Emits Mesa scalar opcodes to produce unique answers across channels.
+ * Emits TGSI scalar opcodes to produce unique answers across channels.
  *
- * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
+ * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
  * channel determines the result across all channels.  So to do a vec4
  * of this operation, we want to emit a scalar per source channel used
  * to produce dest channels.
  */
 void
-glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst,
         			st_src_reg orig_src0, st_src_reg orig_src1)
 {
    int i, j;
    int done_mask = ~dst.writemask;
 
-   /* Mesa RCP is a scalar operation splatting results to all channels,
+   /* TGSI RCP is a scalar operation splatting results to all channels,
     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
     * dst channels.
     */
@@ -577,7 +597,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 }
 
 void
-glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst, st_src_reg src0)
 {
    st_src_reg undef = undef_src;
@@ -588,21 +608,21 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 }
 
 /**
- * Emit an OPCODE_SCS instruction
+ * Emit a TGSI_OPCODE_SCS instruction
  *
- * The \c SCS opcode functions a bit differently than the other Mesa (or
- * ARB_fragment_program) opcodes.  Instead of splatting its result across all
- * four components of the destination, it writes one value to the \c x
- * component and another value to the \c y component.
+ * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
+ * Instead of splatting its result across all four components of the 
+ * destination, it writes one value to the \c x component and another value to 
+ * the \c y component.
  *
  * \param ir        IR instruction being processed
- * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
- *                  value is desired.
+ * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 
+ *                  on which value is desired.
  * \param dst       Destination register
  * \param src       Source register
  */
 void
-glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
         		     st_dst_reg dst,
         		     const st_src_reg &src)
 {
@@ -613,12 +633,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
       return;
    }
 
-   const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
+   const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
    const unsigned scs_mask = (1U << component);
    int done_mask = ~dst.writemask;
    st_src_reg tmp;
 
-   assert(op == OPCODE_SIN || op == OPCODE_COS);
+   assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
 
    /* If there are compnents in the destination that differ from the component
     * that will be written by the SCS instrution, we'll need a temporary.
@@ -661,7 +681,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
 
          /* Emit the SCS instruction.
           */
-         inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
+         inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
          inst->dst.writemask = scs_mask;
 
          /* Move the result of the SCS instruction to the desired location in
@@ -669,12 +689,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
           */
          tmp.swizzle = MAKE_SWIZZLE4(component, component,
         			     component, component);
-         inst = emit(ir, OPCODE_SCS, dst, tmp);
+         inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
          inst->dst.writemask = this_mask;
       } else {
          /* Emit the SCS instruction to write directly to the destination.
           */
-         glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
+         glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
          inst->dst.writemask = scs_mask;
       }
 
@@ -870,7 +890,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
          } else {
             st_src_reg src(PROGRAM_STATE_VAR, index, NULL);
             src.swizzle = slots[i].swizzle;
-            emit(ir, OPCODE_MOV, dst, src);
+            emit(ir, TGSI_OPCODE_MOV, dst, src);
             /* even a float takes up a whole vec4 reg in a struct/array. */
             dst.index++;
          }
@@ -903,7 +923,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir)
       delete a;
    }
 
-   emit(NULL, OPCODE_BGNLOOP);
+   emit(NULL, TGSI_OPCODE_BGNLOOP);
 
    if (ir->to) {
       ir_expression *e =
@@ -936,7 +956,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir)
       delete e;
    }
 
-   emit(NULL, OPCODE_ENDLOOP);
+   emit(NULL, TGSI_OPCODE_ENDLOOP);
 }
 
 void
@@ -944,10 +964,10 @@ glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
 {
    switch (ir->mode) {
    case ir_loop_jump::jump_break:
-      emit(NULL, OPCODE_BRK);
+      emit(NULL, TGSI_OPCODE_BRK);
       break;
    case ir_loop_jump::jump_continue:
-      emit(NULL, OPCODE_CONT);
+      emit(NULL, TGSI_OPCODE_CONT);
       break;
    }
 }
@@ -1000,7 +1020,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    c = this->result;
 
    this->result = get_temp(ir->type);
-   emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c);
+   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c);
 
    return true;
 }
@@ -1023,7 +1043,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
 
    this->result = get_temp(ir->type);
    glsl_to_tgsi_instruction *inst;
-   inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src);
+   inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src);
    inst->saturate = true;
 
    return true;
@@ -1036,135 +1056,18 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
    if (!reg->reladdr)
       return;
 
-   emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
+   emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr);
 
    if (*num_reladdr != 1) {
       st_src_reg temp = get_temp(glsl_type::vec4_type);
 
-      emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg);
+      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
       *reg = temp;
    }
 
    (*num_reladdr)--;
 }
 
-void
-glsl_to_tgsi_visitor::emit_swz(ir_expression *ir)
-{
-   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
-    * This means that each of the operands is either an immediate value of -1,
-    * 0, or 1, or is a component from one source register (possibly with
-    * negation).
-    */
-   uint8_t components[4] = { 0 };
-   bool negate[4] = { false };
-   ir_variable *var = NULL;
-
-   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
-      ir_rvalue *op = ir->operands[i];
-
-      assert(op->type->is_scalar());
-
-      while (op != NULL) {
-         switch (op->ir_type) {
-         case ir_type_constant: {
-
-            assert(op->type->is_scalar());
-
-            const ir_constant *const c = op->as_constant();
-            if (c->is_one()) {
-               components[i] = SWIZZLE_ONE;
-            } else if (c->is_zero()) {
-               components[i] = SWIZZLE_ZERO;
-            } else if (c->is_negative_one()) {
-               components[i] = SWIZZLE_ONE;
-               negate[i] = true;
-            } else {
-               assert(!"SWZ constant must be 0.0 or 1.0.");
-            }
-
-            op = NULL;
-            break;
-         }
-
-         case ir_type_dereference_variable: {
-            ir_dereference_variable *const deref =
-               (ir_dereference_variable *) op;
-
-            assert((var == NULL) || (deref->var == var));
-            components[i] = SWIZZLE_X;
-            var = deref->var;
-            op = NULL;
-            break;
-         }
-
-         case ir_type_expression: {
-            ir_expression *const expr = (ir_expression *) op;
-
-            assert(expr->operation == ir_unop_neg);
-            negate[i] = true;
-
-            op = expr->operands[0];
-            break;
-         }
-
-         case ir_type_swizzle: {
-            ir_swizzle *const swiz = (ir_swizzle *) op;
-
-            components[i] = swiz->mask.x;
-            op = swiz->val;
-            break;
-         }
-
-         default:
-            assert(!"Should not get here.");
-            return;
-         }
-      }
-   }
-
-   assert(var != NULL);
-
-   ir_dereference_variable *const deref =
-      new(mem_ctx) ir_dereference_variable(var);
-
-   this->result.file = PROGRAM_UNDEFINED;
-   deref->accept(this);
-   if (this->result.file == PROGRAM_UNDEFINED) {
-      ir_print_visitor v;
-      printf("Failed to get tree for expression operand:\n");
-      deref->accept(&v);
-      exit(1);
-   }
-
-   st_src_reg src;
-
-   src = this->result;
-   src.swizzle = MAKE_SWIZZLE4(components[0],
-        		       components[1],
-        		       components[2],
-        		       components[3]);
-   src.negate = ((unsigned(negate[0]) << 0)
-        	 | (unsigned(negate[1]) << 1)
-        	 | (unsigned(negate[2]) << 2)
-        	 | (unsigned(negate[3]) << 3));
-
-   /* Storage for our result.  Ideally for an assignment we'd be using the
-    * actual storage for the result here, instead.
-    */
-   const st_src_reg result_src = get_temp(ir->type);
-   st_dst_reg result_dst = st_dst_reg(result_src);
-
-   /* Limit writes to the channels that will be used by result_src later.
-    * This does limit this temp's use as a temporary for multi-instruction
-    * sequences.
-    */
-   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
-
-   emit(ir, OPCODE_SWZ, result_dst, src);
-   this->result = result_src;
-}
-
 void
 glsl_to_tgsi_visitor::visit(ir_expression *ir)
 {
@@ -1173,7 +1076,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    st_src_reg result_src;
    st_dst_reg result_dst;
 
-   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
+   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
     */
    if (ir->operation == ir_binop_add) {
       if (try_emit_mad(ir, 1))
@@ -1184,10 +1087,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    if (try_emit_sat(ir))
       return;
 
-   if (ir->operation == ir_quadop_vector) {
-      this->emit_swz(ir);
-      return;
-   }
+   if (ir->operation == ir_quadop_vector)
+      assert(!"ir_quadop_vector should have been lowered");
 
    for (operand = 0; operand < ir->get_num_operands(); operand++) {
       this->result.file = PROGRAM_UNDEFINED;
@@ -1228,51 +1129,51 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0));
       break;
    case ir_unop_neg:
       op[0].negate = ~op[0].negate;
       result_src = op[0];
       break;
    case ir_unop_abs:
-      emit(ir, OPCODE_ABS, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
       break;
    case ir_unop_sign:
-      emit(ir, OPCODE_SSG, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
       break;
    case ir_unop_rcp:
-      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
       break;
 
    case ir_unop_exp2:
-      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
       break;
    case ir_unop_exp:
    case ir_unop_log:
       assert(!"not reached: should be handled by ir_explog_to_explog2");
       break;
    case ir_unop_log2:
-      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
       break;
    case ir_unop_sin:
-      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
       break;
    case ir_unop_cos:
-      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
       break;
    case ir_unop_sin_reduced:
-      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
+      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
       break;
    case ir_unop_cos_reduced:
-      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
+      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
       break;
 
    case ir_unop_dFdx:
-      emit(ir, OPCODE_DDX, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
       break;
    case ir_unop_dFdy:
       op[0].negate = ~op[0].negate;
-      emit(ir, OPCODE_DDY, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
       break;
 
    case ir_unop_noise: {
@@ -1282,19 +1183,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        * place to do this is in the GL state tracker, not the poor
        * driver.
        */
-      emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
+      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
       break;
    }
 
    case ir_binop_add:
-      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
       break;
    case ir_binop_sub:
-      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_mul:
-      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
    case ir_binop_div:
       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
@@ -1303,33 +1204,33 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_less:
-      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
       break;
    case ir_binop_greater:
-      emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
       break;
    case ir_binop_lequal:
-      emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
       break;
    case ir_binop_gequal:
-      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
       break;
    case ir_binop_equal:
-      emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
       break;
    case ir_binop_nequal:
-      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       break;
    case ir_binop_all_equal:
       /* "==" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(glsl_type::vec4_type);
-         emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
          emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
       } else {
-         emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
       }
       break;
    case ir_binop_any_nequal:
@@ -1337,11 +1238,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(glsl_type::vec4_type);
-         emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
          emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
       } else {
-         emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       }
       break;
 
@@ -1349,22 +1250,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       assert(ir->operands[0]->type->is_vector());
       emit_dp(ir, result_dst, op[0], op[0],
               ir->operands[0]->type->vector_elements);
-      emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
       break;
 
    case ir_binop_logic_xor:
-      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_logic_or:
       /* This could be a saturated add and skip the SNE. */
-      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
       break;
 
    case ir_binop_logic_and:
       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
-      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_dot:
@@ -1376,15 +1277,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 
    case ir_unop_sqrt:
       /* sqrt(x) = x * rsq(x). */
-      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
-      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
       /* For incoming channels <= 0, set the result to 0. */
       op[0].negate = ~op[0].negate;
-      emit(ir, OPCODE_CMP, result_dst,
+      emit(ir, TGSI_OPCODE_CMP, result_dst,
         		  op[0], result_src, st_src_reg_for_float(0.0));
       break;
    case ir_unop_rsq:
-      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
       break;
    case ir_unop_i2f:
    case ir_unop_b2f:
@@ -1393,36 +1294,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       result_src = op[0];
       break;
    case ir_unop_f2i:
-      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_f2b:
    case ir_unop_i2b:
-      emit(ir, OPCODE_SNE, result_dst,
+      emit(ir, TGSI_OPCODE_SNE, result_dst,
         		  op[0], st_src_reg_for_float(0.0));
       break;
    case ir_unop_trunc:
-      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_ceil:
       op[0].negate = ~op[0].negate;
-      emit(ir, OPCODE_FLR, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
       result_src.negate = ~result_src.negate;
       break;
    case ir_unop_floor:
-      emit(ir, OPCODE_FLR, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
       break;
    case ir_unop_fract:
-      emit(ir, OPCODE_FRC, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
       break;
 
    case ir_binop_min:
-      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
       break;
    case ir_binop_max:
-      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
       break;
    case ir_binop_pow:
-      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
+      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
       break;
 
    case ir_unop_bit_not:
@@ -1586,7 +1487,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
       } else {
          index_reg = get_temp(glsl_type::float_type);
 
-         emit(ir, OPCODE_MUL, st_dst_reg(index_reg),
+         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
               this->result, st_src_reg_for_float(element_size));
       }
 
@@ -1728,9 +1629,9 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
 
    src_ir->accept(this);
 
-   /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
+   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
-    * choose which value OPCODE_CMP produces without an extra instruction
+    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
     * computing the condition.
     */
    if (negate)
@@ -1803,9 +1704,9 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
 
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          if (switch_order) {
-            emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r);
+            emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r);
          } else {
-            emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l));
+            emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l));
          }
 
          l.index++;
@@ -1813,7 +1714,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       }
    } else {
       for (i = 0; i < type_size(ir->lhs->type); i++) {
-         emit(ir, OPCODE_MOV, l, r);
+         emit(ir, TGSI_OPCODE_MOV, l, r);
          l.index++;
          r.index++;
       }
@@ -1849,7 +1750,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          src = this->result;
 
          for (i = 0; i < (unsigned int)size; i++) {
-            emit(ir, OPCODE_MOV, temp, src);
+            emit(ir, TGSI_OPCODE_MOV, temp, src);
 
             src.index++;
             temp.index++;
@@ -1870,7 +1771,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          ir->array_elements[i]->accept(this);
          src = this->result;
          for (int j = 0; j < size; j++) {
-            emit(ir, OPCODE_MOV, temp, src);
+            emit(ir, TGSI_OPCODE_MOV, temp, src);
 
             src.index++;
             temp.index++;
@@ -1893,7 +1794,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
         					values,
         					ir->type->vector_elements,
         					&src.swizzle);
-         emit(ir, OPCODE_MOV, mat_column, src);
+         emit(ir, TGSI_OPCODE_MOV, mat_column, src);
 
          mat_column.index++;
       }
@@ -2005,7 +1906,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
          l.cond_mask = COND_TR;
 
          for (i = 0; i < type_size(param->type); i++) {
-            emit(ir, OPCODE_MOV, l, r);
+            emit(ir, TGSI_OPCODE_MOV, l, r);
             l.index++;
             r.index++;
          }
@@ -2016,7 +1917,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
    assert(!sig_iter.has_next());
 
    /* Emit call instruction */
-   call_inst = emit(ir, OPCODE_CAL);
+   call_inst = emit(ir, TGSI_OPCODE_CAL);
    call_inst->function = entry;
 
    /* Process out parameters. */
@@ -2041,7 +1942,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
          st_dst_reg l = st_dst_reg(this->result);
 
          for (i = 0; i < type_size(param->type); i++) {
-            emit(ir, OPCODE_MOV, l, r);
+            emit(ir, TGSI_OPCODE_MOV, l, r);
             l.index++;
             r.index++;
          }
@@ -2061,7 +1962,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    st_src_reg result_src, coord, lod_info, projector, dx, dy;
    st_dst_reg result_dst, coord_dst;
    glsl_to_tgsi_instruction *inst = NULL;
-   prog_opcode opcode = OPCODE_NOP;
+   unsigned opcode = TGSI_OPCODE_NOP;
 
    ir->coordinate->accept(this);
 
@@ -2072,7 +1973,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
     */
    coord = get_temp(glsl_type::vec4_type);
    coord_dst = st_dst_reg(coord);
-   emit(ir, OPCODE_MOV, coord_dst, this->result);
+   emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
 
    if (ir->projector) {
       ir->projector->accept(this);
@@ -2087,20 +1988,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
 
    switch (ir->op) {
    case ir_tex:
-      opcode = OPCODE_TEX;
+      opcode = TGSI_OPCODE_TEX;
       break;
    case ir_txb:
-      opcode = OPCODE_TXB;
+      opcode = TGSI_OPCODE_TXB;
       ir->lod_info.bias->accept(this);
       lod_info = this->result;
       break;
    case ir_txl:
-      opcode = OPCODE_TXL;
+      opcode = TGSI_OPCODE_TXL;
       ir->lod_info.lod->accept(this);
       lod_info = this->result;
       break;
    case ir_txd:
-      opcode = OPCODE_TXD;
+      opcode = TGSI_OPCODE_TXD;
       ir->lod_info.grad.dPdx->accept(this);
       dx = this->result;
       ir->lod_info.grad.dPdy->accept(this);
@@ -2112,25 +2013,25 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    }
 
    if (ir->projector) {
-      if (opcode == OPCODE_TEX) {
+      if (opcode == TGSI_OPCODE_TEX) {
          /* Slot the projector in as the last component of the coord. */
          coord_dst.writemask = WRITEMASK_W;
-         emit(ir, OPCODE_MOV, coord_dst, projector);
+         emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
          coord_dst.writemask = WRITEMASK_XYZW;
-         opcode = OPCODE_TXP;
+         opcode = TGSI_OPCODE_TXP;
       } else {
          st_src_reg coord_w = coord;
          coord_w.swizzle = SWIZZLE_WWWW;
 
          /* For the other TEX opcodes there's no projective version
-          * since the last slot is taken up by lod info.  Do the
+          * since the last slot is taken up by LOD info.  Do the
           * projective divide now.
           */
          coord_dst.writemask = WRITEMASK_W;
-         emit(ir, OPCODE_RCP, coord_dst, projector);
+         emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
 
          /* In the case where we have to project the coordinates "by hand,"
-          * the shadow comparitor value must also be projected.
+          * the shadow comparator value must also be projected.
           */
          st_src_reg tmp_src = coord;
          if (ir->shadow_comparitor) {
@@ -2143,42 +2044,42 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
             st_dst_reg tmp_dst = st_dst_reg(tmp_src);
 
             tmp_dst.writemask = WRITEMASK_Z;
-            emit(ir, OPCODE_MOV, tmp_dst, this->result);
+            emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
 
             tmp_dst.writemask = WRITEMASK_XY;
-            emit(ir, OPCODE_MOV, tmp_dst, coord);
+            emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
          }
 
          coord_dst.writemask = WRITEMASK_XYZ;
-         emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
+         emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
 
          coord_dst.writemask = WRITEMASK_XYZW;
          coord.swizzle = SWIZZLE_XYZW;
       }
    }
 
-   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
-    * comparitor was put in the correct place (and projected) by the code,
+   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
+    * comparator was put in the correct place (and projected) by the code,
     * above, that handles by-hand projection.
     */
-   if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
+   if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
       /* Slot the shadow value in as the second to last component of the
        * coord.
        */
       ir->shadow_comparitor->accept(this);
       coord_dst.writemask = WRITEMASK_Z;
-      emit(ir, OPCODE_MOV, coord_dst, this->result);
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
       coord_dst.writemask = WRITEMASK_XYZW;
    }
 
-   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
-      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
+   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
+      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
       coord_dst.writemask = WRITEMASK_W;
-      emit(ir, OPCODE_MOV, coord_dst, lod_info);
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
       coord_dst.writemask = WRITEMASK_XYZW;
    }
 
-   if (opcode == OPCODE_TXD)
+   if (opcode == TGSI_OPCODE_TXD)
       inst = emit(ir, opcode, result_dst, coord, dx, dy);
    else
       inst = emit(ir, opcode, result_dst, coord);
@@ -2235,13 +2136,13 @@ glsl_to_tgsi_visitor::visit(ir_return *ir)
       l = st_dst_reg(current_function->return_reg);
 
       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
-         emit(ir, OPCODE_MOV, l, r);
+         emit(ir, TGSI_OPCODE_MOV, l, r);
          l.index++;
          r.index++;
       }
    }
 
-   emit(ir, OPCODE_RET);
+   emit(ir, TGSI_OPCODE_RET);
 }
 
 void
@@ -2252,9 +2153,9 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir)
    if (ir->condition) {
       ir->condition->accept(this);
       this->result.negate = ~this->result.negate;
-      emit(ir, OPCODE_KIL, undef_dst, this->result);
+      emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
    } else {
-      emit(ir, OPCODE_KIL_NV);
+      emit(ir, TGSI_OPCODE_KILP);
    }
 
    fp->UsesKill = GL_TRUE;
@@ -2280,14 +2181,14 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
        */
       if (cond_inst == prev_inst) {
          st_src_reg temp = get_temp(glsl_type::bool_type);
-         cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result);
+         cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
       }
       cond_inst->cond_update = GL_TRUE;
 
-      if_inst = emit(ir->condition, OPCODE_IF);
+      if_inst = emit(ir->condition, TGSI_OPCODE_IF);
       if_inst->dst.cond_mask = COND_NE;
    } else {
-      if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
+      if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
    }
 
    this->instructions.push_tail(if_inst);
@@ -2295,11 +2196,11 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
    visit_exec_list(&ir->then_instructions, this);
 
    if (!ir->else_instructions.is_empty()) {
-      else_inst = emit(ir->condition, OPCODE_ELSE);
+      else_inst = emit(ir->condition, TGSI_OPCODE_ELSE);
       visit_exec_list(&ir->else_instructions, this);
    }
 
-   if_inst = emit(ir->condition, OPCODE_ENDIF);
+   if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
 }
 
 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
@@ -2337,7 +2238,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
    foreach_iter(exec_list_iterator, iter, v->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
 
-      if (_mesa_is_tex_instruction(inst->op)) {
+      if (is_tex_instruction(inst->op)) {
          v->samplers_used |= 1 << inst->sampler;
 
          prog->SamplerTargets[inst->sampler] =
@@ -2648,7 +2549,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
    /* look for instructions which read from varying vars */
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
-      const GLuint numSrc = _mesa_num_inst_src_regs(inst->op);
+      const GLuint numSrc = num_inst_src_regs(inst->op);
       GLuint j;
       for (j = 0; j < numSrc; j++) {
          if (inst->src[j].file == type) {
@@ -2687,7 +2588,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
          st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]);
          st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW);
          dst.index = i;
-         this->emit(NULL, OPCODE_MOV, dst, src);
+         this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
       }
    }
 }
@@ -2700,7 +2601,7 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
       unsigned j;
       
-      for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
          if (inst->src[j].file == PROGRAM_TEMPORARY && 
              inst->src[j].index == index) {
             inst->src[j].index = new_index;
@@ -2723,17 +2624,17 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
       
-      for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
          if (inst->src[j].file == PROGRAM_TEMPORARY && 
              inst->src[j].index == index) {
             return (depth == 0) ? i : loop_start;
          }
       }
       
-      if (inst->op == OPCODE_BGNLOOP) {
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
          if(depth++ == 0)
             loop_start = i;
-      } else if (inst->op == OPCODE_ENDLOOP) {
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
          if (--depth == 0)
             loop_start = -1;
       }
@@ -2759,10 +2660,10 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index)
          return (depth == 0) ? i : loop_start;
       }
       
-      if (inst->op == OPCODE_BGNLOOP) {
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
          if(depth++ == 0)
             loop_start = i;
-      } else if (inst->op == OPCODE_ENDLOOP) {
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
          if (--depth == 0)
             loop_start = -1;
       }
@@ -2784,16 +2685,16 @@ glsl_to_tgsi_visitor::get_last_temp_read(int index)
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
       
-      for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
          if (inst->src[j].file == PROGRAM_TEMPORARY && 
              inst->src[j].index == index) {
             last = (depth == 0) ? i : -2;
          }
       }
       
-      if (inst->op == OPCODE_BGNLOOP)
+      if (inst->op == TGSI_OPCODE_BGNLOOP)
          depth++;
-      else if (inst->op == OPCODE_ENDLOOP)
+      else if (inst->op == TGSI_OPCODE_ENDLOOP)
          if (--depth == 0 && last == -2)
             last = i;
       assert(depth >= 0);
@@ -2818,9 +2719,9 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
       if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
          last = (depth == 0) ? i : -2;
       
-      if (inst->op == OPCODE_BGNLOOP)
+      if (inst->op == TGSI_OPCODE_BGNLOOP)
          depth++;
-      else if (inst->op == OPCODE_ENDLOOP)
+      else if (inst->op == TGSI_OPCODE_ENDLOOP)
          if (--depth == 0 && last == -2)
             last = i;
       assert(depth >= 0);
@@ -2922,18 +2823,18 @@ glsl_to_tgsi_visitor::copy_propagate(void)
       }
 
       switch (inst->op) {
-      case OPCODE_BGNLOOP:
-      case OPCODE_ENDLOOP:
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
          /* End of a basic block, clear the ACP entirely. */
          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
          break;
 
-      case OPCODE_IF:
+      case TGSI_OPCODE_IF:
          ++level;
          break;
 
-      case OPCODE_ENDIF:
-      case OPCODE_ELSE:
+      case TGSI_OPCODE_ENDIF:
+      case TGSI_OPCODE_ELSE:
          /* Clear all channels written inside the block from the ACP, but
           * leaving those that were not touched.
           */
@@ -2946,7 +2847,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
         	  acp[4 * r + c] = NULL;
             }
          }
-         if (inst->op == OPCODE_ENDIF)
+         if (inst->op == TGSI_OPCODE_ENDIF)
             --level;
          break;
 
@@ -3005,7 +2906,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
       }
 
       /* If this is a copy, add it to the ACP. */
-      if (inst->op == OPCODE_MOV &&
+      if (inst->op == TGSI_OPCODE_MOV &&
           inst->dst.file == PROGRAM_TEMPORARY &&
           !inst->dst.reladdr &&
           !inst->saturate &&
@@ -3337,11 +3238,11 @@ src_register( struct st_translate *t,
 }
 
 /**
- * Create a TGSI ureg_dst register from a Mesa dest register.
+ * Create a TGSI ureg_dst register from an st_dst_reg.
  */
 static struct ureg_dst
 translate_dst( struct st_translate *t,
-               const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg,
+               const st_dst_reg *dst_reg,
                boolean saturate )
 {
    struct ureg_dst dst = dst_register( t, 
@@ -3361,7 +3262,7 @@ translate_dst( struct st_translate *t,
 }
 
 /**
- * Create a TGSI ureg_src register from a Mesa src register.
+ * Create a TGSI ureg_src register from an st_src_reg.
  */
 static struct ureg_src
 translate_src( struct st_translate *t,
@@ -3378,12 +3279,6 @@ translate_src( struct st_translate *t,
    if ((src_reg->negate & 0xf) == NEGATE_XYZW)
       src = ureg_negate(src);
 
-#if 0
-   // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR
-   if (src_reg->abs) 
-      src = ureg_abs(src);
-#endif
-
    if (src_reg->reladdr != NULL) {
       /* Normally ureg_src_indirect() would be used here, but a stupid compiler 
        * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 
@@ -3421,77 +3316,64 @@ compile_tgsi_instruction(struct st_translate *t,
    unsigned num_dst;
    unsigned num_src;
 
-   num_dst = _mesa_num_inst_dst_regs( inst->op );
-   num_src = _mesa_num_inst_src_regs( inst->op );
+   num_dst = num_inst_dst_regs( inst->op );
+   num_src = num_inst_src_regs( inst->op );
 
    if (num_dst) 
       dst[0] = translate_dst( t, 
                               &inst->dst,
-                              inst->saturate); // inst->SaturateMode
+                              inst->saturate);
 
    for (i = 0; i < num_src; i++) 
       src[i] = translate_src( t, &inst->src[i] );
 
    switch( inst->op ) {
-   case OPCODE_SWZ:
-      // TODO: copy emit_swz function from st_mesa_to_tgsi.c
-      //emit_swz( t, dst[0], &inst->src[0] );
-      assert(!"OPCODE_SWZ");
-      return;
-
-   case OPCODE_BGNLOOP:
-   case OPCODE_CAL:
-   case OPCODE_ELSE:
-   case OPCODE_ENDLOOP:
-   case OPCODE_IF:
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_CAL:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_IF:
       debug_assert(num_dst == 0);
       ureg_label_insn( ureg,
-                       translate_opcode( inst->op ),
+                       inst->op,
                        src, num_src,
                        get_label( t, 
-                                  inst->op == OPCODE_CAL ? inst->function->sig_id : 0 ));
+                                  inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 ));
       return;
 
-   case OPCODE_TEX:
-   case OPCODE_TXB:
-   case OPCODE_TXD:
-   case OPCODE_TXL:
-   case OPCODE_TXP:
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXD:
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXP:
       src[num_src++] = t->samplers[inst->sampler];
       ureg_tex_insn( ureg,
-                     translate_opcode( inst->op ),
+                     inst->op,
                      dst, num_dst, 
                      translate_texture_target( inst->tex_target,
                                                inst->tex_shadow ),
                      src, num_src );
       return;
 
-   case OPCODE_SCS:
+   case TGSI_OPCODE_SCS:
       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
       ureg_insn( ureg, 
-                 translate_opcode( inst->op ), 
+                 inst->op, 
                  dst, num_dst, 
                  src, num_src );
       break;
 
-   case OPCODE_XPD:
+   case TGSI_OPCODE_XPD:
       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
       ureg_insn( ureg, 
-                 translate_opcode( inst->op ), 
+                 inst->op, 
                  dst, num_dst, 
                  src, num_src );
       break;
 
-   case OPCODE_NOISE1:
-   case OPCODE_NOISE2:
-   case OPCODE_NOISE3:
-   case OPCODE_NOISE4:
-      assert(!"OPCODE_NOISE should have been lowered\n");
-      break;
-
    default:
       ureg_insn( ureg, 
-                 translate_opcode( inst->op ), 
+                 inst->op, 
                  dst, num_dst, 
                  src, num_src );
       break;
@@ -3993,9 +3875,8 @@ get_mesa_program(struct gl_context *ctx,
 
    add_uniforms_to_parameters_list(shader_program, shader, prog);
 
-   /* Emit Mesa IR for main(). */
+   /* Emit intermediate IR for main(). */
    visit_exec_list(shader->ir, v);
-   v->emit(NULL, OPCODE_END);
 
    /* Now emit bodies for any functions that were used. */
    do {
@@ -4007,18 +3888,18 @@ get_mesa_program(struct gl_context *ctx,
          if (!entry->bgn_inst) {
             v->current_function = entry;
 
-            entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB);
+            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
             entry->bgn_inst->function = entry;
 
             visit_exec_list(&entry->sig->body, v);
 
             glsl_to_tgsi_instruction *last;
             last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
-            if (last->op != OPCODE_RET)
-               v->emit(NULL, OPCODE_RET);
+            if (last->op != TGSI_OPCODE_RET)
+               v->emit(NULL, TGSI_OPCODE_RET);
 
             glsl_to_tgsi_instruction *end;
-            end = v->emit(NULL, OPCODE_ENDSUB);
+            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
             end->function = entry;
 
             progress = GL_TRUE;
@@ -4050,6 +3931,9 @@ get_mesa_program(struct gl_context *ctx,
    v->eliminate_dead_code();
    v->merge_registers();
    v->renumber_registers();
+   
+   /* Write the END instruction. */
+   v->emit(NULL, TGSI_OPCODE_END);
 
    if (ctx->Shader.Flags & GLSL_DUMP) {
       printf("\n");
@@ -4127,8 +4011,8 @@ st_new_shader_program(struct gl_context *ctx, GLuint name)
 /**
  * Link a shader.
  * Called via ctx->Driver.LinkShader()
- * This actually involves converting GLSL IR into Mesa gl_programs with
- * code lowering and other optimizations.
+ * This actually involves converting GLSL IR into an intermediate TGSI-like IR 
+ * with code lowering and other optimizations.
  */
 GLboolean
 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
-- 
cgit v1.2.3


From 16d7a717d592524e4d62fec4173cb9523f7a1453 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 2 May 2011 23:12:18 -0500
Subject: glsl_to_tgsi: fix shaders with indirect addressing of temps

Fixes several Piglit tests, although it's a step backwards for optimization.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 4cb2f377e98..75ab9c5de7c 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -485,7 +485,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
    else {
       for (i=0; i<3; i++) {
          if(inst->src[i].reladdr) {
-            switch(dst.file) {
+            switch(inst->src[i].file) {
             case PROGRAM_TEMPORARY:
                this->indirect_addr_temps = true;
                break;
@@ -3928,9 +3928,17 @@ get_mesa_program(struct gl_context *ctx,
 
    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
    v->copy_propagate();
-   v->eliminate_dead_code();
-   v->merge_registers();
-   v->renumber_registers();
+   
+   /* FIXME: These passes to optimize temporary registers don't work when there
+    * is indirect addressing of the temporary register space.  We need proper 
+    * array support so that we don't have to give up these passes in every 
+    * shader that uses arrays.
+    */
+   if (!v->indirect_addr_temps) {
+      v->merge_registers();
+      v->eliminate_dead_code();
+      v->renumber_registers();
+   }
    
    /* Write the END instruction. */
    v->emit(NULL, TGSI_OPCODE_END);
-- 
cgit v1.2.3


From 6d89abadbcd68bbe9e08f041412549f8dc1fc73c Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Tue, 17 May 2011 17:13:20 -0500
Subject: mesa: support boolean and integer-based parameters in prog_parameter

The functionality is not used by anything yet, and the glUniform functions will
need to be reworked before this can reach its full usefulness.  It is
nonetheless a step towards integer support in the state tracker and classic drivers.
---
 src/mesa/main/ff_fragment_shader.cpp       |  3 +-
 src/mesa/main/ffvertex_prog.c              | 10 +++---
 src/mesa/main/uniforms.c                   | 12 +++----
 src/mesa/program/ir_to_mesa.cpp            |  8 ++---
 src/mesa/program/nvfragparse.c             | 23 +++++++++-----
 src/mesa/program/prog_execute.c            |  2 +-
 src/mesa/program/prog_parameter.c          | 50 ++++++++++++++++--------------
 src/mesa/program/prog_parameter.h          | 25 ++++++++++-----
 src/mesa/program/prog_parameter_layout.c   |  2 +-
 src/mesa/program/prog_print.c              |  2 +-
 src/mesa/program/program.c                 |  3 +-
 src/mesa/program/sampler.cpp               |  2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 ++++---
 13 files changed, 88 insertions(+), 66 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp
index 0b53c28f7ae..2ccbaf8f8c3 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -875,7 +875,8 @@ static struct ureg register_const4f( struct texenv_fragment_program *p,
    values[1] = s1;
    values[2] = s2;
    values[3] = s3;
-   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
+   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters,
+                                     (gl_constant_value *) values, 4,
                                      &swizzle );
    r = make_ureg(PROGRAM_CONSTANT, idx);
    r.swz = swizzle;
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index b8e49a3757f..2d2485c9e06 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -455,13 +455,13 @@ static struct ureg register_const4f( struct tnl_program *p,
 			      GLfloat s2,
 			      GLfloat s3)
 {
-   GLfloat values[4];
+   gl_constant_value values[4];
    GLint idx;
    GLuint swizzle;
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
+   values[0].f = s0;
+   values[1].f = s1;
+   values[2].f = s2;
+   values[3].f = s3;
    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
                                      &swizzle );
    ASSERT(swizzle == SWIZZLE_NOOP);
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 1c4fd82baac..07d46c6404f 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -429,7 +429,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = prog->Parameters->ParameterValues[base][j];
+                  params[k++] = prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -442,7 +442,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
                   params[k++] = (GLdouble)
-                     prog->Parameters->ParameterValues[base][j];
+                     prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -455,7 +455,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
                   params[k++] = (GLint)
-                     prog->Parameters->ParameterValues[base][j];
+                     prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -468,7 +468,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
                   params[k++] = (GLuint)
-                     prog->Parameters->ParameterValues[base][j];
+                     prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -670,7 +670,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
       /* loop over number of samplers to change */
       for (i = 0; i < count; i++) {
          GLuint sampler = (GLuint)
-            program->Parameters->ParameterValues[index + offset + i][0];
+            program->Parameters->ParameterValues[index+offset + i][0].f;
          GLuint texUnit = ((GLuint *) values)[i];
 
          /* check that the sampler (tex unit index) is legal */
@@ -936,7 +936,7 @@ set_program_uniform_matrix(struct gl_context *ctx, struct gl_program *program,
             /* Ignore writes beyond the end of (the used part of) an array */
             return;
          }
-         v = program->Parameters->ParameterValues[index + offset];
+         v = (GLfloat *) program->Parameters->ParameterValues[index + offset];
          for (row = 0; row < rows; row++) {
             if (transpose) {
                v[row] = values[src + row * cols + col];
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 00869979dd8..f27492749bd 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -599,7 +599,7 @@ ir_to_mesa_visitor::src_reg_for_float(float val)
    src_reg src(PROGRAM_CONSTANT, -1, NULL);
 
    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-					  &val, 1, &src.swizzle);
+					  (const gl_constant_value *)&val, 1, &src.swizzle);
 
    return src;
 }
@@ -1798,7 +1798,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
 
 	 src = src_reg(PROGRAM_CONSTANT, -1, NULL);
 	 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-						values,
+						(gl_constant_value *) values,
 						ir->type->vector_elements,
 						&src.swizzle);
 	 emit(ir, OPCODE_MOV, mat_column, src);
@@ -1836,7 +1836,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
 
    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-						   values,
+						   (gl_constant_value *) values,
 						   ir->type->vector_elements,
 						   &this->result.swizzle);
 }
@@ -2533,7 +2533,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
 	  */
 	 if (file == PROGRAM_SAMPLER) {
 	    for (unsigned int j = 0; j < size / 4; j++)
-	       prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+	       prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
 	 }
 
 	 /* The location chosen in the Parameters list here (returned
diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c
index 8516b5fc1ff..ce72c610d89 100644
--- a/src/mesa/program/nvfragparse.c
+++ b/src/mesa/program/nvfragparse.c
@@ -472,8 +472,9 @@ Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number)
       const GLfloat *constant;
       if (!Parse_Identifier(parseState, ident))
          RETURN_ERROR1("Expected an identifier");
-      constant = _mesa_lookup_parameter_value(parseState->parameters,
-                                              -1, (const char *) ident);
+      constant = (GLfloat *)_mesa_lookup_parameter_value(parseState->parameters,
+                                                         -1, 
+                                                         (const char *) ident);
       /* XXX Check that it's a constant and not a parameter */
       if (!constant) {
          RETURN_ERROR1("Undefined symbol");
@@ -1039,7 +1040,8 @@ Parse_VectorSrc(struct parse_state *parseState,
       if (!Parse_ScalarConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->File = PROGRAM_NAMED_PARAM;
       srcReg->Index = paramIndex;
    }
@@ -1051,7 +1053,8 @@ Parse_VectorSrc(struct parse_state *parseState,
       if (!Parse_VectorConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->File = PROGRAM_NAMED_PARAM;
       srcReg->Index = paramIndex;      
    }
@@ -1145,7 +1148,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState,
       if (!Parse_VectorConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->File = PROGRAM_NAMED_PARAM;
       srcReg->Index = paramIndex;      
    }
@@ -1170,7 +1174,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState,
       if (!Parse_ScalarConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->Index = paramIndex;      
       srcReg->File = PROGRAM_NAMED_PARAM;
       needSuffix = GL_FALSE;
@@ -1296,7 +1301,8 @@ Parse_InstructionSequence(struct parse_state *parseState,
             RETURN_ERROR2(id, "already defined");
          }
          _mesa_add_named_parameter(parseState->parameters,
-                                   (const char *) id, value);
+                                   (const char *) id,
+                                   (gl_constant_value *) value);
       }
       else if (Parse_String(parseState, "DECLARE")) {
          GLubyte id[100];
@@ -1315,7 +1321,8 @@ Parse_InstructionSequence(struct parse_state *parseState,
             RETURN_ERROR2(id, "already declared");
          }
          _mesa_add_named_parameter(parseState->parameters,
-                                   (const char *) id, value);
+                                   (const char *) id,
+                                   (gl_constant_value *) value);
       }
       else if (Parse_String(parseState, "END")) {
          inst->Opcode = OPCODE_END;
diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index e7553c69dbe..dbfd1b91875 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -157,7 +157,7 @@ get_src_register_pointer(const struct prog_src_register *source,
    case PROGRAM_NAMED_PARAM:
       if (reg >= (GLint) prog->Parameters->NumParameters)
          return ZeroVec;
-      return prog->Parameters->ParameterValues[reg];
+      return (GLfloat *) prog->Parameters->ParameterValues[reg];
 
    case PROGRAM_SYSTEM_VALUE:
       assert(reg < Elements(machine->SystemValues));
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index 3570cab118b..b1cdf8bf2c0 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -56,8 +56,8 @@ _mesa_new_parameter_list_sized(unsigned size)
       p->Parameters = (struct gl_program_parameter *)
 	 calloc(1, size * sizeof(struct gl_program_parameter));
 
-      p->ParameterValues = (GLfloat (*)[4])
-         _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16);
+      p->ParameterValues = (gl_constant_value (*)[4])
+         _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16);
 
 
       if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) {
@@ -101,14 +101,15 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList)
  * \param name  the parameter name, will be duplicated/copied!
  * \param size  number of elements in 'values' vector (1..4, or more)
  * \param datatype  GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE.
- * \param values  initial parameter value, up to 4 GLfloats, or NULL
+ * \param values  initial parameter value, up to 4 gl_constant_values, or NULL
  * \param state  state indexes, or NULL
  * \return  index of new parameter in the list, or -1 if error (out of mem)
  */
 GLint
 _mesa_add_parameter(struct gl_program_parameter_list *paramList,
                     gl_register_file type, const char *name,
-                    GLuint size, GLenum datatype, const GLfloat *values,
+                    GLuint size, GLenum datatype,
+                    const gl_constant_value *values,
                     const gl_state_index state[STATE_LENGTH],
                     GLbitfield flags)
 {
@@ -127,10 +128,10 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
 		       oldNum * sizeof(struct gl_program_parameter),
 		       paramList->Size * sizeof(struct gl_program_parameter));
 
-      paramList->ParameterValues = (GLfloat (*)[4])
+      paramList->ParameterValues = (gl_constant_value (*)[4])
          _mesa_align_realloc(paramList->ParameterValues,         /* old buf */
-                             oldNum * 4 * sizeof(GLfloat),      /* old size */
-                             paramList->Size * 4 *sizeof(GLfloat), /* new sz */
+                             oldNum * 4 * sizeof(gl_constant_value),/* old sz */
+                             paramList->Size*4*sizeof(gl_constant_value),/*new*/
                              16);
    }
 
@@ -142,7 +143,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
       return -1;
    }
    else {
-      GLuint i;
+      GLuint i, j;
 
       paramList->NumParameters = oldNum + sz4;
 
@@ -163,7 +164,8 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
          }
          else {
             /* silence valgrind */
-            ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0);
+            for (j = 0; j < 4; j++)
+            	paramList->ParameterValues[oldNum + i][j].f = 0;
          }
          size -= 4;
       }
@@ -184,7 +186,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
  */
 GLint
 _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
-                          const char *name, const GLfloat values[4])
+                          const char *name, const gl_constant_value values[4])
 {
    return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name,
                               4, GL_NONE, values, NULL, 0x0);
@@ -204,17 +206,17 @@ _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
  */
 GLint
 _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const GLfloat values[4],
+                         const char *name, const gl_constant_value values[4],
                          GLuint size)
 {
    /* first check if this is a duplicate constant */
    GLint pos;
    for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) {
-      const GLfloat *pvals = paramList->ParameterValues[pos];
-      if (pvals[0] == values[0] &&
-          pvals[1] == values[1] &&
-          pvals[2] == values[2] &&
-          pvals[3] == values[3] &&
+      const gl_constant_value *pvals = paramList->ParameterValues[pos];
+      if (pvals[0].u == values[0].u &&
+          pvals[1].u == values[1].u &&
+          pvals[2].u == values[2].u &&
+          pvals[3].u == values[3].u &&
           strcmp(paramList->Parameters[pos].Name, name) == 0) {
          /* Same name and value is already in the param list - reuse it */
          return pos;
@@ -240,7 +242,7 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
  */
 GLint
 _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
-                           const GLfloat values[4], GLuint size,
+                           const gl_constant_value values[4], GLuint size,
                            GLuint *swizzleOut)
 {
    GLint pos;
@@ -262,7 +264,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
          struct gl_program_parameter *p = paramList->Parameters + pos;
          if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) {
             /* ok, found room */
-            GLfloat *pVal = paramList->ParameterValues[pos];
+            gl_constant_value *pVal = paramList->ParameterValues[pos];
             GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */
             pVal[p->Size] = values[0];
             p->Size++;
@@ -401,7 +403,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
  * Lookup a parameter value by name in the given parameter list.
  * \return pointer to the float[4] values.
  */
-GLfloat *
+gl_constant_value *
 _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
                              GLsizei nameLen, const char *name)
 {
@@ -465,7 +467,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
  */
 GLboolean
 _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
-                                const GLfloat v[], GLuint vSize,
+                                const gl_constant_value v[], GLuint vSize,
                                 GLint *posOut, GLuint *swizzleOut)
 {
    GLuint i;
@@ -484,7 +486,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
             /* swizzle not allowed */
             GLuint j, match = 0;
             for (j = 0; j < vSize; j++) {
-               if (v[j] == list->ParameterValues[i][j])
+               if (v[j].u == list->ParameterValues[i][j].u)
                   match++;
             }
             if (match == vSize) {
@@ -498,7 +500,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
                 /* look for v[0] anywhere within float[4] value */
                 GLuint j;
                 for (j = 0; j < list->Parameters[i].Size; j++) {
-                   if (list->ParameterValues[i][j] == v[0]) {
+                   if (list->ParameterValues[i][j].u == v[0].u) {
                       /* found it */
                       *posOut = i;
                       *swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
@@ -511,13 +513,13 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
                 GLuint swz[4];
                 GLuint match = 0, j, k;
                 for (j = 0; j < vSize; j++) {
-                   if (v[j] == list->ParameterValues[i][j]) {
+                   if (v[j].u == list->ParameterValues[i][j].u) {
                       swz[j] = j;
                       match++;
                    }
                    else {
                       for (k = 0; k < list->Parameters[i].Size; k++) {
-                         if (v[j] == list->ParameterValues[i][k]) {
+                         if (v[j].u == list->ParameterValues[i][k].u) {
                             swz[j] = k;
                             match++;
                             break;
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index 10cbbe57a6c..dcc171ed745 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -46,7 +46,15 @@
 #define PROG_PARAM_BIT_CYL_WRAP  0x10  /**< XXX gallium debug */
 /*@}*/
 
-
+/**
+ * Actual data for constant values of parameters.
+ */
+typedef union gl_constant_value {
+	GLfloat f;
+	GLboolean b;
+	GLint i;
+	GLuint u;
+} gl_constant_value;
 
 /**
  * Program parameter.
@@ -81,7 +89,7 @@ struct gl_program_parameter_list
    GLuint Size;           /**< allocated size of Parameters, ParameterValues */
    GLuint NumParameters;  /**< number of parameters in arrays */
    struct gl_program_parameter *Parameters; /**< Array [Size] */
-   GLfloat (*ParameterValues)[4];        /**< Array [Size] of GLfloat[4] */
+   gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */
    GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes
                                might invalidate ParameterValues[] */
 };
@@ -112,22 +120,23 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list)
 extern GLint
 _mesa_add_parameter(struct gl_program_parameter_list *paramList,
                     gl_register_file type, const char *name,
-                    GLuint size, GLenum datatype, const GLfloat *values,
+                    GLuint size, GLenum datatype,
+                    const gl_constant_value *values,
                     const gl_state_index state[STATE_LENGTH],
                     GLbitfield flags);
 
 extern GLint
 _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
-                          const char *name, const GLfloat values[4]);
+                          const char *name, const gl_constant_value values[4]);
 
 extern GLint
 _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const GLfloat values[4],
+                         const char *name, const gl_constant_value values[4],
                          GLuint size);
 
 extern GLint
 _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
-                           const GLfloat values[4], GLuint size,
+                           const gl_constant_value values[4], GLuint size,
                            GLuint *swizzleOut);
 
 extern GLint
@@ -143,7 +152,7 @@ extern GLint
 _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
                           const gl_state_index stateTokens[STATE_LENGTH]);
 
-extern GLfloat *
+extern gl_constant_value *
 _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
                              GLsizei nameLen, const char *name);
 
@@ -153,7 +162,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
 
 extern GLboolean
 _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
-                                const GLfloat v[], GLuint vSize,
+                                const gl_constant_value v[], GLuint vSize,
                                 GLint *posOut, GLuint *swizzleOut);
 
 extern GLuint
diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c
index 90a9771080c..28fca3b92d9 100644
--- a/src/mesa/program/prog_parameter_layout.c
+++ b/src/mesa/program/prog_parameter_layout.c
@@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state)
 
 	 switch (p->Type) {
 	 case PROGRAM_CONSTANT: {
-	    const float *const v =
+	    const gl_constant_value *const v =
 	       state->prog->Parameters->ParameterValues[idx];
 
 	    inst->Base.SrcReg[i].Index =
diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index 7c3b4909e73..70412b1fa6a 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f,
    fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags);
    for (i = 0; i < list->NumParameters; i++){
       struct gl_program_parameter *param = list->Parameters + i;
-      const GLfloat *v = list->ParameterValues[i];
+      const GLfloat *v = (GLfloat *) list->ParameterValues[i];
       fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}",
 	      i, param->Size,
 	      _mesa_register_file_name(list->Parameters[i].Type),
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 224446a2683..4f2b6270501 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -1030,7 +1030,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog)
    GLuint i;
    GLuint whiteSwizzle;
    GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters,
-                                                 white, 4, &whiteSwizzle);
+                                                 (gl_constant_value *) white,
+                                                 4, &whiteSwizzle);
 
    (void) whiteIndex;
 
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index 1457d1199fa..e8d34c670a9 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
 
    index += getname.offset;
 
-   return prog->Parameters->ParameterValues[index][0];
+   return prog->Parameters->ParameterValues[index][0].f;
 }
 }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 75ab9c5de7c..881b9e05de1 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -706,9 +706,11 @@ struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
    st_src_reg src(PROGRAM_CONSTANT, -1, NULL);
+   union gl_constant_value uval;
 
+   uval.f = val;
    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        				  &val, 1, &src.swizzle);
+        				  &uval, 1, &src.swizzle);
 
    return src;
 }
@@ -1791,7 +1793,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
 
          src = st_src_reg(PROGRAM_CONSTANT, -1, NULL);
          src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        					values,
+        					(gl_constant_value *) values,
         					ir->type->vector_elements,
         					&src.swizzle);
          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
@@ -1829,7 +1831,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
 
    this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        					   values,
+        					   (gl_constant_value *) values,
         					   ir->type->vector_elements,
         					   &this->result.swizzle);
 }
@@ -2401,7 +2403,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
           */
          if (file == PROGRAM_SAMPLER) {
             for (unsigned int j = 0; j < size / 4; j++)
-               prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+               prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
          }
 
          /* The location chosen in the Parameters list here (returned
@@ -3762,7 +3764,7 @@ st_translate_program(
             else
                t->constants[i] = 
                   ureg_DECL_immediate( ureg,
-                                       proginfo->Parameters->ParameterValues[i],
+                                       (GLfloat *) proginfo->Parameters->ParameterValues[i],
                                        4 );
             break;
          default:
-- 
cgit v1.2.3


From b191382c60bdcfeb7f424b23aa6ab63de81e2f08 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 13 Jun 2011 18:12:56 -0500
Subject: mesa, glsl_to_tgsi: add native support for integers in shaders

Disabled by default on all drivers.  To enable it, change ctx->Const.GLSLVersion
to 130 in st_extensions.c.  Currently, softpipe is the only driver with integer support.
---
 src/glsl/glsl_types.h                      |  15 ++
 src/mesa/main/uniforms.c                   |  38 ++--
 src/mesa/program/prog_parameter.c          |  28 ++-
 src/mesa/program/prog_parameter.h          |   5 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 302 ++++++++++++++++++++++++-----
 5 files changed, 328 insertions(+), 60 deletions(-)

(limited to 'src/mesa')

diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 1b069df74fe..eb9d501858a 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -165,6 +165,21 @@ struct glsl_type {
    static const glsl_type *const mat4x3_type;
    static const glsl_type *const mat4_type;
    /*@}*/
+   
+   /**
+    * Get the built-in vec4/ivec4/uvec4 type for a base type (NULL otherwise)
+    */
+   static const glsl_type *get_vec4_type(glsl_base_type base_type)
+   {
+      if (base_type == GLSL_TYPE_FLOAT)
+         return vec4_type;
+      else if (base_type == GLSL_TYPE_INT)
+         return ivec4_type;
+      else if (base_type == GLSL_TYPE_UINT)
+         return uvec4_type;
+      else
+         return NULL;
+   }
 
 
    /**
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 07d46c6404f..ce4863faf78 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -454,8 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = (GLint)
-                     prog->Parameters->ParameterValues[base][j].f;
+                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
+                     (GLint) prog->Parameters->ParameterValues[base][j].f : 
+                     prog->Parameters->ParameterValues[base][j].i;
                }
             }
          }
@@ -467,8 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = (GLuint)
-                     prog->Parameters->ParameterValues[base][j].f;
+                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
+                     (GLuint) prog->Parameters->ParameterValues[base][j].f : 
+                     prog->Parameters->ParameterValues[base][j].u;
                }
             }
          }
@@ -735,42 +737,52 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
 
       /* loop over number of array elements */
       for (k = 0; k < count; k++) {
-         GLfloat *uniformVal;
+         gl_constant_value *uniformVal;
 
          if (offset + k >= slots) {
             /* Extra array data is ignored */
             break;
          }
 
-         /* uniformVal (the destination) is always float[4] */
+         /* uniformVal (the destination) is always gl_constant_value[4] */
          uniformVal = program->Parameters->ParameterValues[index + offset + k];
 
          if (basicType == GL_INT) {
-            /* convert user's ints to floats */
             const GLint *iValues = ((const GLint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = (GLfloat) iValues[i];
+               if (ctx->Const.GLSLVersion <= 120)
+                  uniformVal[i].f = (GLfloat) iValues[i];
+               else
+                  uniformVal[i].i = iValues[i];
             }
          }
          else if (basicType == GL_UNSIGNED_INT) {
-            /* convert user's uints to floats */
             const GLuint *iValues = ((const GLuint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = (GLfloat) iValues[i];
+               if (ctx->Const.GLSLVersion <= 120)
+                  uniformVal[i].f = (GLfloat)(GLuint) iValues[i];
+               else
+                  uniformVal[i].u = iValues[i];
             }
          }
          else {
             const GLfloat *fValues = ((const GLfloat *) values) + k * elems;
             assert(basicType == GL_FLOAT);
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = fValues[i];
+               uniformVal[i].f = fValues[i];
             }
          }
 
-         /* if the uniform is bool-valued, convert to 1.0 or 0.0 */
+         /* if the uniform is bool-valued, convert to 1 or 0 */
          if (isUniformBool) {
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f;
+               if (basicType == GL_FLOAT)
+                  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
+               else
+                  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
+               
+               if (ctx->Const.GLSLVersion <= 120)
+                  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
             }
          }
       }
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index b1cdf8bf2c0..49b3ffbdd5c 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -241,9 +241,9 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
  * \return index/position of the new parameter in the parameter list.
  */
 GLint
-_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
                            const gl_constant_value values[4], GLuint size,
-                           GLuint *swizzleOut)
+                           GLenum datatype, GLuint *swizzleOut)
 {
    GLint pos;
    ASSERT(size >= 1);
@@ -276,7 +276,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
 
    /* add a new parameter to store this constant */
    pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL,
-                             size, GL_NONE, values, NULL, 0x0);
+                             size, datatype, values, NULL, 0x0);
    if (pos >= 0 && swizzleOut) {
       if (size == 1)
          *swizzleOut = SWIZZLE_XXXX;
@@ -286,6 +286,28 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
    return pos;
 }
 
+/**
+ * Add a new unnamed constant to the parameter list.  This will be used
+ * when a fragment/vertex program contains something like this:
+ *    MOV r, { 0, 1, 2, 3 };
+ * If swizzleOut is non-null we'll search the parameter list for an
+ * existing instance of the constant which matches with a swizzle.
+ *
+ * \param paramList  the parameter list
+ * \param values  four float values
+ * \param swizzleOut  returns swizzle mask for accessing the constant
+ * \return index/position of the new parameter in the parameter list.
+ * \sa _mesa_add_typed_unnamed_constant
+ */
+GLint
+_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLuint *swizzleOut)
+{
+   return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE,
+                                           swizzleOut);
+}
+
 /**
  * Add parameter representing a varying variable.
  */
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index dcc171ed745..f858cf0fa0d 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -134,6 +134,11 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
                          const char *name, const gl_constant_value values[4],
                          GLuint size);
 
+extern GLint
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLenum datatype, GLuint *swizzleOut);
+
 extern GLint
 _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
                            const gl_constant_value values[4], GLuint size,
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 881b9e05de1..3f5c0c60226 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -96,11 +96,13 @@ public:
       else
          this->swizzle = SWIZZLE_XYZW;
       this->negate = 0;
+      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
       this->reladdr = NULL;
    }
 
-   st_src_reg(gl_register_file file, int index)
+   st_src_reg(gl_register_file file, int index, int type)
    {
+      this->type = type;
       this->file = file;
       this->index = index;
       this->swizzle = SWIZZLE_XYZW;
@@ -110,6 +112,7 @@ public:
 
    st_src_reg()
    {
+      this->type = GLSL_TYPE_ERROR;
       this->file = PROGRAM_UNDEFINED;
       this->index = 0;
       this->swizzle = 0;
@@ -123,23 +126,26 @@ public:
    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
    int negate; /**< NEGATE_XYZW mask from mesa */
+   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
    /** Register index should be offset by the integer in this reg. */
    st_src_reg *reladdr;
 };
 
 class st_dst_reg {
 public:
-   st_dst_reg(gl_register_file file, int writemask)
+   st_dst_reg(gl_register_file file, int writemask, int type)
    {
       this->file = file;
       this->index = 0;
       this->writemask = writemask;
       this->cond_mask = COND_TR;
       this->reladdr = NULL;
+      this->type = type;
    }
 
    st_dst_reg()
    {
+      this->type = GLSL_TYPE_ERROR;
       this->file = PROGRAM_UNDEFINED;
       this->index = 0;
       this->writemask = 0;
@@ -153,12 +159,14 @@ public:
    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
    GLuint cond_mask:4;
+   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
    /** Register index should be offset by the integer in this reg. */
    st_src_reg *reladdr;
 };
 
 st_src_reg::st_src_reg(st_dst_reg reg)
 {
+   this->type = reg.type;
    this->file = reg.file;
    this->index = reg.index;
    this->swizzle = SWIZZLE_XYZW;
@@ -168,6 +176,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
 
 st_dst_reg::st_dst_reg(st_src_reg reg)
 {
+   this->type = reg.type;
    this->file = reg.file;
    this->index = reg.index;
    this->writemask = WRITEMASK_XYZW;
@@ -267,6 +276,8 @@ public:
    int samplers_used;
    bool indirect_addr_temps;
    bool indirect_addr_consts;
+   
+   int glsl_version;
 
    variable_storage *find_variable_storage(ir_variable *var);
 
@@ -276,6 +287,8 @@ public:
    void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
 
    st_src_reg st_src_reg_for_float(float val);
+   st_src_reg st_src_reg_for_int(int val);
+   st_src_reg st_src_reg_for_type(int type, int val);
 
    /**
     * \name Visit methods
@@ -327,6 +340,10 @@ public:
    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst,
         		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
+   
+   unsigned get_opcode(ir_instruction *ir, unsigned op,
+                    st_dst_reg dst,
+                    st_src_reg src0, st_src_reg src1);
 
    /**
     * Emit the correct dot-product instruction for the type of arguments
@@ -343,6 +360,8 @@ public:
    void emit_scalar(ir_instruction *ir, unsigned op,
         	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
+   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
+
    void emit_scs(ir_instruction *ir, unsigned op,
         	 st_dst_reg dst, const st_src_reg &src);
 
@@ -372,9 +391,9 @@ public:
 
 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
 
-static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
+static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
 
-static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
 
 static void
 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
@@ -432,6 +451,8 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
 {
    glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
    int num_reladdr = 0, i;
+   
+   op = get_opcode(ir, op, dst, src0, src1);
 
    /* If we have to do relative addressing, we want to load the ARL
     * reg directly for one of the regs, and preload the other reladdr
@@ -447,7 +468,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
    reladdr_to_temp(ir, &src0, &num_reladdr);
 
    if (dst.reladdr) {
-      emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr);
+      emit_arl(ir, address_reg, *dst.reladdr);
       num_reladdr--;
    }
    assert(num_reladdr == 0);
@@ -531,6 +552,62 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
 
+/**
+ * Determines whether to use an integer, unsigned integer, or float opcode 
+ * based on the operands and input opcode, then returns the chosen opcode.
+ * 
+ * TODO: type checking for remaining TGSI opcodes
+ */
+unsigned
+glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst,
+        		 st_src_reg src0, st_src_reg src1)
+{
+   int type = GLSL_TYPE_FLOAT;
+   
+   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
+      type = GLSL_TYPE_FLOAT;
+   else if (glsl_version >= 130)
+      type = src0.type;
+
+#define case4(c, f, i, u) \
+   case TGSI_OPCODE_##c: \
+      if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
+      else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
+      else op = TGSI_OPCODE_##f; \
+      break;
+#define case3(f, i, u)  case4(f, f, i, u)
+#define case2fi(f, i)   case4(f, f, i, i)
+#define case2iu(i, u)   case4(i, LAST, i, u)
+   
+   switch(op) {
+      case2fi(ADD, UADD);
+      case2fi(MUL, UMUL);
+      case2fi(MAD, UMAD);
+      case3(DIV, IDIV, UDIV);
+      case3(MAX, IMAX, UMAX);
+      case3(MIN, IMIN, UMIN);
+      case2iu(MOD, UMOD);
+      
+      case2fi(SEQ, USEQ);
+      case2fi(SNE, USNE);
+      case3(SGE, ISGE, USGE);
+      case3(SLT, ISLT, USLT);
+      
+      case2iu(SHL, SHL);
+      case2iu(ISHR, USHR);
+      case2iu(NOT, NOT);
+      case2iu(AND, AND);
+      case2iu(OR, OR);
+      case2iu(XOR, XOR);
+      
+      default: break;
+   }
+   
+   assert(op != TGSI_OPCODE_LAST);
+   return op;
+}
+
 void
 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
         		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
@@ -607,6 +684,22 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
    emit_scalar(ir, op, dst, src0, undef);
 }
 
+void
+glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
+        		        st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg tmp = get_temp(glsl_type::float_type);
+
+   if (src0.type == GLSL_TYPE_INT)
+      emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
+   else if (src0.type == GLSL_TYPE_UINT)
+      emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
+   else
+      tmp = src0;
+   
+   emit(ir, TGSI_OPCODE_ARL, dst, tmp);
+}
+
 /**
  * Emit an TGSI_OPCODE_SCS instruction
  *
@@ -705,16 +798,41 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
-   st_src_reg src(PROGRAM_CONSTANT, -1, NULL);
+   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT);
    union gl_constant_value uval;
 
    uval.f = val;
-   src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        				  &uval, 1, &src.swizzle);
+   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+        				  &uval, 1, GL_FLOAT, &src.swizzle);
+
+   return src;
+}
+
+struct st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
+{
+   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT);
+   union gl_constant_value uval;
+   
+   assert(glsl_version >= 130);
+
+   uval.i = val;
+   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+        				  &uval, 1, GL_INT, &src.swizzle);
 
    return src;
 }
 
+struct st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
+{
+   if (glsl_version >= 130)
+      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 
+                                       st_src_reg_for_int(val);
+   else
+      return st_src_reg_for_float(val);
+}
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -759,8 +877,7 @@ type_size(const struct glsl_type *type)
 /**
  * In the initial pass of codegen, we assign temporary numbers to
  * intermediate results.  (not SSA -- variable assignments will reuse
- * storage).  Actual register allocation for the Mesa VM occurs in a
- * pass over the Mesa IR later.
+ * storage).
  */
 st_src_reg
 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
@@ -769,6 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
    int swizzle[4];
    int i;
 
+   src.type = type->base_type;
    src.file = PROGRAM_TEMPORARY;
    src.index = next_temp;
    src.reladdr = NULL;
@@ -875,7 +993,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
          this->variables.push_tail(storage);
          this->next_temp += type_size(ir->type);
 
-         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
+         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
+               glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
       }
 
 
@@ -890,7 +1009,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
                assert(index == storage->index + (int)i);
             }
          } else {
-            st_src_reg src(PROGRAM_STATE_VAR, index, NULL);
+            st_src_reg src(PROGRAM_STATE_VAR, index,
+                  glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
             src.swizzle = slots[i].swizzle;
             emit(ir, TGSI_OPCODE_MOV, dst, src);
             /* even a float takes up a whole vec4 reg in a struct/array. */
@@ -1058,7 +1178,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
    if (!reg->reladdr)
       return;
 
-   emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr);
+   emit_arl(ir, address_reg, *reg->reladdr);
 
    if (*num_reladdr != 1) {
       st_src_reg temp = get_temp(glsl_type::vec4_type);
@@ -1131,13 +1251,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
       break;
    case ir_unop_neg:
-      op[0].negate = ~op[0].negate;
-      result_src = op[0];
+      assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
+      if (result_dst.type == GLSL_TYPE_INT)
+         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+      else {
+         op[0].negate = ~op[0].negate;
+         result_src = op[0];
+      }
       break;
    case ir_unop_abs:
+      assert(result_dst.type == GLSL_TYPE_FLOAT);
       emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
       break;
    case ir_unop_sign:
@@ -1200,9 +1326,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
    case ir_binop_div:
-      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      else
+         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+      break;
    case ir_binop_mod:
-      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      else
+         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_less:
@@ -1227,7 +1360,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "==" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_type::vec4_type);
+         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+               glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : 
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
          emit_dp(ir, result_dst, temp, temp, vector_elements);
          emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
@@ -1239,7 +1375,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "!=" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_type::vec4_type);
+         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+               glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : 
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
          emit_dp(ir, result_dst, temp, temp, vector_elements);
          emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
@@ -1291,17 +1430,24 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
    case ir_unop_i2f:
    case ir_unop_b2f:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
+         break;
+      }
    case ir_unop_b2i:
-      /* Mesa IR lacks types, ints are stored as truncated floats. */
+      /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
       result_src = op[0];
       break;
    case ir_unop_f2i:
-      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+      if (glsl_version >= 130)
+         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
+      else
+         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_f2b:
    case ir_unop_i2b:
-      emit(ir, TGSI_OPCODE_SNE, result_dst,
-        		  op[0], st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
+            st_src_reg_for_type(result_dst.type, 0));
       break;
    case ir_unop_trunc:
       emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
@@ -1329,12 +1475,40 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_bit_not:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
+         break;
+      }
    case ir_unop_u2f:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
+         break;
+      }
    case ir_binop_lshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
+         break;
+      }
    case ir_binop_rshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
+         break;
+      }
    case ir_binop_bit_and:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
+         break;
+      }
    case ir_binop_bit_xor:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
+         break;
+      }
    case ir_binop_bit_or:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
+         break;
+      }
    case ir_unop_round_even:
       assert(!"GLSL 1.30 features unsupported");
       break;
@@ -1729,7 +1903,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
 {
    st_src_reg src;
    GLfloat stack_vals[4] = { 0 };
-   GLfloat *values = stack_vals;
+   gl_constant_value *values = (gl_constant_value *) stack_vals;
+   GLenum gl_type = GL_NONE;
    unsigned int i;
 
    /* Unfortunately, 4 floats is all we can get into
@@ -1737,7 +1912,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
     * aggregate constant and move each constant value into it.  If we
     * get lucky, copy propagation will eliminate the extra moves.
     */
-
    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
       st_src_reg temp_base = get_temp(ir->type);
       st_dst_reg temp = st_dst_reg(temp_base);
@@ -1789,13 +1963,13 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
 
       for (i = 0; i < ir->type->matrix_columns; i++) {
          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
-         values = &ir->value.f[i * ir->type->vector_elements];
+         values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
 
-         src = st_src_reg(PROGRAM_CONSTANT, -1, NULL);
+         src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type);
          src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        					(gl_constant_value *) values,
-        					ir->type->vector_elements,
-        					&src.swizzle);
+                                                values,
+                                                ir->type->vector_elements,
+                                                &src.swizzle);
          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
 
          mat_column.index++;
@@ -1808,21 +1982,36 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    src.file = PROGRAM_CONSTANT;
    switch (ir->type->base_type) {
    case GLSL_TYPE_FLOAT:
-      values = &ir->value.f[0];
+      gl_type = GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i].f = ir->value.f[i];
+      }
       break;
    case GLSL_TYPE_UINT:
+      gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         values[i] = ir->value.u[i];
+         if (glsl_version >= 130)
+            values[i].u = ir->value.u[i];
+         else
+            values[i].f = ir->value.u[i];
       }
       break;
    case GLSL_TYPE_INT:
+      gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         values[i] = ir->value.i[i];
+         if (glsl_version >= 130)
+            values[i].i = ir->value.i[i];
+         else
+            values[i].f = ir->value.i[i];
       }
       break;
    case GLSL_TYPE_BOOL:
+      gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         values[i] = ir->value.b[i];
+         if (glsl_version >= 130)
+            values[i].b = ir->value.b[i];
+         else
+            values[i].f = ir->value.b[i];
       }
       break;
    default:
@@ -1830,9 +2019,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    }
 
    this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
-   this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        					   (gl_constant_value *) values,
-        					   ir->type->vector_elements,
+   this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+        					   values, ir->type->vector_elements, gl_type,
         					   &this->result.swizzle);
 }
 
@@ -2535,6 +2723,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
 {
    GLuint i;
    GLint outputMap[VERT_RESULT_MAX];
+   GLint outputTypes[VERT_RESULT_MAX];
    GLuint numVaryingReads = 0;
    GLboolean usedTemps[MAX_PROGRAM_TEMPS];
    GLuint firstTemp = 0;
@@ -2562,6 +2751,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
                outputMap[var] = _mesa_find_free_register(usedTemps,
                                                          MAX_PROGRAM_TEMPS,
                                                          firstTemp);
+               outputTypes[var] = inst->src[j].type;
                firstTemp = outputMap[var] + 1;
             }
             inst->src[j].file = PROGRAM_TEMPORARY;
@@ -2587,8 +2777,8 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
    for (i = 0; i < VERT_RESULT_MAX; i++) {
       if (outputMap[i] >= 0) {
          /* MOV VAR[i], TEMP[tmp]; */
-         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]);
-         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW);
+         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
+         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
          dst.index = i;
          this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
       }
@@ -3762,10 +3952,33 @@ st_translate_program(
             if (program->indirect_addr_consts)
                t->constants[i] = ureg_DECL_constant( ureg, i );
             else
-               t->constants[i] = 
-                  ureg_DECL_immediate( ureg,
-                                       (GLfloat *) proginfo->Parameters->ParameterValues[i],
-                                       4 );
+               switch(proginfo->Parameters->Parameters[i].DataType)
+               {
+               case GL_FLOAT:
+               case GL_FLOAT_VEC2:
+               case GL_FLOAT_VEC3:
+               case GL_FLOAT_VEC4:
+                  t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4);
+                  break;
+               case GL_INT:
+               case GL_INT_VEC2:
+               case GL_INT_VEC3:
+               case GL_INT_VEC4:
+                  t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4);
+                  break;
+               case GL_UNSIGNED_INT:
+               case GL_UNSIGNED_INT_VEC2:
+               case GL_UNSIGNED_INT_VEC3:
+               case GL_UNSIGNED_INT_VEC4:
+               case GL_BOOL:
+               case GL_BOOL_VEC2:
+               case GL_BOOL_VEC3:
+               case GL_BOOL_VEC4:
+                  t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4);
+                  break;
+               default:
+                  assert(!"should not get here");
+               }
             break;
          default:
             break;
@@ -3874,6 +4087,7 @@ get_mesa_program(struct gl_context *ctx,
    v->prog = prog;
    v->shader_program = shader_program;
    v->options = options;
+   v->glsl_version = ctx->Const.GLSLVersion;
 
    add_uniforms_to_parameters_list(shader_program, shader, prog);
 
-- 
cgit v1.2.3


From b2c067e3075414703a7ebad439d4290c27cab46a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Tue, 14 Jun 2011 17:38:14 -0500
Subject: glsl-to-tgsi: fix piglit tests

This commit fixes all of the piglit tests regressed by "mesa, glsl_to_tgsi: add
native support for integers in shaders" on softpipe.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3f5c0c60226..49613fccda7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -886,7 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
    int swizzle[4];
    int i;
 
-   src.type = type->base_type;
+   src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
    src.file = PROGRAM_TEMPORARY;
    src.index = next_temp;
    src.reladdr = NULL;
@@ -1632,6 +1632,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
    }
 
    this->result = st_src_reg(entry->file, entry->index, var->type);
+   if (glsl_version <= 120)
+      this->result.type = GLSL_TYPE_FLOAT;
 }
 
 void
@@ -1966,10 +1968,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
 
          src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type);
-         src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-                                                values,
-                                                ir->type->vector_elements,
-                                                &src.swizzle);
+         src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+                                                      values,
+                                                      ir->type->vector_elements,
+                                                      GL_FLOAT,
+                                                      &src.swizzle);
          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
 
          mat_column.index++;
@@ -4142,15 +4145,14 @@ get_mesa_program(struct gl_context *ctx,
    if (target == GL_VERTEX_PROGRAM_ARB)
       v->remove_output_reads(PROGRAM_VARYING);
 
-   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
-   v->copy_propagate();
-   
-   /* FIXME: These passes to optimize temporary registers don't work when there
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor.
+    * FIXME: These passes to optimize temporary registers don't work when there
     * is indirect addressing of the temporary register space.  We need proper 
     * array support so that we don't have to give up these passes in every 
     * shader that uses arrays.
     */
    if (!v->indirect_addr_temps) {
+      v->copy_propagate();
       v->merge_registers();
       v->eliminate_dead_code();
       v->renumber_registers();
-- 
cgit v1.2.3


From bf1cee9f24022e3da96d84fdc6baaa050d3eadf1 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Tue, 14 Jun 2011 18:17:40 -0500
Subject: glsl_to_tgsi: finish some loose ends

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 46 +++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 10 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 49613fccda7..438f21483c7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2200,7 +2200,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       ir->lod_info.grad.dPdy->accept(this);
       dy = this->result;
       break;
-   case ir_txf: // TODO: use TGSI_OPCODE_TXF here
+   case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -3731,6 +3731,37 @@ emit_wpos(struct st_context *st,
    emit_wpos_inversion(t, program, invert);
 }
 
+/**
+ * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
+ * TGSI uses +1 for front, -1 for back.
+ * This function converts the TGSI value to the GL value.  Simply clamping/
+ * saturating the value to [0,1] does the job.
+ */
+static void
+emit_face_var(struct st_translate *t)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
+   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
+
+   /* MOV_SAT face_temp, input[face] */
+   face_temp = ureg_saturate(face_temp);
+   ureg_MOV(ureg, face_temp, face_input);
+
+   /* Use face_temp as face input from here on: */
+   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
+}
+
+static void
+emit_edgeflags(struct st_translate *t)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
+   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
+
+   ureg_MOV(ureg, edge_dst, edge_src);
+}
+
 /**
  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
  * \param program  the program to translate
@@ -3800,15 +3831,11 @@ st_translate_program(
          /* Must do this after setting up t->inputs, and before
           * emitting constant references, below:
           */
-          printf("FRAG_BIT_WPOS\n");
           emit_wpos(st_context(ctx), t, proginfo, ureg);
       }
 
-      if (proginfo->InputsRead & FRAG_BIT_FACE) {
-         // TODO: uncomment
-         printf("FRAG_BIT_FACE\n");
-         //emit_face_var( t, program );
-      }
+      if (proginfo->InputsRead & FRAG_BIT_FACE)
+         emit_face_var(t);
 
       /*
        * Declare output attributes.
@@ -3875,7 +3902,6 @@ st_translate_program(
                /* XXX: note we are modifying the incoming shader here!  Need to
                * do this before emitting the constant decls below, or this
                * will be missed.
-               * XXX: depends on "Parameters" field specific to Mesa IR
                */
             unsigned pointSizeClampConst =
                _mesa_add_state_reference(proginfo->Parameters,
@@ -3887,8 +3913,8 @@ st_translate_program(
             t->outputs[i] = psizregtemp;
          }
       }
-      /*if (passthrough_edgeflags)
-         emit_edgeflags( t, program ); */ // TODO: uncomment
+      if (passthrough_edgeflags)
+         emit_edgeflags(t);
    }
 
    /* Declare address register.
-- 
cgit v1.2.3


From b30bbd7436bdb9727d3766ba9c07abd610e6dda8 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 15 Jun 2011 14:45:03 -0500
Subject: glsl_to_tgsi: silence compiler warning

---
 src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 75842286ba8..656c985d78f 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -1207,7 +1207,7 @@ st_translate_mesa_program(
             else
                t->constants[i] = 
                   ureg_DECL_immediate( ureg,
-                                       program->Parameters->ParameterValues[i],
+                                       (const float*) program->Parameters->ParameterValues[i],
                                        4 );
             break;
          default:
-- 
cgit v1.2.3


From 1141c3f4c4014e3c2834db65b96a3ba7cc78744a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 15 Jun 2011 17:31:51 -0500
Subject: glsl: remove glsl_type::get_vec4_type()

Thanks to Kenneth Graunke for pointing out that glsl_type::get_instance(base, 4, 1)
is the same as glsl_type::get_vec4_type(base).

The function was only used in st_glsl_to_tgsi, and this commit replaces that usage
with get_instance.
---
 src/glsl/glsl_types.h                      | 15 ---------------
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  4 ++--
 2 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'src/mesa')

diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index eb9d501858a..1b069df74fe 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -165,21 +165,6 @@ struct glsl_type {
    static const glsl_type *const mat4x3_type;
    static const glsl_type *const mat4_type;
    /*@}*/
-   
-   /**
-    * Get the built-in instance of the vec4 type for a specific base type
-    */
-   static const glsl_type *get_vec4_type(glsl_base_type base_type)
-   {
-      if (base_type == GLSL_TYPE_FLOAT)
-         return vec4_type;
-      else if (base_type == GLSL_TYPE_INT)
-         return ivec4_type;
-      else if (base_type == GLSL_TYPE_UINT)
-         return uvec4_type;
-      else
-         return NULL;
-   }
 
 
    /**
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 438f21483c7..5fedf263090 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1361,7 +1361,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(glsl_version >= 130 ? 
-               glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : 
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
@@ -1376,7 +1376,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(glsl_version >= 130 ? 
-               glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : 
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
-- 
cgit v1.2.3


From 552cc48fca9b932fceb3d8fa7f9d0067f46b67c2 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 16 Jun 2011 13:42:57 -0500
Subject: glsl_to_tgsi: fix compile error with g++ 4.6

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5fedf263090..6c92441a105 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -389,7 +389,7 @@ public:
    void *mem_ctx;
 };
 
-static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
 
 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
 
-- 
cgit v1.2.3


From 29d21417e38aed0f0710d3692df320728aef90b1 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 16 Jun 2011 18:36:16 -0500
Subject: glsl_to_tgsi: implement simplify_cmp pass needed by r300g

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 95 ++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6c92441a105..322bfbbf1ab 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -374,6 +374,7 @@ public:
    bool process_move_condition(ir_rvalue *ir);
 
    void remove_output_reads(gl_register_file type);
+   void simplify_cmp(void);
 
    void rename_temp_register(int index, int new_index);
    int get_first_temp_read(int index);
@@ -2788,6 +2789,97 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
    }
 }
 
+/**
+ * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) of src that
+ * are read when writing the channels enabled in dst.writemask
+ */
+static int
+get_src_arg_mask(st_dst_reg dst, st_src_reg src)
+{
+   int read_mask = 0, comp;
+
+   /* For each channel written by dst, look up which src component the
+    * swizzle selects for it and mark that component as read
+    */
+   for (comp = 0; comp < 4; ++comp) {
+      const unsigned coord = GET_SWZ(src.swizzle, comp);
+      ASSERT(coord < 4);
+      if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
+         read_mask |= 1 << coord;
+   }
+
+   return read_mask;
+}
+
+/**
+ * This pass replaces CMP T0, T1, T2, T0 with MOV T0, T2 when the CMP
+ * instruction is the first instruction to write to register T0.  There are
+ * several lowering passes done in GLSL IR (e.g. branches and relative
+ * addressing) that create a large number of conditional assignments that this
+ * visitor converts to CMP instructions like the one mentioned above.
+ *
+ * Why this is safe: CMP T0, T1, T2, T0 can be expanded to:
+ * if (T1 < 0.0)
+ * 	MOV T0, T2;
+ * else
+ * 	MOV T0, T0;
+ *
+ * If (T1 < 0.0) is true, the replacement MOV T0, T2 matches the original
+ * program.  If it is false, MOV T0, T0 would only copy the garbage in the
+ * still-uninitialized T0, so any later read of T0 was going to see garbage
+ * anyway and writing T2 instead is harmless.
+ *
+ * NOTE(review): the write-tracking arrays are fixed-size and guarded only by
+ * assert(); confirm dst.index can never reach MAX_PROGRAM_TEMPS/_OUTPUTS.
+ */
+void
+glsl_to_tgsi_visitor::simplify_cmp(void)
+{
+   unsigned tempWrites[MAX_PROGRAM_TEMPS];
+   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
+
+   memset(tempWrites, 0, sizeof(tempWrites));
+   memset(outputWrites, 0, sizeof(outputWrites));
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      unsigned prevWriteMask = 0;
+
+      /* Give up if we encounter relative addressing or flow control. */
+      if (inst->dst.reladdr ||
+          tgsi_get_opcode_info(inst->op)->is_branch ||
+          inst->op == TGSI_OPCODE_BGNSUB ||
+          inst->op == TGSI_OPCODE_CONT ||
+          inst->op == TGSI_OPCODE_END ||
+          inst->op == TGSI_OPCODE_ENDSUB ||
+          inst->op == TGSI_OPCODE_RET) {
+         return;
+      }
+
+      if (inst->dst.file == PROGRAM_OUTPUT) {
+         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
+         prevWriteMask = outputWrites[inst->dst.index];
+         outputWrites[inst->dst.index] |= inst->dst.writemask;
+      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
+         assert(inst->dst.index < MAX_PROGRAM_TEMPS);
+         prevWriteMask = tempWrites[inst->dst.index];
+         tempWrites[inst->dst.index] |= inst->dst.writemask;
+      }
+
+      /* For a CMP to be considered a conditional write, the destination
+       * register and source register two must be the same. */
+      if (inst->op == TGSI_OPCODE_CMP
+          && !(inst->dst.writemask & prevWriteMask)
+          && inst->src[2].file == inst->dst.file
+          && inst->src[2].index == inst->dst.index
+          && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
+
+         inst->op = TGSI_OPCODE_MOV;
+         inst->src[0] = inst->src[1];
+      }
+   }
+}
+
 /* Replaces all references to a temporary register index with another index. */
 void
 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
@@ -4170,6 +4262,9 @@ get_mesa_program(struct gl_context *ctx,
    v->remove_output_reads(PROGRAM_OUTPUT);
    if (target == GL_VERTEX_PROGRAM_ARB)
       v->remove_output_reads(PROGRAM_VARYING);
+   
+   /* Perform the simplify_cmp optimization, which is required by r300g. */
+   v->simplify_cmp();
 
    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor.
     * FIXME: These passes to optimize temporary registers don't work when there
-- 
cgit v1.2.3


From 8c50f18b29637470539d05ccc32b0cae0092aeac Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Tue, 21 Jun 2011 21:52:19 +0100
Subject: glsl_to_tgsi: execute merge_registers() after eliminate_dead_code()

Fixes a regression unintentionally introduced by "glsl_to_tgsi: fix shaders with
indirect addressing of temps" that caused missing leaves in 3dmark01 test 4 (Nature)
and missing/displaced textures on human models in Counter-Strike: Source.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Signed-off-by: Bryan Cain <bryancain3@gmail.com>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 322bfbbf1ab..abeb44a4083 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4274,8 +4274,8 @@ get_mesa_program(struct gl_context *ctx,
     */
    if (!v->indirect_addr_temps) {
       v->copy_propagate();
-      v->merge_registers();
       v->eliminate_dead_code();
+      v->merge_registers();
       v->renumber_registers();
    }
    
-- 
cgit v1.2.3


From 8b881ad1c3d9dd3c96afbdbb608a7240d40e9c92 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 23 Jun 2011 19:35:36 -0500
Subject: glsl_to_tgsi: use swizzle_for_size for src reg in conditional moves

This prevents the copy propagation pass from being confused by undefined
channels and thus missing optimization opportunities.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index abeb44a4083..6d76686ab5d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1882,10 +1882,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       st_src_reg condition = this->result;
 
       for (i = 0; i < type_size(ir->lhs->type); i++) {
+         st_src_reg l_src = st_src_reg(l);
+         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
+         
          if (switch_order) {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r);
+            emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
          } else {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l));
+            emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
          }
 
          l.index++;
-- 
cgit v1.2.3


From 7ec7dd4fb6ae6c8aa29988754476e1212eb986ef Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 23 Jun 2011 19:53:37 -0500
Subject: glsl_to_tgsi: remove handling of XPD opcode in
 compile_tgsi_instruction()

The opcode is never emitted by the glsl_to_tgsi_visitor, so its special case in
compile_tgsi_instruction() was dead code.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6d76686ab5d..721ba28d61f 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3653,14 +3653,6 @@ compile_tgsi_instruction(struct st_translate *t,
                  src, num_src );
       break;
 
-   case TGSI_OPCODE_XPD:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
-      ureg_insn( ureg, 
-                 inst->op, 
-                 dst, num_dst, 
-                 src, num_src );
-      break;
-
    default:
       ureg_insn( ureg, 
                  inst->op, 
-- 
cgit v1.2.3


From 41472f7809dcff114223b8fadc5b97baff6060a9 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 24 Jun 2011 18:45:04 -0500
Subject: glsl_to_tgsi: add a better, more advanced dead code elimination pass

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 140 +++++++++++++++++++++++++++++
 1 file changed, 140 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 721ba28d61f..d47364fabb6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -208,6 +208,7 @@ public:
    int sampler; /**< sampler index */
    int tex_target; /**< One of TEXTURE_*_INDEX */
    GLboolean tex_shadow;
+   int dead_mask; /**< Used in dead code elimination */
 
    class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
 };
@@ -384,6 +385,7 @@ public:
 
    void copy_propagate(void);
    void eliminate_dead_code(void);
+   int eliminate_dead_code_advanced(void);
    void merge_registers(void);
    void renumber_registers(void);
 
@@ -480,6 +482,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
    inst->src[1] = src1;
    inst->src[2] = src2;
    inst->ir = ir;
+   inst->dead_mask = 0;
 
    inst->function = NULL;
    
@@ -3257,6 +3260,142 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
    }
 }
 
+/*
+ * Per-channel dead code elimination for PROGRAM_TEMPORARY registers, tracked
+ * on a basic block basis.  Unlike eliminate_dead_code(), it can detect a write
+ * that is overwritten before any read of that channel.  It is not a superset
+ * of eliminate_dead_code(): it cannot remove an instruction that is the only
+ * write to a register that is never read.
+ *
+ * Returns the number of instructions removed; callers may repeat the pass
+ * until it returns 0.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming this pass will occur.
+ */
+int
+glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+{
+   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
+                                                     glsl_to_tgsi_instruction *,
+                                                     this->next_temp * 4);
+   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+   int removed = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+      
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+         /* End of a basic block, clear the write array entirely.
+          * FIXME: This keeps us from killing dead code when the writes are
+          * on either side of a loop, even when the register isn't touched
+          * inside the loop.
+          */
+         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+         break;
+
+      case TGSI_OPCODE_IF:
+         ++level;
+         break;
+
+      case TGSI_OPCODE_ENDIF:
+         --level;
+         break;
+
+      case TGSI_OPCODE_ELSE:
+         /* Clear all channels written inside the preceding if block from the
+          * write array, but leave those that were not touched.
+          *
+          * FIXME: This destroys opportunities to remove dead code inside of
+          * IF blocks that are followed by an ELSE block.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!writes[4 * r + c])
+        	         continue;
+
+               if (write_level[4 * r + c] >= level)
+        	         writes[4 * r + c] = NULL;
+            }
+         }
+         break;
+
+      default:
+         /* Continuing the block, clear any channels from the write array that
+          * are read by this instruction.
+          */
+         for (int i = 0; i < 3; i++) { /* only src[0..2] exist; 4 overran */
+            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
+               /* Any temporary might be read, so no dead code elimination 
+                * across this instruction.
+                */
+               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
+               /* Clear where it's used as src. */
+               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
+               
+               for (int c = 0; c < 4; c++) {
+              	   if (src_chans & (1 << c)) {
+              	      writes[4 * inst->src[i].index + c] = NULL;
+              	   }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this instruction writes to a temporary, add it to the write array.
+       * If there is already an instruction in the write array for one or more
+       * of the channels, flag that channel write as dead.
+       */
+      if (inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate) {
+         for (int c = 0; c < 4; c++) {
+            if (inst->dst.writemask & (1 << c)) {
+               if (writes[4 * inst->dst.index + c]) {
+                  if (write_level[4 * inst->dst.index + c] < level)
+                     continue;
+                  else
+                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
+               }
+               writes[4 * inst->dst.index + c] = inst;
+               write_level[4 * inst->dst.index + c] = level;
+            }
+         }
+      }
+   }
+
+   /* Now actually remove the instructions that are completely dead and update
+    * the writemask of other instructions with dead channels.
+    */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      if (!inst->dead_mask || !inst->dst.writemask)
+         continue;
+      else if (inst->dead_mask == inst->dst.writemask) {
+         iter.remove();
+         delete inst;
+         removed++;
+      } else
+         inst->dst.writemask &= ~(inst->dead_mask);
+   }
+
+   ralloc_free(write_level);
+   ralloc_free(writes);
+   
+   return removed;
+}
+
 /* Merges temporary registers together where possible to reduce the number of 
  * registers needed to run a program.
  * 
@@ -4269,6 +4408,7 @@ get_mesa_program(struct gl_context *ctx,
     */
    if (!v->indirect_addr_temps) {
       v->copy_propagate();
+      while (v->eliminate_dead_code_advanced());
       v->eliminate_dead_code();
       v->merge_registers();
       v->renumber_registers();
-- 
cgit v1.2.3


From 194732fd7299481dd57815f46a594d155260ce17 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 24 Jun 2011 20:37:53 -0500
Subject: glsl_to_tgsi: use a more specific condition for gl_FragDepth hack in
 generating assignments

This reduces the number of instructions in the fragment shader of
glsl-fs-atan-2 from 174 to 146 with EmitNoIfs enabled.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d47364fabb6..5f22f7091d6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1841,7 +1841,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
    if (ir->write_mask == 0) {
       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
       l.writemask = WRITEMASK_XYZW;
-   } else if (ir->lhs->type->is_scalar()) {
+   } else if (ir->lhs->type->is_scalar() &&
+              ir->lhs->variable_referenced()->mode == ir_var_out) {
       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
        * FINISHME: W component of fragment shader output zero, work correctly.
        */
@@ -1851,7 +1852,6 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       int first_enabled_chan = 0;
       int rhs_chan = 0;
 
-      assert(ir->lhs->type->is_vector());
       l.writemask = ir->write_mask;
 
       for (int i = 0; i < 4; i++) {
-- 
cgit v1.2.3


From 3bd06e5b82b438041f50e2469be9ea68bf3b4300 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 24 Jun 2011 22:32:26 -0500
Subject: glsl_to_tgsi: use the correct writemask in try_emit_mad() and
 try_emit_sat()

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5f22f7091d6..13573fc1b94 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1133,6 +1133,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 {
    int nonmul_operand = 1 - mul_operand;
    st_src_reg a, b, c;
+   st_dst_reg result_dst;
 
    ir_expression *expr = ir->operands[mul_operand]->as_expression();
    if (!expr || expr->operation != ir_binop_mul)
@@ -1146,7 +1147,9 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    c = this->result;
 
    this->result = get_temp(ir->type);
-   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c);
+   result_dst = st_dst_reg(this->result);
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
 
    return true;
 }
@@ -1168,8 +1171,10 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
    st_src_reg src = this->result;
 
    this->result = get_temp(ir->type);
+   st_dst_reg result_dst = st_dst_reg(this->result);
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
    glsl_to_tgsi_instruction *inst;
-   inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src);
+   inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
    inst->saturate = true;
 
    return true;
-- 
cgit v1.2.3


From 71cbc9e3c4c9ef6090ee31e87601ae64af26321e Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 24 Jun 2011 23:17:30 -0500
Subject: glsl_to_tgsi: improve eliminate_dead_code_advanced()

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 13573fc1b94..15a1a3c51c4 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3379,6 +3379,15 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
       }
    }
 
+   /* Anything still in the write array at this point is dead code. */
+   for (int r = 0; r < this->next_temp; r++) {
+      for (int c = 0; c < 4; c++) {
+         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
+         if (inst)
+            inst->dead_mask |= (1 << c);
+      }
+   }
+
    /* Now actually remove the instructions that are completely dead and update
     * the writemask of other instructions with dead channels.
     */
-- 
cgit v1.2.3


From f00406b68c07f97b11e873c04917cafdb1a67462 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 27 Jun 2011 17:11:07 -0500
Subject: glsl_to_tgsi: improve assignment handling

This is a hack, but it's better than emitting an unnecessary MOV instruction
and hoping the optimization passes clean it up.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 15a1a3c51c4..e38617ae9fe 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -695,13 +695,13 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
    st_src_reg tmp = get_temp(glsl_type::float_type);
 
    if (src0.type == GLSL_TYPE_INT)
-      emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
+      emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
    else if (src0.type == GLSL_TYPE_UINT)
-      emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
+      emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
    else
       tmp = src0;
    
-   emit(ir, TGSI_OPCODE_ARL, dst, tmp);
+   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
 }
 
 /**
@@ -1902,6 +1902,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
          l.index++;
          r.index++;
       }
+   } else if (ir->rhs->as_expression() &&
+              this->instructions.get_tail() &&
+              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
+              type_size(ir->lhs->type) == 1) {
+      /* To avoid emitting an extra MOV when assigning an expression to a 
+       * variable, change the destination register of the last instruction 
+       * emitted as part of the expression to the assignment variable.
+       */
+      glsl_to_tgsi_instruction *inst;
+      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      inst->dst = l;
    } else {
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          emit(ir, TGSI_OPCODE_MOV, l, r);
-- 
cgit v1.2.3


From 4c8b6a286887628e5fc35306189a4c4a83c482ea Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 27 Jun 2011 17:25:50 -0500
Subject: glsl_to_tgsi: fix mistake in new dead code elimination pass

The conditions of IF opcodes were not being counted as reads, which sometimes
led to the condition register being wrong or undefined.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e38617ae9fe..f87c64f62c7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3315,10 +3315,6 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
          memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
          break;
 
-      case TGSI_OPCODE_IF:
-         ++level;
-         break;
-
       case TGSI_OPCODE_ENDIF:
          --level;
          break;
@@ -3341,6 +3337,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
          }
          break;
 
+      case TGSI_OPCODE_IF:
+         ++level;
+         /* fallthrough to default case to mark the condition as read */
+      
       default:
          /* Continuing the block, clear any channels from the write array that
           * are read by this instruction.
-- 
cgit v1.2.3


From 9c2810103d107d1e5ef8bd8b57819d12264f664a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 27 Jun 2011 17:40:10 -0500
Subject: glsl_to_tgsi: always run copy_propagate() and
 eliminate_dead_code_advanced()

These two passes are written to handle indirect addressing properly.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f87c64f62c7..e7d0af83a6b 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4422,18 +4422,17 @@ get_mesa_program(struct gl_context *ctx,
    if (target == GL_VERTEX_PROGRAM_ARB)
       v->remove_output_reads(PROGRAM_VARYING);
    
-   /* Perform the simplify_cmp optimization, which is required by r300g. */
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
    v->simplify_cmp();
+   v->copy_propagate();
+   while (v->eliminate_dead_code_advanced());
 
-   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor.
-    * FIXME: These passes to optimize temporary registers don't work when there
+   /* FIXME: These passes to optimize temporary registers don't work when there
     * is indirect addressing of the temporary register space.  We need proper 
     * array support so that we don't have to give up these passes in every 
     * shader that uses arrays.
     */
    if (!v->indirect_addr_temps) {
-      v->copy_propagate();
-      while (v->eliminate_dead_code_advanced());
       v->eliminate_dead_code();
       v->merge_registers();
       v->renumber_registers();
-- 
cgit v1.2.3


From 54db6e618e43abbd69b59e0a03e2b6ec83d3120f Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 30 Jun 2011 13:42:37 -0500
Subject: r200, r600c, i965: fix build

---
 src/mesa/drivers/dri/i965/brw_fs.cpp           |  2 +-
 src/mesa/drivers/dri/i965/brw_vs_emit.c        |  2 +-
 src/mesa/drivers/dri/r200/r200_vertprog.c      |  8 ++++----
 src/mesa/drivers/dri/r600/evergreen_fragprog.c |  8 ++++----
 src/mesa/drivers/dri/r600/evergreen_vertprog.c | 16 ++++++++--------
 src/mesa/drivers/dri/r600/r700_fragprog.c      |  8 ++++----
 src/mesa/drivers/dri/r600/r700_vertprog.c      | 16 ++++++++--------
 7 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7c73a8fbf02..31f76f8c939 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -605,7 +605,7 @@ fs_visitor::setup_paramvalues_refs()
    /* Set up the pointers to ParamValues now that that array is finalized. */
    for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
       c->prog_data.param[i] =
-	 fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+	 (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] +
 	 this->param_offset[i];
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index b6c9e5a1ceb..2fa04a15a34 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1359,7 +1359,7 @@ get_src_reg( struct brw_vs_compile *c,
 
 	 if (component >= 0) {
 	    params = c->vp->program.Base.Parameters;
-	    f = params->ParameterValues[src->Index][component];
+	    f = params->ParameterValues[src->Index][component].f;
 
 	    if (src->Abs)
 	       f = fabs(f);
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index 63e03b0e0c7..cf44d7f459c 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_
       case PROGRAM_NAMED_PARAM:
       //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
       case PROGRAM_CONSTANT:
-	 *fcmd++ = paramList->ParameterValues[pi][0];
-	 *fcmd++ = paramList->ParameterValues[pi][1];
-	 *fcmd++ = paramList->ParameterValues[pi][2];
-	 *fcmd++ = paramList->ParameterValues[pi][3];
+	 *fcmd++ = paramList->ParameterValues[pi][0].f;
+	 *fcmd++ = paramList->ParameterValues[pi][1].f;
+	 *fcmd++ = paramList->ParameterValues[pi][2].f;
+	 *fcmd++ = paramList->ParameterValues[pi][3].f;
 	 break;
       default:
 	 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
index e527c379b62..cc584ca2b35 100644
--- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
@@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx)
 	    unNumParamData = paramList->NumParameters;
 
 	    for(ui=0; ui<unNumParamData; ui++) {
-		        evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
 	    }
 
 	    /* alloc multiple of 16 constants */
diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
index 018869b9996..117916ac78f 100644
--- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
@@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx)
 	    for(ui=0; ui<unNumParamData; ui++) {
             if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) 
             {
-                evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
-		        evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
-		        evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
-		        evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+                evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+		        evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+		        evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+		        evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
             }
             else
             {
-		        evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
             }
 	    }
 
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 40494cd6af0..6f9834e68fe 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
 	    unNumParamData = paramList->NumParameters;
 
 	    for(ui=0; ui<unNumParamData; ui++) {
-		        r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
 	    }
 
         /* Load fp constants to gpu */
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 7d4be9180a0..b1e2742b27d 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx)
 	    for(ui=0; ui<unNumParamData; ui++) {
             if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) 
             {
-                r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
-		        r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
-		        r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
-		        r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+              r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+		        r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+		        r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+		        r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
             }
             else
             {
-		        r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
             }
 	    }
 
-- 
cgit v1.2.3


From 33e0c47b05c8fbae9d7af57ba65b612825b5db60 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 4 Jul 2011 08:44:12 -0500
Subject: glsl_to_tgsi: replace MAX_PROGRAM_TEMPS (256) with MAX_TEMPS (4096)

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e7d0af83a6b..d7afc22c048 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -77,6 +77,8 @@ extern "C" {
                            (1 << PROGRAM_CONSTANT) |     \
                            (1 << PROGRAM_UNIFORM))
 
+#define MAX_TEMPS         4096
+
 class st_src_reg;
 class st_dst_reg;
 
@@ -2751,11 +2753,11 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
    GLint outputMap[VERT_RESULT_MAX];
    GLint outputTypes[VERT_RESULT_MAX];
    GLuint numVaryingReads = 0;
-   GLboolean usedTemps[MAX_PROGRAM_TEMPS];
+   GLboolean usedTemps[MAX_TEMPS];
    GLuint firstTemp = 0;
 
    _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
-                             usedTemps, MAX_PROGRAM_TEMPS);
+                             usedTemps, MAX_TEMPS);
 
    assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
    assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
@@ -2775,7 +2777,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
             if (outputMap[var] == -1) {
                numVaryingReads++;
                outputMap[var] = _mesa_find_free_register(usedTemps,
-                                                         MAX_PROGRAM_TEMPS,
+                                                         MAX_TEMPS,
                                                          firstTemp);
                outputTypes[var] = inst->src[j].type;
                firstTemp = outputMap[var] + 1;
@@ -2857,7 +2859,7 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
 void
 glsl_to_tgsi_visitor::simplify_cmp(void)
 {
-   unsigned tempWrites[MAX_PROGRAM_TEMPS];
+   unsigned tempWrites[MAX_TEMPS];
    unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
 
    memset(tempWrites, 0, sizeof(tempWrites));
@@ -2883,7 +2885,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
          prevWriteMask = outputWrites[inst->dst.index];
          outputWrites[inst->dst.index] |= inst->dst.writemask;
       } else if (inst->dst.file == PROGRAM_TEMPORARY) {
-         assert(inst->dst.index < MAX_PROGRAM_TEMPS);
+         assert(inst->dst.index < MAX_TEMPS);
          prevWriteMask = tempWrites[inst->dst.index];
          tempWrites[inst->dst.index] |= inst->dst.writemask;
       }
@@ -3504,7 +3506,7 @@ struct label {
 struct st_translate {
    struct ureg_program *ureg;
 
-   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
+   struct ureg_dst temps[MAX_TEMPS];
    struct ureg_src *constants;
    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
-- 
cgit v1.2.3


From c0dcab2882a4731dccd363a40c3ebcabc88b9c5d Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 8 Jul 2011 21:12:08 -0500
Subject: st/mesa, glsl_to_tgsi: support glDrawPixels/glCopyPixels with a GLSL
 fragment shader active

Since this was previously implemented using Mesa IR and _mesa_combine_programs,
this commit adds a new code path that works with glsl_to_tgsi.
---
 src/mesa/state_tracker/st_cb_drawpixels.c  |  65 +++++++++++++++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 126 +++++++++++++++++++++++++++++
 src/mesa/state_tracker/st_glsl_to_tgsi.h   |   3 +
 3 files changed, 194 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 965fbcd1d9e..f4dd2a42847 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -94,6 +94,67 @@ is_passthrough_program(const struct gl_fragment_program *prog)
 }
 
 
+/* XXX copied verbatim from st_atom_pixeltransfer.c */
+static struct pipe_resource *
+create_color_map_texture(struct gl_context *ctx)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_resource *pt;
+   enum pipe_format format;
+   const uint texSize = 256; /* simple, and usually perfect */
+
+   /* find an RGBA texture format */
+   format = st_choose_format(pipe->screen, GL_RGBA,
+                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
+
+   /* create texture for color map/table */
+   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
+                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
+   return pt;
+}
+
+
+/**
+ * Returns a fragment program which implements the current pixel transfer ops.
+ */
+static struct gl_fragment_program *
+get_glsl_pixel_transfer_program(struct st_context *st,
+                                struct st_fragment_program *orig)
+{
+   int pixelMaps = 0, scaleAndBias = 0;
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *fp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+
+   if (!fp)
+      return NULL;
+
+   if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 ||
+       ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 ||
+       ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 ||
+       ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) {
+      scaleAndBias = 1;
+   }
+
+   pixelMaps = ctx->Pixel.MapColorFlag;
+
+   if (pixelMaps) {
+      /* create the colormap/texture now if not already done */
+      if (!st->pixel_xfer.pixelmap_texture) {
+         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_sampler_view =
+            st_create_texture_sampler_view(st->pipe,
+                                           st->pixel_xfer.pixelmap_texture);
+      }
+   }
+
+   get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi,
+                              scaleAndBias, pixelMaps);
+
+   return &fp->Base;
+}
+
 
 /**
  * Make fragment shader for glDraw/CopyPixels.  This shader is made
@@ -107,11 +168,15 @@ st_make_drawpix_fragment_program(struct st_context *st,
                                  struct gl_fragment_program **fpOut)
 {
    struct gl_program *newProg;
+   struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn;
 
    if (is_passthrough_program(fpIn)) {
       newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
                                              &st->pixel_xfer.program->Base);
    }
+   else if (stfp->glsl_to_tgsi != NULL) {
+      newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp);
+   }
    else {
 #if 0
       /* debug */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d7afc22c048..ae0c92f5f13 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3494,6 +3494,132 @@ glsl_to_tgsi_visitor::renumber_registers(void)
    this->next_temp = new_index;
 }
 
+/**
+ * Sets up fp with a new visitor which implements the current pixel transfer ops.
+ * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
+ */
+extern "C" void
+get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                           glsl_to_tgsi_visitor *original,
+                           int scale_and_bias, int pixel_maps)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+
+   /*
+    * Get initial pixel color from the texture.
+    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
+    */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = 0;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR);
+   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
+   v->samplers_used |= (1 << 0);
+
+   if (scale_and_bias) {
+      static const gl_state_index scale_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_SCALE,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      static const gl_state_index bias_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_BIAS,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      GLint scale_p, bias_p;
+      st_src_reg scale, bias;
+
+      scale_p = _mesa_add_state_reference(params, scale_state);
+      bias_p = _mesa_add_state_reference(params, bias_state);
+
+      /* MAD colorTemp, colorTemp, scale, bias; */
+      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
+      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
+      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
+   }
+
+   if (pixel_maps) {
+      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
+      st_dst_reg temp_dst = st_dst_reg(temp);
+
+      assert(st->pixel_xfer.pixelmap_texture);
+
+      /* With a little effort, we can do four pixel map look-ups with
+       * two TEX instructions:
+       */
+
+      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
+      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
+      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
+      v->samplers_used |= (1 << 1);
+
+      /* MOV colorTemp, temp; */
+      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
+   }
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      st_src_reg src_regs[3];
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT &&
+             src_regs[i].index == FRAG_ATTRIB_COL0)
+         {
+            src_regs[i].file = PROGRAM_TEMPORARY;
+            src_regs[i].index = src0.index;
+         }
+         else if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+         else if (src_regs[i].file == PROGRAM_OUTPUT)
+            prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index);
+      }
+
+      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_combine_parameter_lists(params,
+                                                    original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   _mesa_free_parameter_list(params);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
 /* ------------------------- TGSI conversion stuff -------------------------- */
 struct label {
    unsigned branch_target;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index e21c0d1e0af..7884a9feb71 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -52,6 +52,9 @@ enum pipe_error st_translate_program(
    boolean passthrough_edgeflags);
 
 void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
+void get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                                struct glsl_to_tgsi_visitor *original,
+                                int scale_and_bias, int pixel_maps);
 
 struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
 
-- 
cgit v1.2.3


From 5f0b4b0e9d376f9ec1cb5ae08c36052f4f51ac37 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 10 Jul 2011 17:17:38 -0500
Subject: st/mesa, glsl_to_tgsi: support glBitmap with a GLSL fragment shader
 active

---
 src/mesa/state_tracker/st_cb_bitmap.c      | 35 +++++++++++---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 73 ++++++++++++++++++++++++++++++
 src/mesa/state_tracker/st_glsl_to_tgsi.h   |  3 ++
 3 files changed, 105 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 49b196032b9..f0750b518ad 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -172,6 +172,23 @@ make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
 }
 
 
+static struct gl_program *
+make_bitmap_fragment_program_glsl(struct st_context *st,
+                                  struct st_fragment_program *orig,
+                                  GLuint samplerIndex)
+{
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *fp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+
+   if (!fp)
+      return NULL;
+   
+   get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex);
+   return &fp->Base.Base;
+}
+
+
 static int
 find_free_bit(uint bitfield)
 {
@@ -199,6 +216,7 @@ st_make_bitmap_fragment_program(struct st_context *st,
                                 GLuint *bitmap_sampler)
 {
    struct st_fragment_program *bitmap_prog;
+   struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn;
    struct gl_program *newProg;
    uint sampler;
 
@@ -207,13 +225,18 @@ st_make_bitmap_fragment_program(struct st_context *st,
     * with the bitmap sampler/kill instructions.
     */
    sampler = find_free_bit(fpIn->Base.SamplersUsed);
-   bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
+   
+   if (stfpIn->glsl_to_tgsi)
+      newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler);
+   else {
+      bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
 
-   newProg = _mesa_combine_programs(st->ctx,
-                                    &bitmap_prog->Base.Base,
-                                    &fpIn->Base);
-   /* done with this after combining */
-   st_reference_fragprog(st, &bitmap_prog, NULL);
+      newProg = _mesa_combine_programs(st->ctx,
+                                       &bitmap_prog->Base.Base,
+                                       &fpIn->Base);
+      /* done with this after combining */
+      st_reference_fragprog(st, &bitmap_prog, NULL);
+   }
 
 #if 0
    {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ae0c92f5f13..74f15087947 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3620,6 +3620,79 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    fp->glsl_to_tgsi = v;
 }
 
+/**
+ * Make fragment program for glBitmap:
+ *   Sample the texture and kill the fragment if the bit is 0.
+ * This program will be combined with the user's fragment program.
+ *
+ * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
+ */
+extern "C" void
+get_bitmap_visitor(struct st_fragment_program *fp,
+                   glsl_to_tgsi_visitor *original, int samplerIndex)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+
+   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = samplerIndex;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
+   v->samplers_used |= (1 << samplerIndex);
+
+   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel>0 -> discard */
+   src0.negate = NEGATE_XYZW;
+   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
+      src0.swizzle = SWIZZLE_XXXX;
+   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      st_src_reg src_regs[3];
+
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
 /* ------------------------- TGSI conversion stuff -------------------------- */
 struct label {
    unsigned branch_target;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index 7884a9feb71..d877471785d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -55,6 +55,9 @@ void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
 void get_pixel_transfer_visitor(struct st_fragment_program *fp,
                                 struct glsl_to_tgsi_visitor *original,
                                 int scale_and_bias, int pixel_maps);
+void get_bitmap_visitor(struct st_fragment_program *fp,
+                        struct glsl_to_tgsi_visitor *original,
+                        int samplerIndex);
 
 struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
 
-- 
cgit v1.2.3


From 87f8d8547db9b947ae847c509a464e06d0ac6c64 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 10 Jul 2011 17:36:04 -0500
Subject: glsl_to_tgsi: fix mistakes in get_pixel_transfer_visitor()

I noticed these issues while working on get_bitmap_visitor().
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 74f15087947..3df22eae918 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3534,7 +3534,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    inst->tex_target = TEXTURE_2D_INDEX;
 
    prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
-   prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR);
    prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
    v->samplers_used |= (1 << 0);
 
@@ -3593,6 +3592,9 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
       st_src_reg src_regs[3];
 
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
       for (int i=0; i<3; i++) {
          src_regs[i] = inst->src[i];
          if (src_regs[i].file == PROGRAM_INPUT &&
@@ -3603,8 +3605,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
          }
          else if (src_regs[i].file == PROGRAM_INPUT)
             prog->InputsRead |= (1 << src_regs[i].index);
-         else if (src_regs[i].file == PROGRAM_OUTPUT)
-            prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index);
       }
 
       v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
-- 
cgit v1.2.3


From 7732822c833ee22e259af3f8bd2bfb57c986612e Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 21 Jul 2011 15:49:26 -0500
Subject: glsl_to_tgsi: separate immediates from array constants during IR
 translation

Before, if any uniform or constant array was accessed with indirect
addressing, st_translate_program() would emit uniform constants in the place
of immediates.  This behavior was unavoidable with ir_to_mesa/mesa_to_tgsi, but
glsl_to_tgsi can work around it since the GLSL IR backend and the TGSI
emission are both inside the state tracker.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 143 +++++++++++++++++++----------
 1 file changed, 95 insertions(+), 48 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3df22eae918..389e5d8e2ef 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -70,6 +70,7 @@ extern "C" {
 #include "st_mesa_to_tgsi.h"
 }
 
+#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
                            (1 << PROGRAM_ENV_PARAM) |    \
                            (1 << PROGRAM_STATE_VAR) |    \
@@ -272,6 +273,7 @@ public:
    struct gl_program *prog;
    struct gl_shader_program *shader_program;
    struct gl_shader_compiler_options *options;
+   struct gl_program_parameter_list *immediates;
 
    int next_temp;
 
@@ -505,6 +507,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
       case PROGRAM_UNIFORM:
          this->indirect_addr_consts = true;
          break;
+      case PROGRAM_IMMEDIATE:
+         assert(!"immediates should not have indirect addressing");
+         break;
       default:
          break;
       }
@@ -524,6 +529,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
             case PROGRAM_UNIFORM:
                this->indirect_addr_consts = true;
                break;
+            case PROGRAM_IMMEDIATE:
+               assert(!"immediates should not have indirect addressing");
+               break;
             default:
                break;
             }
@@ -804,12 +812,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
-   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT);
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
    union gl_constant_value uval;
 
    uval.f = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
-        				  &uval, 1, GL_FLOAT, &src.swizzle);
+   src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
+                                                GL_FLOAT, &src.swizzle);
 
    return src;
 }
@@ -817,14 +825,14 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
 {
-   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT);
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
    union gl_constant_value uval;
    
    assert(glsl_version >= 130);
 
    uval.i = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
-        				  &uval, 1, GL_INT, &src.swizzle);
+   src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
+                                                GL_INT, &src.swizzle);
 
    return src;
 }
@@ -1933,9 +1941,15 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    gl_constant_value *values = (gl_constant_value *) stack_vals;
    GLenum gl_type = GL_NONE;
    unsigned int i;
+   gl_register_file file;
+   gl_program_parameter_list *param_list;
+   static int in_array = 0;
+
+   file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
+   param_list = in_array ? this->prog->Parameters : this->immediates;
 
    /* Unfortunately, 4 floats is all we can get into
-    * _mesa_add_unnamed_constant.  So, make a temp to store an
+    * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
     * aggregate constant and move each constant value into it.  If we
     * get lucky, copy propagation will eliminate the extra moves.
     */
@@ -1969,6 +1983,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
       int size = type_size(ir->type->fields.array);
 
       assert(size > 0);
+      in_array++;
 
       for (i = 0; i < ir->type->length; i++) {
          ir->array_elements[i]->accept(this);
@@ -1981,6 +1996,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          }
       }
       this->result = temp_base;
+      in_array--;
       return;
    }
 
@@ -1992,8 +2008,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
          values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
 
-         src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type);
-         src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+         src = st_src_reg(file, -1, ir->type->base_type);
+         src.index = _mesa_add_typed_unnamed_constant(param_list,
                                                       values,
                                                       ir->type->vector_elements,
                                                       GL_FLOAT,
@@ -2007,7 +2023,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
       return;
    }
 
-   src.file = PROGRAM_CONSTANT;
    switch (ir->type->base_type) {
    case GLSL_TYPE_FLOAT:
       gl_type = GL_FLOAT;
@@ -2046,8 +2061,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
       assert(!"Non-float/uint/int/bool constant");
    }
 
-   this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
-   this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+   this->result = st_src_reg(file, -1, ir->type);
+   this->result.index = _mesa_add_typed_unnamed_constant(param_list,
         					   values, ir->type->vector_elements, gl_type,
         					   &this->result.swizzle);
 }
@@ -2430,11 +2445,13 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
    num_address_regs = 0;
    indirect_addr_temps = false;
    indirect_addr_consts = false;
+   immediates = _mesa_new_parameter_list();
    mem_ctx = ralloc_context(NULL);
 }
 
 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
 {
+   _mesa_free_parameter_list(immediates);
    ralloc_free(mem_ctx);
 }
 
@@ -3521,6 +3538,8 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
    v->indirect_addr_consts = original->indirect_addr_consts;
+   _mesa_free_parameter_list(v->immediates);
+   v->immediates = _mesa_clone_parameter_list(original->immediates);
 
    /*
     * Get initial pixel color from the texture.
@@ -3648,6 +3667,8 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
    v->indirect_addr_consts = original->indirect_addr_consts;
+   _mesa_free_parameter_list(v->immediates);
+   v->immediates = _mesa_clone_parameter_list(original->immediates);
 
    /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
    coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
@@ -3707,6 +3728,7 @@ struct st_translate {
 
    struct ureg_dst temps[MAX_TEMPS];
    struct ureg_src *constants;
+   struct ureg_src *immediates;
    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
    struct ureg_dst address[1];
@@ -3797,6 +3819,43 @@ static void set_insn_start( struct st_translate *t,
    t->insn[t->insn_count++] = start;
 }
 
+/**
+ * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
+ */
+static struct ureg_src
+emit_immediate( struct st_translate *t,
+                struct gl_program_parameter_list *params,
+                int index)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   switch(params->Parameters[index].DataType)
+   {
+   case GL_FLOAT:
+   case GL_FLOAT_VEC2:
+   case GL_FLOAT_VEC3:
+   case GL_FLOAT_VEC4:
+      return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4);
+   case GL_INT:
+   case GL_INT_VEC2:
+   case GL_INT_VEC3:
+   case GL_INT_VEC4:
+      return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4);
+   case GL_UNSIGNED_INT:
+   case GL_UNSIGNED_INT_VEC2:
+   case GL_UNSIGNED_INT_VEC3:
+   case GL_UNSIGNED_INT_VEC4:
+   case GL_BOOL:
+   case GL_BOOL_VEC2:
+   case GL_BOOL_VEC3:
+   case GL_BOOL_VEC4:
+      return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4);
+   default:
+      assert(!"should not get here - type must be float, int, uint, or bool");
+      return ureg_src_undef();
+   }
+}
+
 /**
  * Map a Mesa dst register to a TGSI ureg_dst register.
  */
@@ -3871,6 +3930,9 @@ src_register( struct st_translate *t,
       else
          return t->constants[index];
 
+   case PROGRAM_IMMEDIATE:
+      return t->immediates[index];
+
    case PROGRAM_INPUT:
       assert(t->inputMapping[index] < Elements(t->inputs));
       return t->inputs[t->inputMapping[index]];
@@ -4402,9 +4464,8 @@ st_translate_program(
       }
    }
 
-   /* Emit constants and immediates.  Mesa uses a single index space
-    * for these, so we put all the translated regs in t->constants.
-    * XXX: this entire if block depends on proginfo->Parameters from Mesa IR
+   /* Emit constants and uniforms.  TGSI uses a single index space for these, 
+    * so we put all the translated regs in t->constants.
     */
    if (proginfo->Parameters) {
       t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
@@ -4423,49 +4484,34 @@ st_translate_program(
             t->constants[i] = ureg_DECL_constant( ureg, i );
             break;
 
-            /* Emit immediates only when there's no indirect addressing of
-             * the const buffer.
-             * FIXME: Be smarter and recognize param arrays:
-             * indirect addressing is only valid within the referenced
-             * array.
-             */
+         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
+          * addressing of the const buffer.
+          * FIXME: Be smarter and recognize param arrays:
+          * indirect addressing is only valid within the referenced
+          * array.
+          */
          case PROGRAM_CONSTANT:
             if (program->indirect_addr_consts)
                t->constants[i] = ureg_DECL_constant( ureg, i );
             else
-               switch(proginfo->Parameters->Parameters[i].DataType)
-               {
-               case GL_FLOAT:
-               case GL_FLOAT_VEC2:
-               case GL_FLOAT_VEC3:
-               case GL_FLOAT_VEC4:
-                  t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4);
-                  break;
-               case GL_INT:
-               case GL_INT_VEC2:
-               case GL_INT_VEC3:
-               case GL_INT_VEC4:
-                  t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4);
-                  break;
-               case GL_UNSIGNED_INT:
-               case GL_UNSIGNED_INT_VEC2:
-               case GL_UNSIGNED_INT_VEC3:
-               case GL_UNSIGNED_INT_VEC4:
-               case GL_BOOL:
-               case GL_BOOL_VEC2:
-               case GL_BOOL_VEC3:
-               case GL_BOOL_VEC4:
-                  t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4);
-                  break;
-               default:
-                  assert(!"should not get here");
-               }
+               t->constants[i] = emit_immediate( t, proginfo->Parameters, i );
             break;
          default:
             break;
          }
       }
    }
+   
+   /* Emit immediate values.
+    */
+   t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) );
+   if (t->immediates == NULL) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out;
+   }
+   for (i = 0; i < program->immediates->NumParameters; i++) {
+      t->immediates[i] = emit_immediate( t, program->immediates, i );
+   }
 
    /* texture samplers */
    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
@@ -4512,6 +4558,7 @@ out:
    FREE(t->insn);
    FREE(t->labels);
    FREE(t->constants);
+   FREE(t->immediates);
 
    if (t->error) {
       debug_printf("%s: translate error flag set\n", __FUNCTION__);
-- 
cgit v1.2.3


From 0da994a9f15b461d16cf88ce16dc07e98dfada6f Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 21 Jul 2011 16:29:56 -0500
Subject: glsl_to_tgsi: make assignment hack safer

Fixes an assertion failure in piglit test glsl-texcoord-array.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 389e5d8e2ef..6e01a44a733 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1917,12 +1917,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
               type_size(ir->lhs->type) == 1) {
       /* To avoid emitting an extra MOV when assigning an expression to a 
-       * variable, change the destination register of the last instruction 
-       * emitted as part of the expression to the assignment variable.
+       * variable, emit the last instruction of the expression again, but
+       * replace the destination register with the target of the assignment.
+       * Dead code elimination will remove the original instruction.
        */
       glsl_to_tgsi_instruction *inst;
       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      inst->dst = l;
+      emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
    } else {
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          emit(ir, TGSI_OPCODE_MOV, l, r);
-- 
cgit v1.2.3


From a2c3b9f38d81f363bd62abc87dc3abef2beeba95 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 22 Jul 2011 13:23:26 -0500
Subject: glsl_to_tgsi: make coding style more consistent

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 263 ++++++++++++++---------------
 1 file changed, 126 insertions(+), 137 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6e01a44a733..952900a1fb5 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3778,15 +3778,14 @@ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
  * of labels built here and patch the TGSI code with the actual
  * location of each label.
  */
-static unsigned *get_label( struct st_translate *t,
-                            unsigned branch_target )
+static unsigned *get_label(struct st_translate *t, unsigned branch_target)
 {
    unsigned i;
 
    if (t->labels_count + 1 >= t->labels_size) {
       t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
       t->labels = (struct label *)realloc(t->labels, 
-                                          t->labels_size * sizeof t->labels[0]);
+                                          t->labels_size * sizeof(struct label));
       if (t->labels == NULL) {
          static unsigned dummy;
          t->error = TRUE;
@@ -3805,12 +3804,11 @@ static unsigned *get_label( struct st_translate *t,
  * Update the insn[] array so the next Mesa instruction points to
  * the next TGSI instruction.
  */
-static void set_insn_start( struct st_translate *t,
-                            unsigned start )
+static void set_insn_start(struct st_translate *t, unsigned start)
 {
    if (t->insn_count + 1 >= t->insn_size) {
       t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
-      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]);
+      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
       if (t->insn == NULL) {
          t->error = TRUE;
          return;
@@ -3824,9 +3822,9 @@ static void set_insn_start( struct st_translate *t,
  * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
  */
 static struct ureg_src
-emit_immediate( struct st_translate *t,
-                struct gl_program_parameter_list *params,
-                int index)
+emit_immediate(struct st_translate *t,
+               struct gl_program_parameter_list *params,
+               int index)
 {
    struct ureg_program *ureg = t->ureg;
 
@@ -3861,17 +3859,17 @@ emit_immediate( struct st_translate *t,
  * Map a Mesa dst register to a TGSI ureg_dst register.
  */
 static struct ureg_dst
-dst_register( struct st_translate *t,
-              gl_register_file file,
-              GLuint index )
+dst_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
 {
-   switch( file ) {
+   switch(file) {
    case PROGRAM_UNDEFINED:
       return ureg_dst_undef();
 
    case PROGRAM_TEMPORARY:
       if (ureg_dst_is_undef(t->temps[index]))
-         t->temps[index] = ureg_DECL_temporary( t->ureg );
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
 
       return t->temps[index];
 
@@ -3894,7 +3892,7 @@ dst_register( struct st_translate *t,
       return t->address[index];
 
    default:
-      debug_assert( 0 );
+      assert(!"unknown dst register file");
       return ureg_dst_undef();
    }
 }
@@ -3903,11 +3901,11 @@ dst_register( struct st_translate *t,
  * Map a Mesa src register to a TGSI ureg_src register.
  */
 static struct ureg_src
-src_register( struct st_translate *t,
-              gl_register_file file,
-              GLuint index )
+src_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
 {
-   switch( file ) {
+   switch(file) {
    case PROGRAM_UNDEFINED:
       return ureg_src_undef();
 
@@ -3915,7 +3913,7 @@ src_register( struct st_translate *t,
       assert(index >= 0);
       assert(index < Elements(t->temps));
       if (ureg_dst_is_undef(t->temps[index]))
-         t->temps[index] = ureg_DECL_temporary( t->ureg );
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
       return ureg_src(t->temps[index]);
 
    case PROGRAM_NAMED_PARAM:
@@ -3927,7 +3925,7 @@ src_register( struct st_translate *t,
    case PROGRAM_STATE_VAR:
    case PROGRAM_CONSTANT:       /* ie, immediate */
       if (index < 0)
-         return ureg_DECL_constant( t->ureg, 0 );
+         return ureg_DECL_constant(t->ureg, 0);
       else
          return t->constants[index];
 
@@ -3950,7 +3948,7 @@ src_register( struct st_translate *t,
       return t->systemValues[index];
 
    default:
-      debug_assert( 0 );
+      assert(!"unknown src register file");
       return ureg_src_undef();
    }
 }
@@ -3959,22 +3957,21 @@ src_register( struct st_translate *t,
  * Create a TGSI ureg_dst register from an st_dst_reg.
  */
 static struct ureg_dst
-translate_dst( struct st_translate *t,
-               const st_dst_reg *dst_reg,
-               boolean saturate )
+translate_dst(struct st_translate *t,
+              const st_dst_reg *dst_reg,
+              bool saturate)
 {
-   struct ureg_dst dst = dst_register( t, 
-                                       dst_reg->file,
-                                       dst_reg->index );
+   struct ureg_dst dst = dst_register(t, 
+                                      dst_reg->file,
+                                      dst_reg->index);
 
-   dst = ureg_writemask( dst, 
-                         dst_reg->writemask );
+   dst = ureg_writemask(dst, dst_reg->writemask);
    
    if (saturate)
-      dst = ureg_saturate( dst );
+      dst = ureg_saturate(dst);
 
    if (dst_reg->reladdr != NULL)
-      dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
+      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
 
    return dst;
 }
@@ -3983,16 +3980,15 @@ translate_dst( struct st_translate *t,
  * Create a TGSI ureg_src register from an st_src_reg.
  */
 static struct ureg_src
-translate_src( struct st_translate *t,
-               const st_src_reg *src_reg )
+translate_src(struct st_translate *t, const st_src_reg *src_reg)
 {
-   struct ureg_src src = src_register( t, src_reg->file, src_reg->index );
+   struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
 
-   src = ureg_swizzle( src,
-                       GET_SWZ( src_reg->swizzle, 0 ) & 0x3,
-                       GET_SWZ( src_reg->swizzle, 1 ) & 0x3,
-                       GET_SWZ( src_reg->swizzle, 2 ) & 0x3,
-                       GET_SWZ( src_reg->swizzle, 3 ) & 0x3);
+   src = ureg_swizzle(src,
+                      GET_SWZ(src_reg->swizzle, 0) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 1) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 2) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 3) & 0x3);
 
    if ((src_reg->negate & 0xf) == NEGATE_XYZW)
       src = ureg_negate(src);
@@ -4024,8 +4020,8 @@ translate_src( struct st_translate *t,
 }
 
 static void
-compile_tgsi_instruction(struct st_translate *t, 
-        			     const struct glsl_to_tgsi_instruction *inst)
+compile_tgsi_instruction(struct st_translate *t,
+                         const struct glsl_to_tgsi_instruction *inst)
 {
    struct ureg_program *ureg = t->ureg;
    GLuint i;
@@ -4034,29 +4030,29 @@ compile_tgsi_instruction(struct st_translate *t,
    unsigned num_dst;
    unsigned num_src;
 
-   num_dst = num_inst_dst_regs( inst->op );
-   num_src = num_inst_src_regs( inst->op );
+   num_dst = num_inst_dst_regs(inst->op);
+   num_src = num_inst_src_regs(inst->op);
 
    if (num_dst) 
-      dst[0] = translate_dst( t, 
-                              &inst->dst,
-                              inst->saturate);
+      dst[0] = translate_dst(t, 
+                             &inst->dst,
+                             inst->saturate);
 
    for (i = 0; i < num_src; i++) 
-      src[i] = translate_src( t, &inst->src[i] );
+      src[i] = translate_src(t, &inst->src[i]);
 
-   switch( inst->op ) {
+   switch(inst->op) {
    case TGSI_OPCODE_BGNLOOP:
    case TGSI_OPCODE_CAL:
    case TGSI_OPCODE_ELSE:
    case TGSI_OPCODE_ENDLOOP:
    case TGSI_OPCODE_IF:
-      debug_assert(num_dst == 0);
-      ureg_label_insn( ureg,
-                       inst->op,
-                       src, num_src,
-                       get_label( t, 
-                                  inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 ));
+      assert(num_dst == 0);
+      ureg_label_insn(ureg,
+                      inst->op,
+                      src, num_src,
+                      get_label(t, 
+                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
       return;
 
    case TGSI_OPCODE_TEX:
@@ -4065,27 +4061,23 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_TXL:
    case TGSI_OPCODE_TXP:
       src[num_src++] = t->samplers[inst->sampler];
-      ureg_tex_insn( ureg,
-                     inst->op,
-                     dst, num_dst, 
-                     translate_texture_target( inst->tex_target,
-                                               inst->tex_shadow ),
-                     src, num_src );
+      ureg_tex_insn(ureg,
+                    inst->op,
+                    dst, num_dst, 
+                    translate_texture_target(inst->tex_target, inst->tex_shadow),
+                    src, num_src);
       return;
 
    case TGSI_OPCODE_SCS:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
-      ureg_insn( ureg, 
-                 inst->op, 
-                 dst, num_dst, 
-                 src, num_src );
+      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
+      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
       break;
 
    default:
-      ureg_insn( ureg, 
-                 inst->op, 
-                 dst, num_dst, 
-                 src, num_src );
+      ureg_insn(ureg,
+                inst->op,
+                dst, num_dst,
+                src, num_src);
       break;
    }
 }
@@ -4095,9 +4087,9 @@ compile_tgsi_instruction(struct st_translate *t,
  * Basically, add (adjX, adjY) to the fragment position.
  */
 static void
-emit_adjusted_wpos( struct st_translate *t,
-                    const struct gl_program *program,
-                    GLfloat adjX, GLfloat adjY)
+emit_adjusted_wpos(struct st_translate *t,
+                   const struct gl_program *program,
+                   float adjX, float adjY)
 {
    struct ureg_program *ureg = t->ureg;
    struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
@@ -4119,9 +4111,9 @@ emit_adjusted_wpos( struct st_translate *t,
  * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
  */
 static void
-emit_wpos_inversion( struct st_translate *t,
-                     const struct gl_program *program,
-                     boolean invert)
+emit_wpos_inversion(struct st_translate *t,
+                    const struct gl_program *program,
+                    bool invert)
 {
    struct ureg_program *ureg = t->ureg;
 
@@ -4140,7 +4132,7 @@ emit_wpos_inversion( struct st_translate *t,
    unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                        wposTransformState);
 
-   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
+   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
    struct ureg_dst wpos_temp;
    struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
 
@@ -4149,26 +4141,26 @@ emit_wpos_inversion( struct st_translate *t,
    if (wpos_input.File == TGSI_FILE_TEMPORARY)
       wpos_temp = ureg_dst(wpos_input);
    else {
-      wpos_temp = ureg_DECL_temporary( ureg );
-      ureg_MOV( ureg, wpos_temp, wpos_input );
+      wpos_temp = ureg_DECL_temporary(ureg);
+      ureg_MOV(ureg, wpos_temp, wpos_input);
    }
 
    if (invert) {
       /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
        */
-      ureg_MAD( ureg,
-                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
-                wpos_input,
-                ureg_scalar(wpostrans, 0),
-                ureg_scalar(wpostrans, 1));
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 0),
+               ureg_scalar(wpostrans, 1));
    } else {
       /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
        */
-      ureg_MAD( ureg,
-                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
-                wpos_input,
-                ureg_scalar(wpostrans, 2),
-                ureg_scalar(wpostrans, 3));
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 2),
+               ureg_scalar(wpostrans, 3));
    }
 
    /* Use wpos_temp as position input from here on:
@@ -4312,7 +4304,7 @@ st_translate_program(
    const GLuint outputMapping[],
    const ubyte outputSemanticName[],
    const ubyte outputSemanticIndex[],
-   boolean passthrough_edgeflags )
+   boolean passthrough_edgeflags)
 {
    struct st_translate translate, *t;
    unsigned i;
@@ -4358,27 +4350,24 @@ st_translate_program(
       for (i = 0; i < numOutputs; i++) {
          switch (outputSemanticName[i]) {
          case TGSI_SEMANTIC_POSITION:
-            t->outputs[i] = ureg_DECL_output( ureg,
-                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
-                                              outputSemanticIndex[i] );
-
-            t->outputs[i] = ureg_writemask( t->outputs[i],
-                                            TGSI_WRITEMASK_Z );
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
             break;
          case TGSI_SEMANTIC_STENCIL:
-            t->outputs[i] = ureg_DECL_output( ureg,
-                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
-                                              outputSemanticIndex[i] );
-            t->outputs[i] = ureg_writemask( t->outputs[i],
-                                            TGSI_WRITEMASK_Y );
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
             break;
          case TGSI_SEMANTIC_COLOR:
-            t->outputs[i] = ureg_DECL_output( ureg,
-                                              TGSI_SEMANTIC_COLOR,
-                                              outputSemanticIndex[i] );
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_COLOR,
+                                             outputSemanticIndex[i]);
             break;
          default:
-            debug_assert(0);
+            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
             return PIPE_ERROR_BAD_INPUT;
          }
       }
@@ -4392,9 +4381,9 @@ st_translate_program(
       }
 
       for (i = 0; i < numOutputs; i++) {
-         t->outputs[i] = ureg_DECL_output( ureg,
-                                           outputSemanticName[i],
-                                           outputSemanticIndex[i] );
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
       }
    }
    else {
@@ -4405,9 +4394,9 @@ st_translate_program(
       }
 
       for (i = 0; i < numOutputs; i++) {
-         t->outputs[i] = ureg_DECL_output( ureg,
-                                           outputSemanticName[i],
-                                           outputSemanticIndex[i] );
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
          if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
             /* Writing to the point size result register requires special
              * handling to implement clamping.
@@ -4421,8 +4410,8 @@ st_translate_program(
             unsigned pointSizeClampConst =
                _mesa_add_state_reference(proginfo->Parameters,
                                          pointSizeClampState);
-            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
-            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
+            struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
+            t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
             t->pointSizeResult = t->outputs[i];
             t->pointSizeOutIndex = i;
             t->outputs[i] = psizregtemp;
@@ -4435,8 +4424,8 @@ st_translate_program(
    /* Declare address register.
     */
    if (program->num_address_regs > 0) {
-      debug_assert( program->num_address_regs == 1 );
-      t->address[0] = ureg_DECL_address( ureg );
+      assert(program->num_address_regs == 1);
+      t->address[0] = ureg_DECL_address(ureg);
    }
 
    /* Declare misc input registers
@@ -4461,7 +4450,7 @@ st_translate_program(
        */
       for (i = 0; i < (unsigned)program->next_temp; i++) {
          /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
-         t->temps[i] = ureg_DECL_temporary( t->ureg );
+         t->temps[i] = ureg_DECL_temporary(t->ureg);
       }
    }
 
@@ -4469,7 +4458,7 @@ st_translate_program(
     * so we put all the translated regs in t->constants.
     */
    if (proginfo->Parameters) {
-      t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
+      t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
       if (t->constants == NULL) {
          ret = PIPE_ERROR_OUT_OF_MEMORY;
          goto out;
@@ -4482,7 +4471,7 @@ st_translate_program(
          case PROGRAM_STATE_VAR:
          case PROGRAM_NAMED_PARAM:
          case PROGRAM_UNIFORM:
-            t->constants[i] = ureg_DECL_constant( ureg, i );
+            t->constants[i] = ureg_DECL_constant(ureg, i);
             break;
 
          /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
@@ -4493,9 +4482,9 @@ st_translate_program(
           */
          case PROGRAM_CONSTANT:
             if (program->indirect_addr_consts)
-               t->constants[i] = ureg_DECL_constant( ureg, i );
+               t->constants[i] = ureg_DECL_constant(ureg, i);
             else
-               t->constants[i] = emit_immediate( t, proginfo->Parameters, i );
+               t->constants[i] = emit_immediate(t, proginfo->Parameters, i);
             break;
          default:
             break;
@@ -4505,27 +4494,28 @@ st_translate_program(
    
    /* Emit immediate values.
     */
-   t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) );
+   t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src));
    if (t->immediates == NULL) {
       ret = PIPE_ERROR_OUT_OF_MEMORY;
       goto out;
    }
    for (i = 0; i < program->immediates->NumParameters; i++) {
-      t->immediates[i] = emit_immediate( t, program->immediates, i );
+      assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE);
+      t->immediates[i] = emit_immediate(t, program->immediates, i);
    }
 
    /* texture samplers */
    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
       if (program->samplers_used & (1 << i)) {
-         t->samplers[i] = ureg_DECL_sampler( ureg, i );
+         t->samplers[i] = ureg_DECL_sampler(ureg, i);
       }
    }
 
    /* Emit each instruction in turn:
     */
    foreach_iter(exec_list_iterator, iter, program->instructions) {
-      set_insn_start( t, ureg_get_instruction_number( ureg ));
-      compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() );
+      set_insn_start(t, ureg_get_instruction_number(ureg));
+      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
 
       if (t->prevInstWrotePointSize && proginfo->Id) {
          /* The previous instruction wrote to the (fake) vertex point size
@@ -4535,14 +4525,14 @@ st_translate_program(
           * Note that we can't do this easily at the end of program due to
           * possible early return.
           */
-         set_insn_start( t, ureg_get_instruction_number( ureg ));
-         ureg_MAX( t->ureg,
-                   ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
-                   ureg_src(t->outputs[t->pointSizeOutIndex]),
-                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
-         ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
-                   ureg_src(t->outputs[t->pointSizeOutIndex]),
-                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+         set_insn_start(t, ureg_get_instruction_number(ureg));
+         ureg_MAX(t->ureg,
+                  ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+         ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 2,2,2,2));
       }
       t->prevInstWrotePointSize = GL_FALSE;
    }
@@ -4550,9 +4540,8 @@ st_translate_program(
    /* Fix up all emitted labels:
     */
    for (i = 0; i < t->labels_count; i++) {
-      ureg_fixup_label( ureg,
-                        t->labels[i].token,
-                        t->insn[t->labels[i].branch_target] );
+      ureg_fixup_label(ureg, t->labels[i].token,
+                       t->insn[t->labels[i].branch_target]);
    }
 
 out:
@@ -4582,7 +4571,7 @@ get_mesa_program(struct gl_context *ctx,
    struct gl_program *prog;
    GLenum target;
    const char *target_string;
-   GLboolean progress;
+   bool progress;
    struct gl_shader_compiler_options *options =
          &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
 
-- 
cgit v1.2.3


From f751730ad003bb19ce85bc4d0abddaf40edde6c1 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 22 Jul 2011 13:24:42 -0500
Subject: glsl_to_tgsi: update comments

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 952900a1fb5..3a69a439822 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -236,7 +236,7 @@ public:
    /**
     * identifier of this function signature used by the program.
     *
-    * At the point that Mesa instructions for function calls are
+    * At the point that TGSI instructions for function calls are
     * generated, we don't know the address of the first instruction of
     * the function body.  So we make the BranchTarget that is called a
     * small integer and rewrite them during set_branchtargets().
@@ -251,10 +251,9 @@ public:
    glsl_to_tgsi_instruction *bgn_inst;
 
    /**
-    * Index of the first instruction of the function body in actual
-    * Mesa IR.
+    * Index of the first instruction of the function body in actual TGSI.
     *
-    * Set after convertion from glsl_to_tgsi_instruction to prog_instruction.
+    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
     */
    int inst;
 
@@ -1672,7 +1671,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
    } else {
       st_src_reg array_base = this->result;
       /* Variable index array dereference.  It eats the "vec4" of the
-       * base of the array and an index that offsets the Mesa register
+       * base of the array and an index that offsets the TGSI register
        * index.
        */
       ir->array_index->accept(this);
@@ -1879,7 +1878,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       /* Swizzle a small RHS vector into the channels being written.
        *
        * glsl ir treats write_mask as dictating how many channels are
-       * present on the RHS while Mesa IR treats write_mask as just
+       * present on the RHS while TGSI treats write_mask as just
        * showing which channels of the vec4 RHS get written.
        */
       for (int i = 0; i < 4; i++) {
@@ -2202,8 +2201,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
 
    /* Put our coords in a temp.  We'll need to modify them for shadow,
     * projection, or LOD, so the only case we'd use it as is is if
-    * we're doing plain old texturing.  Mesa IR optimization should
-    * handle cleaning up our mess in that case.
+    * we're doing plain old texturing.  The optimization passes on
+    * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
     */
    coord = get_temp(glsl_type::vec4_type);
    coord_dst = st_dst_reg(coord);
@@ -3799,9 +3798,9 @@ static unsigned *get_label(struct st_translate *t, unsigned branch_target)
 }
 
 /**
- * Called prior to emitting the TGSI code for each Mesa instruction.
+ * Called prior to emitting the TGSI code for each instruction.
  * Allocate additional space for instructions if needed.
- * Update the insn[] array so the next Mesa instruction points to
+ * Update the insn[] array so the next glsl_to_tgsi_instruction points to
  * the next TGSI instruction.
  */
 static void set_insn_start(struct st_translate *t, unsigned start)
@@ -3856,7 +3855,7 @@ emit_immediate(struct st_translate *t,
 }
 
 /**
- * Map a Mesa dst register to a TGSI ureg_dst register.
+ * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
  */
 static struct ureg_dst
 dst_register(struct st_translate *t,
@@ -3898,7 +3897,7 @@ dst_register(struct st_translate *t,
 }
 
 /**
- * Map a Mesa src register to a TGSI ureg_src register.
+ * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
  */
 static struct ureg_src
 src_register(struct st_translate *t,
-- 
cgit v1.2.3


From 3354a5b56398f90fc36ab14b6444aae27b50e859 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 27 Jul 2011 15:20:19 -0500
Subject: glsl_to_tgsi: rework immediate tracking to not use
 gl_program_parameter_list

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 135 +++++++++++++++++++----------
 1 file changed, 88 insertions(+), 47 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3a69a439822..6039488f26b 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -229,6 +229,20 @@ public:
    ir_variable *var; /* variable that maps to this, if any */
 };
 
+class immediate_storage : public exec_node {
+public:
+   immediate_storage(gl_constant_value *values, int size, int type)
+   {
+      memcpy(this->values, values, size * sizeof(gl_constant_value));
+      this->size = size;
+      this->type = type;
+   }
+   
+   gl_constant_value values[4];
+   int size; /**< Number of components (1-4) */
+   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+};
+
 class function_entry : public exec_node {
 public:
    ir_function_signature *sig;
@@ -272,7 +286,6 @@ public:
    struct gl_program *prog;
    struct gl_shader_program *shader_program;
    struct gl_shader_compiler_options *options;
-   struct gl_program_parameter_list *immediates;
 
    int next_temp;
 
@@ -285,6 +298,9 @@ public:
 
    variable_storage *find_variable_storage(ir_variable *var);
 
+   int add_constant(gl_register_file file, gl_constant_value values[4],
+                    int size, int datatype, GLuint *swizzle_out);
+
    function_entry *get_function_signature(ir_function_signature *sig);
 
    st_src_reg get_temp(const glsl_type *type);
@@ -326,6 +342,10 @@ public:
    /** List of variable_storage */
    exec_list variables;
 
+   /** List of immediate_storage */
+   exec_list immediates;
+   int num_immediates;
+
    /** List of function_entry */
    exec_list function_signatures;
    int next_signature_id;
@@ -808,6 +828,42 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
    }
 }
 
+int
+glsl_to_tgsi_visitor::add_constant(gl_register_file file,
+        		     gl_constant_value values[4], int size, int datatype,
+        		     GLuint *swizzle_out)
+{
+   if (file == PROGRAM_CONSTANT) {
+      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
+                                              size, datatype, swizzle_out);
+   } else {
+      int index = 0;
+      immediate_storage *entry;
+      assert(file == PROGRAM_IMMEDIATE);
+      fprintf(stderr, "adding immediate\n");
+
+      /* Search immediate storage to see if we already have an identical
+       * immediate that we can use instead of adding a duplicate entry.
+       */
+      foreach_iter(exec_list_iterator, iter, this->immediates) {
+         entry = (immediate_storage *)iter.get();
+         
+         if (entry->size == size &&
+             entry->type == datatype &&
+             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
+             return index;
+         }
+         index++;
+      }
+      
+      /* Add this immediate to the list. */
+      entry = new(mem_ctx) immediate_storage(values, size, datatype);
+      this->immediates.push_tail(entry);
+      this->num_immediates++;
+      return index;
+   }
+}
+
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
@@ -815,8 +871,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
    union gl_constant_value uval;
 
    uval.f = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
-                                                GL_FLOAT, &src.swizzle);
+   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
 
    return src;
 }
@@ -830,8 +885,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
    assert(glsl_version >= 130);
 
    uval.i = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
-                                                GL_INT, &src.swizzle);
+   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
 
    return src;
 }
@@ -1941,12 +1995,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    gl_constant_value *values = (gl_constant_value *) stack_vals;
    GLenum gl_type = GL_NONE;
    unsigned int i;
-   gl_register_file file;
-   gl_program_parameter_list *param_list;
    static int in_array = 0;
-
-   file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
-   param_list = in_array ? this->prog->Parameters : this->immediates;
+   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
 
    /* Unfortunately, 4 floats is all we can get into
     * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
@@ -2009,11 +2059,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
 
          src = st_src_reg(file, -1, ir->type->base_type);
-         src.index = _mesa_add_typed_unnamed_constant(param_list,
-                                                      values,
-                                                      ir->type->vector_elements,
-                                                      GL_FLOAT,
-                                                      &src.swizzle);
+         src.index = add_constant(file,
+                                  values,
+                                  ir->type->vector_elements,
+                                  GL_FLOAT,
+                                  &src.swizzle);
          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
 
          mat_column.index++;
@@ -2062,9 +2112,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    }
 
    this->result = st_src_reg(file, -1, ir->type);
-   this->result.index = _mesa_add_typed_unnamed_constant(param_list,
-        					   values, ir->type->vector_elements, gl_type,
-        					   &this->result.swizzle);
+   this->result.index = add_constant(file,
+                                     values,
+                                     ir->type->vector_elements,
+                                     gl_type,
+                                     &this->result.swizzle);
 }
 
 function_entry *
@@ -2441,17 +2493,16 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
    result.file = PROGRAM_UNDEFINED;
    next_temp = 1;
    next_signature_id = 1;
+   num_immediates = 0;
    current_function = NULL;
    num_address_regs = 0;
    indirect_addr_temps = false;
    indirect_addr_consts = false;
-   immediates = _mesa_new_parameter_list();
    mem_ctx = ralloc_context(NULL);
 }
 
 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
 {
-   _mesa_free_parameter_list(immediates);
    ralloc_free(mem_ctx);
 }
 
@@ -3538,8 +3589,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
    v->indirect_addr_consts = original->indirect_addr_consts;
-   _mesa_free_parameter_list(v->immediates);
-   v->immediates = _mesa_clone_parameter_list(original->immediates);
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
 
    /*
     * Get initial pixel color from the texture.
@@ -3667,8 +3717,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
    v->indirect_addr_consts = original->indirect_addr_consts;
-   _mesa_free_parameter_list(v->immediates);
-   v->immediates = _mesa_clone_parameter_list(original->immediates);
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
 
    /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
    coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
@@ -3822,32 +3871,20 @@ static void set_insn_start(struct st_translate *t, unsigned start)
  */
 static struct ureg_src
 emit_immediate(struct st_translate *t,
-               struct gl_program_parameter_list *params,
-               int index)
+               gl_constant_value values[4],
+               int type, int size)
 {
    struct ureg_program *ureg = t->ureg;
 
-   switch(params->Parameters[index].DataType)
+   switch(type)
    {
    case GL_FLOAT:
-   case GL_FLOAT_VEC2:
-   case GL_FLOAT_VEC3:
-   case GL_FLOAT_VEC4:
-      return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4);
+      return ureg_DECL_immediate(ureg, &values[0].f, size);
    case GL_INT:
-   case GL_INT_VEC2:
-   case GL_INT_VEC3:
-   case GL_INT_VEC4:
-      return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4);
+      return ureg_DECL_immediate_int(ureg, &values[0].i, size);
    case GL_UNSIGNED_INT:
-   case GL_UNSIGNED_INT_VEC2:
-   case GL_UNSIGNED_INT_VEC3:
-   case GL_UNSIGNED_INT_VEC4:
    case GL_BOOL:
-   case GL_BOOL_VEC2:
-   case GL_BOOL_VEC3:
-   case GL_BOOL_VEC4:
-      return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4);
+      return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
    default:
       assert(!"should not get here - type must be float, int, uint, or bool");
       return ureg_src_undef();
@@ -4483,7 +4520,10 @@ st_translate_program(
             if (program->indirect_addr_consts)
                t->constants[i] = ureg_DECL_constant(ureg, i);
             else
-               t->constants[i] = emit_immediate(t, proginfo->Parameters, i);
+               t->constants[i] = emit_immediate(t,
+                                                proginfo->Parameters->ParameterValues[i],
+                                                proginfo->Parameters->Parameters[i].DataType,
+                                                4);
             break;
          default:
             break;
@@ -4493,14 +4533,15 @@ st_translate_program(
    
    /* Emit immediate values.
     */
-   t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src));
+   t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
    if (t->immediates == NULL) {
       ret = PIPE_ERROR_OUT_OF_MEMORY;
       goto out;
    }
-   for (i = 0; i < program->immediates->NumParameters; i++) {
-      assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE);
-      t->immediates[i] = emit_immediate(t, program->immediates, i);
+   i = 0;
+   foreach_iter(exec_list_iterator, iter, program->immediates) {
+      immediate_storage *imm = (immediate_storage *)iter.get();
+      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
    }
 
    /* texture samplers */
-- 
cgit v1.2.3


From 10d31cb307f90a08fafed5c67945ffe53d279940 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 27 Jul 2011 15:45:16 -0500
Subject: glsl_to_tgsi: lower all ir_quadop_vector expressions

Unlike Mesa IR, TGSI doesn't have a SWZ opcode.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6039488f26b..0cbfc943a05 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4825,7 +4825,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
          progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
 
-         progress = lower_quadop_vector(ir, true) || progress;
+         progress = lower_quadop_vector(ir, false) || progress;
 
          if (options->EmitNoIfs) {
             progress = lower_discard(ir) || progress;
-- 
cgit v1.2.3


From 3e7fce9773ec332665326a785b6ed1fcf5bd578e Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 27 Jul 2011 16:36:10 -0500
Subject: glsl_to_tgsi: add each relative address to the previous

This is a glsl_to_tgsi port of commit d6e1a8f71437.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 0cbfc943a05..f66e240a177 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1741,6 +1741,18 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
               this->result, st_src_reg_for_float(element_size));
       }
 
+      /* If there was already a relative address register involved, add the
+       * new and the old together to get the new offset.
+       */
+      if (src.reladdr != NULL) {
+         st_src_reg accum_reg = get_temp(glsl_type::float_type);
+
+         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
+              index_reg, *src.reladdr);
+
+         index_reg = accum_reg;
+      }
+
       src.reladdr = ralloc(mem_ctx, st_src_reg);
       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
    }
-- 
cgit v1.2.3


From 189e9f12c7d3a82d7dd28695935a83e4319bb267 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 27 Jul 2011 16:39:40 -0500
Subject: glsl_to_tgsi: copy reladdr in st_src_reg(st_dst_reg) constructor

This is a glsl_to_tgsi port of commit f7cd9a858c04.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f66e240a177..ba4074eecd5 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -174,7 +174,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
    this->index = reg.index;
    this->swizzle = SWIZZLE_XYZW;
    this->negate = 0;
-   this->reladdr = NULL;
+   this->reladdr = reg.reladdr;
 }
 
 st_dst_reg::st_dst_reg(st_src_reg reg)
-- 
cgit v1.2.3


From 81b036b4d79423c194596461b098a525af0102c2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 30 Jul 2011 16:44:49 -0700
Subject: i965/gen5+: Fix incorrect miptree layout for non-power-of-two
 cubemaps.

For power-of-two sizes, h0 == mt->height0 since it's already a multiple
of two.  However, for NPOT, they're different; h1 should be computed
based on the original size.

Fixes piglit test "cubemap npot" and oglconform test "textureNPOT".

NOTE: This is a candidate for stable release branches.

Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index f462f32b19a..46a417a08ed 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
 	   * given in Volume 1 of the BSpec.
 	   */
 	  h0 = ALIGN(mt->height0, align_h);
-	  h1 = ALIGN(minify(h0), align_h);
+	  h1 = ALIGN(minify(mt->height0), align_h);
 	  qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h);
           if (mt->compressed)
 	     qpitch /= 4;
-- 
cgit v1.2.3


From 89193933cbd322cd08fb54232411a8a9221fcca8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 28 Jul 2011 15:10:17 -0700
Subject: mesa: Ensure that gl_shader_program::InfoLog is never NULL

This prevents assertion failures in ralloc_strcat.  The ralloc_free in
_mesa_free_shader_program_data can be omitted because freeing the
gl_shader_program in _mesa_delete_shader_program will take care of
this automatically.

A bunch of this code could use a refactor to use ralloc a bit more
effectively.  A bunch of the things that are allocated with malloc and
owned by the gl_shader_program should be allocated with ralloc (using
the gl_shader_program as the context).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/main.cpp         |  1 +
 src/mesa/main/shaderobj.c | 11 ++++++-----
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'src/mesa')

diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 9f85096e1a1..9b8a50738ac 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -221,6 +221,7 @@ main(int argc, char **argv)
 
    whole_program = rzalloc (NULL, struct gl_shader_program);
    assert(whole_program != NULL);
+   whole_program->InfoLog = ralloc_strdup(whole_program, "");
 
    for (/* empty */; argc > optind; optind++) {
       whole_program->Shaders =
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 33d91ad594d..f128648f477 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -244,6 +244,8 @@ _mesa_init_shader_program(struct gl_context *ctx, struct gl_shader_program *prog
    prog->Geom.InputType = GL_TRIANGLES;
    prog->Geom.OutputType = GL_TRIANGLE_STRIP;
 #endif
+
+   prog->InfoLog = ralloc_strdup(prog, "");
 }
 
 /**
@@ -283,6 +285,10 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
       _mesa_free_parameter_list(shProg->Varying);
       shProg->Varying = NULL;
    }
+
+   assert(shProg->InfoLog != NULL);
+   ralloc_free(shProg->InfoLog);
+   shProg->InfoLog = ralloc_strdup(shProg, "");
 }
 
 
@@ -317,11 +323,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx,
       shProg->Shaders = NULL;
    }
 
-   if (shProg->InfoLog) {
-      ralloc_free(shProg->InfoLog);
-      shProg->InfoLog = NULL;
-   }
-
    /* Transform feedback varying vars */
    for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) {
       free(shProg->TransformFeedback.VaryingNames[i]);
-- 
cgit v1.2.3


From 8aadd89d07d750aadd10989fa9c81f8a2fdd98e2 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 25 Jul 2011 15:55:59 -0700
Subject: ir_to_mesa: Use linker_error instead of fail_link

The functions were almost identical.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 53 +++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 8b4a535b75f..a0188128e2a 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -331,20 +331,6 @@ dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
 
 dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
 
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
-
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...)
-{
-   va_list args;
-   va_start(args, fmt);
-   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
-   va_end(args);
-
-   prog->LinkStatus = GL_FALSE;
-}
-
 static int
 swizzle_for_size(int size)
 {
@@ -789,10 +775,11 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
 
       if (storage->file == PROGRAM_TEMPORARY &&
 	  dst.index != storage->index + (int) ir->num_state_slots) {
-	 fail_link(this->shader_program,
-		   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
-		   ir->name, dst.index - storage->index,
-		   type_size(ir->type));
+	 linker_error(this->shader_program,
+		      "failed to load builtin uniform `%s' "
+		      "(%d/%d regs loaded)\n",
+		      ir->name, dst.index - storage->index,
+		      type_size(ir->type));
       }
    }
 }
@@ -2413,29 +2400,32 @@ check_resources(const struct gl_context *ctx,
    case GL_VERTEX_PROGRAM_ARB:
       if (_mesa_bitcount(prog->SamplersUsed) >
           ctx->Const.MaxVertexTextureImageUnits) {
-         fail_link(shader_program, "Too many vertex shader texture samplers");
+         linker_error(shader_program,
+		      "Too many vertex shader texture samplers");
       }
       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many vertex shader constants");
+         linker_error(shader_program, "Too many vertex shader constants");
       }
       break;
    case MESA_GEOMETRY_PROGRAM:
       if (_mesa_bitcount(prog->SamplersUsed) >
           ctx->Const.MaxGeometryTextureImageUnits) {
-         fail_link(shader_program, "Too many geometry shader texture samplers");
+         linker_error(shader_program,
+		      "Too many geometry shader texture samplers");
       }
       if (prog->Parameters->NumParameters >
           MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
-         fail_link(shader_program, "Too many geometry shader constants");
+         linker_error(shader_program, "Too many geometry shader constants");
       }
       break;
    case GL_FRAGMENT_PROGRAM_ARB:
       if (_mesa_bitcount(prog->SamplersUsed) >
           ctx->Const.MaxTextureImageUnits) {
-         fail_link(shader_program, "Too many fragment shader texture samplers");
+         linker_error(shader_program,
+		      "Too many fragment shader texture samplers");
       }
       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many fragment shader constants");
+         linker_error(shader_program, "Too many fragment shader constants");
       }
       break;
    default:
@@ -2550,9 +2540,10 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
 	  * from _mesa_add_uniform) has to match what the linker chose.
 	  */
 	 if (index != parameter_index) {
-	    fail_link(shader_program, "Allocation of uniform `%s' to target "
-		      "failed (%d vs %d)\n",
-		      uniform->Name, index, parameter_index);
+	    linker_error(shader_program,
+			 "Allocation of uniform `%s' to target failed "
+			 "(%d vs %d)\n",
+			 uniform->Name, index, parameter_index);
 	 }
       }
    }
@@ -2585,8 +2576,8 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
    int loc = _mesa_get_uniform_location(ctx, shader_program, name);
 
    if (loc == -1) {
-      fail_link(shader_program,
-		"Couldn't find uniform for initializer %s\n", name);
+      linker_error(shader_program,
+		   "Couldn't find uniform for initializer %s\n", name);
       return;
    }
 
@@ -2987,7 +2978,7 @@ get_mesa_program(struct gl_context *ctx,
             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
 
       if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
-	 fail_link(shader_program, "Couldn't flatten if statement\n");
+	 linker_error(shader_program, "Couldn't flatten if statement\n");
       }
 
       switch (mesa_inst->Opcode) {
@@ -3258,7 +3249,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
    for (i = 0; i < prog->NumShaders; i++) {
       if (!prog->Shaders[i]->CompileStatus) {
-	 fail_link(prog, "linking with uncompiled shader");
+	 linker_error(prog, "linking with uncompiled shader");
 	 prog->LinkStatus = GL_FALSE;
       }
    }
-- 
cgit v1.2.3


From 322c3bf9dc4c6edbf5a8793475ce1307e1c0186b Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 25 Jul 2011 15:58:07 -0700
Subject: ir_to_mesa: Emit warnings instead of errors for IR that can't be
 lowered

Rely on the driver to do the right thing.  This probably means falling
back to software.  Page 88 of the OpenGL 2.1 spec specifically says:

    "A shader should not fail to compile, and a program object should
    not fail to link due to lack of instruction space or lack of
    temporary variables. Implementations should ensure that all valid
    shaders and program objects may be successfully compiled, linked
    and executed."

There is no provision for saying "No" to a valid shader that is
difficult for the hardware to handle, so stop doing that.

On i915 this causes a large number of piglit tests to change from FAIL
to WARN.  The warning is because the driver still emits messages to
stderr like "i915_program_error: Unsupported opcode: BGNLOOP".

It also fixes ES2 conformance CorrectFull_frag and CorrectParse1_frag
on i915 (and probably other hardware that can't handle loops).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index a0188128e2a..382cda0c703 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2977,11 +2977,31 @@ get_mesa_program(struct gl_context *ctx,
          if (mesa_inst->SrcReg[src].RelAddr)
             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
 
-      if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
-	 linker_error(shader_program, "Couldn't flatten if statement\n");
-      }
-
       switch (mesa_inst->Opcode) {
+      case OPCODE_IF:
+	 if (options->EmitNoIfs) {
+	    linker_warning(shader_program,
+			   "Couldn't flatten if-statement.  "
+			   "This will likely result in software "
+			   "rasterization.\n");
+	 }
+	 break;
+      case OPCODE_BGNLOOP:
+	 if (options->EmitNoLoops) {
+	    linker_warning(shader_program,
+			   "Couldn't unroll loop.  "
+			   "This will likely result in software "
+			   "rasterization.\n");
+	 }
+	 break;
+      case OPCODE_CONT:
+	 if (options->EmitNoCont) {
+	    linker_warning(shader_program,
+			   "Couldn't lower continue-statement.  "
+			   "This will likely result in software "
+			   "rasterization.\n");
+	 }
+	 break;
       case OPCODE_BGNSUB:
 	 inst->function->inst = i;
 	 mesa_inst->Comment = strdup(inst->function->sig->function_name());
-- 
cgit v1.2.3


From 3bb2f0dde1cd813a0b5e0b45be376f4d6606aeb8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 25 Jul 2011 16:41:39 -0700
Subject: i915: Fail without crashing if a Mesa IR program uses too many
 registers

This can only happen in GLSL shaders because assembly shaders that use
too many temps are rejected by core Mesa.  It is easiest to make this
happen with shaders that contain flow-control that could not be lowered.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i915/i915_fragprog.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 6e1d7092237..32050cebf33 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -303,7 +303,7 @@ do {									\
 /* 
  * TODO: consider moving this into core 
  */
-static void calc_live_regs( struct i915_fragment_program *p )
+static bool calc_live_regs( struct i915_fragment_program *p )
 {
     const struct gl_fragment_program *program = &p->FragProg;
     GLuint regsUsed = 0xffff0000;
@@ -317,6 +317,9 @@ static void calc_live_regs( struct i915_fragment_program *p )
 
         /* Register is written to: unmark as live for this and preceeding ops */ 
         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+	    if (inst->DstReg.Index > 16)
+	       return false;
+
             live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
             if (live_components[inst->DstReg.Index] == 0)
                 regsUsed &= ~(1 << inst->DstReg.Index);
@@ -327,6 +330,9 @@ static void calc_live_regs( struct i915_fragment_program *p )
             if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
                 unsigned c;
 
+		if (inst->SrcReg[a].Index > 16)
+		   return false;
+
                 regsUsed |= 1 << inst->SrcReg[a].Index;
 
                 for (c = 0; c < 4; c++) {
@@ -340,6 +346,8 @@ static void calc_live_regs( struct i915_fragment_program *p )
 
         p->usedRegs[i] = regsUsed;
     }
+
+    return true;
 }
 
 static GLuint get_live_regs( struct i915_fragment_program *p, 
@@ -394,7 +402,10 @@ upload_program(struct i915_fragment_program *p)
 
    /* Not always needed:
     */
-   calc_live_regs(p);
+   if (!calc_live_regs(p)) {
+      i915_program_error(p, "Could not allocate registers");
+      return;
+   }
 
    while (1) {
       GLuint src0, src1, src2, flags;
-- 
cgit v1.2.3


From 0290a018a50bd4a3180af3233f145f4de7b63706 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 10 Feb 2011 13:20:26 -0800
Subject: i915: Only emit program errors when INTEL_DEBUG=wm or
 INTEL_DEBUG=fallbacks

This makes piglit a lot more happy.  The errors are logged when
INTEL_DEBUG=fallbacks because the application is about to hit a big
software fallback.  We frequently ask people to run applications that
are hitting software fallbacks with INTEL_DEBUG=fallbacks so that we
can help them debug the reason for the software fallback.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i915/i915_program.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c
index ca1949b223e..0a600d30bef 100644
--- a/src/mesa/drivers/dri/i915/i915_program.c
+++ b/src/mesa/drivers/dri/i915/i915_program.c
@@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values)
 void
 i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
 {
-   va_list args;
+   if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) {
+      va_list args;
 
-   fprintf(stderr, "i915_program_error: ");
-   va_start(args, fmt);
-   vfprintf(stderr, fmt, args);
-   va_end(args);
+      fprintf(stderr, "i915_program_error: ");
+      va_start(args, fmt);
+      vfprintf(stderr, fmt, args);
+      va_end(args);
 
-   fprintf(stderr, "\n");
+      fprintf(stderr, "\n");
+   }
    p->error = 1;
 }
 
-- 
cgit v1.2.3


From 0f1aae3ae7cef051f87dae056c46fcfd0afaab20 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 1 Aug 2011 16:06:59 -0700
Subject: intel: Fix unused variable warning.

---
 src/mesa/drivers/dri/intel/intel_fbo.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index e48d6ef9cbd..65ad621e770 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -596,7 +596,6 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb,
 				   struct intel_texture_image *intel_image,
 				   int zoffset)
 {
-   struct intel_mipmap_tree *mt = intel_image->mt;
    unsigned int dst_x, dst_y;
 
    /* compute offset of the particular 2D image within the texture region */
-- 
cgit v1.2.3


From 7cf799d47269ce01d3e5981709744a16b7c2756c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:36:57 -0700
Subject: radeon: Remove set-but-unused color_mask variable.

This has been around since the initial import in 2003 and never used.
---
 src/mesa/drivers/dri/r200/r200_ioctl.c     | 3 ---
 src/mesa/drivers/dri/radeon/radeon_ioctl.c | 3 ---
 2 files changed, 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
index 02201cb53d6..44a794da396 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.c
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
@@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask )
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
    GLuint flags = 0;
-   GLuint color_mask = 0;
    GLuint orig_mask = mask;
 
    if ( R200_DEBUG & RADEON_IOCTL ) {
@@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask )
 
    if ( mask & BUFFER_BIT_FRONT_LEFT ) {
       flags |= RADEON_FRONT;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_FRONT_LEFT;
    }
 
    if ( mask & BUFFER_BIT_BACK_LEFT ) {
       flags |= RADEON_BACK;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_BACK_LEFT;
    }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
index a91d8727792..c23e9c2d2a2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask )
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
    __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
    GLuint flags = 0;
-   GLuint color_mask = 0;
    GLuint orig_mask = mask;
 
    if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
@@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask )
 
    if ( mask & BUFFER_BIT_FRONT_LEFT ) {
       flags |= RADEON_FRONT;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_FRONT_LEFT;
    }
 
    if ( mask & BUFFER_BIT_BACK_LEFT ) {
       flags |= RADEON_BACK;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_BACK_LEFT;
    }
 
-- 
cgit v1.2.3


From 25fffa9364baef76a7e7e875be1fb3c4f10aadfd Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:39:43 -0700
Subject: radeon: Remove set-but-unused log2depth variable.

r100 doesn't support 3D textures (GL_EXT_texture3D).
---
 src/mesa/drivers/dri/radeon/radeon_texstate.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 9ba98e303a7..3abaa1504a4 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -1018,7 +1018,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit )
 static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
 {
    const struct gl_texture_image *firstImage;
-   GLint log2Width, log2Height, log2Depth, texelBytes;
+   GLint log2Width, log2Height, texelBytes;
 
    if ( t->bo ) {
 	return GL_TRUE;
@@ -1033,7 +1033,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int
 
    log2Width  = firstImage->WidthLog2;
    log2Height = firstImage->HeightLog2;
-   log2Depth  = firstImage->DepthLog2;
    texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
 
    if (!t->image_override) {
-- 
cgit v1.2.3


From f5e612ab594689c7736f8af082e88c107bd7582c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:41:59 -0700
Subject: radeon: Remove set-but-unused variables in radeonSetTexBuffer2()
 variants.

These have been unused since 2009.
---
 src/mesa/drivers/dri/r200/r200_texstate.c     | 6 ------
 src/mesa/drivers/dri/r300/r300_texstate.c     | 5 -----
 src/mesa/drivers/dri/r600/evergreen_tex.c     | 7 -------
 src/mesa/drivers/dri/r600/r600_texstate.c     | 5 -----
 src/mesa/drivers/dri/radeon/radeon_texstate.c | 6 ------
 5 files changed, 29 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 7adf9ad73ed..8c9bd6d00b2 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
 	struct radeon_renderbuffer *rb;
 	radeon_texture_image *rImage;
 	radeonContextPtr radeon;
-	r200ContextPtr rmesa;
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, format;
 	gl_format texFormat;
 
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
-	rmesa = pDRICtx->driverPrivate;
 
 	rfb = dPriv->driverPrivate;
         texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index e24ad6f088d..e4388a021ed 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
 	gl_format texFormat;
 
-	type = GL_BGRA;
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
 	rmesa = pDRICtx->driverPrivate;
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c
index 9784a8484f2..d240a216817 100644
--- a/src/mesa/drivers/dri/r600/evergreen_tex.c
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.c
@@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur
 	struct radeon_renderbuffer *rb;
 	radeon_texture_image *rImage;
 	radeonContextPtr radeon;
-	context_t *rmesa;
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
 	gl_format texFormat;
 
-	type = GL_BGRA;
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
-	rmesa = pDRICtx->driverPrivate;
 
 	rfb = dPriv->driverPrivate;
         texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 949db29c189..65fae7195fd 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
         gl_format texFormat;
 
-	type = GL_BGRA;
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
 	rmesa = pDRICtx->driverPrivate;
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 3abaa1504a4..430309392a0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form
 	struct radeon_renderbuffer *rb;
 	radeon_texture_image *rImage;
 	radeonContextPtr radeon;
-	r100ContextPtr rmesa;
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, format;
 	gl_format texFormat;
 
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA);
-
 	radeon = pDRICtx->driverPrivate;
-	rmesa = pDRICtx->driverPrivate;
 
 	rfb = dPriv->driverPrivate;
         texUnit = _mesa_get_current_tex_unit(radeon->glCtx);
-- 
cgit v1.2.3


From 8de1d42f244f6315c471b01ef52a61f61d227c6d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:47:18 -0700
Subject: radeon: Remove set-but-unused variables in radeon_lock.c

These have been unused since this function's introduction in the FBO
support development around 2009.
---
 src/mesa/drivers/dri/radeon/radeon_lock.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
index 7b6bd36dcf7..ae8a212f806 100644
--- a/src/mesa/drivers/dri/radeon/radeon_lock.c
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
@@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon
 		)
 {
 	char ret = 0;
-	struct radeon_framebuffer *rfb = NULL;
-	struct radeon_renderbuffer *rrb = NULL;
-
-	if (radeon_get_drawable(radeon)) {
-		rfb = radeon_get_drawable(radeon)->driverPrivate;
-
-		if (rfb)
-			rrb = radeon_get_renderbuffer(&rfb->base,
-						      rfb->base._ColorDrawBufferIndexes[0]);
-	}
 
 	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
 		if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1)
-- 
cgit v1.2.3


From e0e4c2e30552e524c91b2eb98a2dabdcd4666169 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:49:05 -0700
Subject: radeon: Remove some remaining set-but-unused variables.

These looked more like copy-and-paste to me than the others (which
looked more like possibly someone forgot to write some code in a
refactor), so I didn't verify where they came from.
---
 src/mesa/drivers/dri/r600/r600_cmdbuf.c             | 2 --
 src/mesa/drivers/dri/radeon/radeon_common.c         | 8 --------
 src/mesa/drivers/dri/radeon/radeon_common_context.c | 2 --
 src/mesa/drivers/dri/radeon/radeon_cs_legacy.c      | 2 --
 4 files changed, 14 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index ce2f7779563..74f048b1062 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi,
                                   uint32_t * reloc_chunk,
                                   uint32_t * length_dw_reloc_chunk) 
 {
-    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
     struct r600_cs_reloc_legacy *relocs;
     int i, j, r;
 
     uint32_t offset_dw = 0;
 
-    csm = (struct r600_cs_manager_legacy*)csi->csm;
     relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
 restart:
     for (i = 0; i < csi->crelocs; i++) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index bfc307ca987..e7a6623cf84 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv,
 		       const drm_clip_rect_t	  *rect)
 {
 	radeonContextPtr rmesa;
-	struct radeon_framebuffer *rfb;
 	GLint nbox, i, ret;
 
 	assert(dPriv);
@@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv,
 
 	LOCK_HARDWARE(rmesa);
 
-	rfb = dPriv->driverPrivate;
-
 	if ( RADEON_DEBUG & RADEON_IOCTL ) {
 		fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
 	}
@@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv )
 {
 	radeonContextPtr radeon;
 	GLint ret;
-	__DRIscreen *psp;
-	struct radeon_renderbuffer *rrb;
 	struct radeon_framebuffer *rfb;
 
 	assert(dPriv);
@@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv )
 
 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
 	rfb = dPriv->driverPrivate;
-	rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-
-	psp = dPriv->driScreenPriv;
 
 	LOCK_HARDWARE(radeon);
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index bf8925f61d0..c08b79484af 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon)
     __DRIcontext *driContext = radeon->dri.context;
     __DRIdrawable *drawable;
     __DRIscreen *screen;
-    struct radeon_framebuffer *draw;
 
     screen = driContext->driScreenPriv;
     if (!screen->dri2.loader)
@@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon)
 	    radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
 
 	/* Intel driver does the equivalent of this, no clue if it is needed:*/
-	draw = drawable->driverPrivate;
 	radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
 
 	driContext->dri2.draw_stamp = drawable->dri2.stamp;
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
index c2722a4e195..5595b705b15 100644
--- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
@@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs,
 
 static int cs_process_relocs(struct radeon_cs_int *cs)
 {
-    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
     struct cs_reloc_legacy *relocs;
     int i, j, r;
 
-    csm = (struct cs_manager_legacy*)cs->csm;
     relocs = (struct cs_reloc_legacy *)cs->relocs;
 restart:
     for (i = 0; i < cs->crelocs; i++) 
-- 
cgit v1.2.3


From 57590e173b6f421b1015190aa3c0011ea55f31d8 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 28 Jul 2011 15:26:01 +0200
Subject: st/mesa: determine Const.MaxSamples in init_extensions

v2: Check for non-pow2 sample counts as well.
---
 src/mesa/state_tracker/st_extensions.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index b5f6d356eb0..8e900934054 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -228,6 +228,7 @@ void st_init_extensions(struct st_context *st)
 {
    struct pipe_screen *screen = st->pipe->screen;
    struct gl_context *ctx = st->ctx;
+   int i;
 
    /*
     * Extensions that are supported by all Gallium drivers:
@@ -605,6 +606,16 @@ void st_init_extensions(struct st_context *st)
       ctx->Extensions.EXT_packed_float = GL_TRUE;
    }
 
+   /* Maximum sample count. */
+   for (i = 16; i > 0; --i) {
+      if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM,
+                                      PIPE_TEXTURE_2D, i,
+                                      PIPE_BIND_RENDER_TARGET)) {
+         ctx->Const.MaxSamples = i;
+         break;
+      }
+   }
+
    if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) {
       ctx->Extensions.ARB_seamless_cube_map = GL_TRUE;
       ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE;
-- 
cgit v1.2.3


From f253d83bc72e7d26df8cd3a04747b3d46a8543e6 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 3 Aug 2011 16:01:41 +0200
Subject: st/mesa: implement multisample resolve via BlitFramebuffer

---
 src/mesa/state_tracker/st_cb_blit.c | 116 ++++++++++++++++++++++++++++++++++--
 1 file changed, 111 insertions(+), 5 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 416be194d11..276d10fb557 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -61,6 +61,81 @@ st_destroy_blit(struct st_context *st)
 
 #if FEATURE_EXT_framebuffer_blit
 
+static void
+st_BlitFramebuffer_resolve(struct gl_context *ctx,
+                           GLbitfield mask,
+                           struct pipe_resolve_info *info)
+{
+   const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+
+   struct st_context *st = st_context(ctx);
+
+   struct st_renderbuffer *srcRb, *dstRb;
+
+   if (mask & GL_COLOR_BUFFER_BIT) {
+      srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+      dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
+
+      info->mask = PIPE_MASK_RGBA;
+
+      info->src.res = srcRb->texture;
+      info->src.layer = srcRb->surface->u.tex.first_layer;
+      info->dst.res = dstRb->texture;
+      info->dst.level = dstRb->surface->u.tex.level;
+      info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+      st->pipe->resource_resolve(st->pipe, info);
+   }
+
+   if (mask & depthStencil) {
+      struct gl_renderbuffer_attachment *srcDepth, *srcStencil;
+      struct gl_renderbuffer_attachment *dstDepth, *dstStencil;
+
+      srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH];
+      dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH];
+      srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL];
+      dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL];
+
+      const boolean combined =
+         st_is_depth_stencil_combined(srcDepth, srcStencil) &&
+         st_is_depth_stencil_combined(dstDepth, dstStencil);
+
+      if ((mask & GL_DEPTH_BUFFER_BIT) || combined) {
+         /* resolve depth and, if combined and requested, stencil as well */
+         srcRb = st_renderbuffer(srcDepth->Renderbuffer);
+         dstRb = st_renderbuffer(dstDepth->Renderbuffer);
+
+         info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0;
+         if (combined && (mask & GL_STENCIL_BUFFER_BIT))
+            info->mask |= PIPE_MASK_S;
+
+         info->src.res = srcRb->texture;
+         info->src.layer = srcRb->surface->u.tex.first_layer;
+         info->dst.res = dstRb->texture;
+         info->dst.level = dstRb->surface->u.tex.level;
+         info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+         st->pipe->resource_resolve(st->pipe, info);
+      }
+
+      if (mask & GL_STENCIL_BUFFER_BIT) {
+         /* resolve separate stencil buffer */
+         srcRb = st_renderbuffer(srcStencil->Renderbuffer);
+         dstRb = st_renderbuffer(dstStencil->Renderbuffer);
+
+         info->mask = PIPE_MASK_S;
+
+         info->src.res = srcRb->texture;
+         info->src.layer = srcRb->surface->u.tex.first_layer;
+         info->dst.res = dstRb->texture;
+         info->dst.level = dstRb->surface->u.tex.level;
+         info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+         st->pipe->resource_resolve(st->pipe, info);
+      }
+   }
+}
+
 static void
 st_BlitFramebuffer(struct gl_context *ctx,
                    GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
@@ -95,6 +170,42 @@ st_BlitFramebuffer(struct gl_context *ctx,
       srcY1 = readFB->Height - srcY1;
    }
 
+   /* Disable conditional rendering. */
+   if (st->render_condition) {
+      st->pipe->render_condition(st->pipe, NULL, 0);
+   }
+
+   if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) {
+      struct pipe_resolve_info info;
+
+      if (dstX0 < dstX1) {
+         info.dst.x0 = dstX0;
+         info.dst.x1 = dstX1;
+         info.src.x0 = srcX0;
+         info.src.x1 = srcX1;
+      } else {
+         info.dst.x0 = dstX1;
+         info.dst.x1 = dstX0;
+         info.src.x0 = srcX1;
+         info.src.x1 = srcX0;
+      }
+      if (dstY0 < dstY1) {
+         info.dst.y0 = dstY0;
+         info.dst.y1 = dstY1;
+         info.src.y0 = srcY0;
+         info.src.y1 = srcY1;
+      } else {
+         info.dst.y0 = dstY1;
+         info.dst.y1 = dstY0;
+         info.src.y0 = srcY1;
+         info.src.y1 = srcY0;
+      }
+
+      st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */
+
+      goto done;
+   }
+
    if (srcY0 > srcY1 && dstY0 > dstY1) {
       /* Both src and dst are upside down.  Swap Y to make it
        * right-side up to increase odds of using a fast path.
@@ -109,11 +220,6 @@ st_BlitFramebuffer(struct gl_context *ctx,
       dstY1 = tmp;
    }
 
-   /* Disable conditional rendering. */
-   if (st->render_condition) {
-      st->pipe->render_condition(st->pipe, NULL, 0);
-   }
-
    if (mask & GL_COLOR_BUFFER_BIT) {
       struct gl_renderbuffer_attachment *srcAtt =
          &readFB->Attachment[readFB->_ColorReadBufferIndex];
-- 
cgit v1.2.3


From 88a4f2fe543d7c394c0ad732ae60f8cf94c0d357 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:30 -0600
Subject: mesa: make error handling in glGetTexParameter() a bit more concise

---
 src/mesa/main/texparam.c | 266 +++++++++++++++++------------------------------
 1 file changed, 96 insertions(+), 170 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 134f15346e8..78dcc5dccea 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -994,28 +994,21 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
             *params = 0;
          break;
       case GL_TEXTURE_DEPTH_SIZE_ARB:
-         if (ctx->Extensions.ARB_depth_texture)
-            *params = _mesa_get_format_bits(texFormat, pname);
-         else
+         if (!ctx->Extensions.ARB_depth_texture)
             goto invalid_pname;
+         *params = _mesa_get_format_bits(texFormat, pname);
          break;
       case GL_TEXTURE_STENCIL_SIZE_EXT:
-         if (ctx->Extensions.EXT_packed_depth_stencil ||
-             ctx->Extensions.ARB_framebuffer_object) {
-            *params = _mesa_get_format_bits(texFormat, pname);
-         }
-         else {
+         if (!ctx->Extensions.EXT_packed_depth_stencil &&
+             !ctx->Extensions.ARB_framebuffer_object)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, pname);
          break;
       case GL_TEXTURE_SHARED_SIZE:
-         if (ctx->VersionMajor >= 3 ||
-             ctx->Extensions.EXT_texture_shared_exponent) {
-            *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
-         }
-         else {
+         if (ctx->VersionMajor < 3 &&
+             !ctx->Extensions.EXT_texture_shared_exponent)
             goto invalid_pname;
-         }
+         *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
          break;
 
       /* GL_ARB_texture_compression */
@@ -1036,67 +1029,46 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
 
       /* GL_ARB_texture_float */
       case GL_TEXTURE_RED_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_GREEN_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_BLUE_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_ALPHA_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_LUMINANCE_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_INTENSITY_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_DEPTH_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
 
       default:
@@ -1118,7 +1090,6 @@ void GLAPIENTRY
 _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
 {
    struct gl_texture_object *obj;
-   GLboolean error = GL_FALSE;
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
@@ -1187,49 +1158,37 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          *params = (GLfloat) obj->MaxLevel;
          break;
       case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-         if (ctx->Extensions.EXT_texture_filter_anisotropic) {
-            *params = obj->Sampler.MaxAnisotropy;
-         }
-	 else
-	    error = GL_TRUE;
+         if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+            goto invalid_pname;
+         *params = obj->Sampler.MaxAnisotropy;
          break;
       case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
-         if (ctx->Extensions.ARB_shadow_ambient) {
-            *params = obj->Sampler.CompareFailValue;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow_ambient)
+            goto invalid_pname;
+         *params = obj->Sampler.CompareFailValue;
          break;
       case GL_GENERATE_MIPMAP_SGIS:
 	 *params = (GLfloat) obj->GenerateMipmap;
          break;
       case GL_TEXTURE_COMPARE_MODE_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLfloat) obj->Sampler.CompareMode;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CompareMode;
          break;
       case GL_TEXTURE_COMPARE_FUNC_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLfloat) obj->Sampler.CompareFunc;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CompareFunc;
          break;
       case GL_DEPTH_TEXTURE_MODE_ARB:
-         if (ctx->Extensions.ARB_depth_texture) {
-            *params = (GLfloat) obj->Sampler.DepthMode;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_depth_texture)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.DepthMode;
          break;
       case GL_TEXTURE_LOD_BIAS:
-         if (ctx->Extensions.EXT_texture_lod_bias) {
-            *params = obj->Sampler.LodBias;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.EXT_texture_lod_bias)
+            goto invalid_pname;
+         *params = obj->Sampler.LodBias;
          break;
 #if FEATURE_OES_draw_texture
       case GL_TEXTURE_CROP_RECT_OES:
@@ -1244,46 +1203,40 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
       case GL_TEXTURE_SWIZZLE_G_EXT:
       case GL_TEXTURE_SWIZZLE_B_EXT:
       case GL_TEXTURE_SWIZZLE_A_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
-            *params = (GLfloat) obj->Swizzle[comp];
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
          break;
 
       case GL_TEXTURE_SWIZZLE_RGBA_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
+         if (!ctx->Extensions.EXT_texture_swizzle) {
+            goto invalid_pname;
+         }
+         else {
             GLuint comp;
             for (comp = 0; comp < 4; comp++) {
                params[comp] = (GLfloat) obj->Swizzle[comp];
             }
          }
-         else {
-            error = GL_TRUE;
-         }
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-         if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
-            *params = (GLfloat) obj->Sampler.CubeMapSeamless;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CubeMapSeamless;
          break;
 
       default:
-	 error = GL_TRUE;
-	 break;
+         goto invalid_pname;
    }
 
-   if (error)
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)",
-		  pname);
+   /* no error if we get here */
+   _mesa_unlock_texture(ctx, obj);
+   return;
 
+invalid_pname:
    _mesa_unlock_texture(ctx, obj);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname);
 }
 
 
@@ -1291,7 +1244,6 @@ void GLAPIENTRY
 _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
 {
    struct gl_texture_object *obj;
-   GLboolean error = GL_FALSE;
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
@@ -1355,55 +1307,37 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
          *params = obj->MaxLevel;
          break;;
       case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-         if (ctx->Extensions.EXT_texture_filter_anisotropic) {
-            *params = (GLint) obj->Sampler.MaxAnisotropy;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.MaxAnisotropy;
          break;
       case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
-         if (ctx->Extensions.ARB_shadow_ambient) {
-            *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow_ambient)
+            goto invalid_pname;
+         *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
          break;
       case GL_GENERATE_MIPMAP_SGIS:
 	 *params = (GLint) obj->GenerateMipmap;
          break;
       case GL_TEXTURE_COMPARE_MODE_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLint) obj->Sampler.CompareMode;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CompareMode;
          break;
       case GL_TEXTURE_COMPARE_FUNC_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLint) obj->Sampler.CompareFunc;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CompareFunc;
          break;
       case GL_DEPTH_TEXTURE_MODE_ARB:
-         if (ctx->Extensions.ARB_depth_texture) {
-            *params = (GLint) obj->Sampler.DepthMode;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_depth_texture)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.DepthMode;
          break;
       case GL_TEXTURE_LOD_BIAS:
-         if (ctx->Extensions.EXT_texture_lod_bias) {
-            *params = (GLint) obj->Sampler.LodBias;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_lod_bias)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.LodBias;
          break;
 #if FEATURE_OES_draw_texture
       case GL_TEXTURE_CROP_RECT_OES:
@@ -1417,42 +1351,34 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
       case GL_TEXTURE_SWIZZLE_G_EXT:
       case GL_TEXTURE_SWIZZLE_B_EXT:
       case GL_TEXTURE_SWIZZLE_A_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
-            *params = obj->Swizzle[comp];
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
          break;
 
       case GL_TEXTURE_SWIZZLE_RGBA_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            COPY_4V(params, obj->Swizzle);
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         COPY_4V(params, obj->Swizzle);
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-         if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
-            *params = (GLint) obj->Sampler.CubeMapSeamless;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CubeMapSeamless;
          break;
 
       default:
-         ; /* silence warnings */
+         goto invalid_pname;
    }
 
-   if (error)
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)",
-		  pname);
+   /* no error if we get here */
+   _mesa_unlock_texture(ctx, obj);
+   return;
 
+invalid_pname:
    _mesa_unlock_texture(ctx, obj);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname);
 }
 
 
-- 
cgit v1.2.3


From 1254a2b2e45c6961a57d9c60f561907183ef7de7 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: mesa: condense GL_TEXTURE_RESIDENT query code

---
 src/mesa/main/texparam.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 78dcc5dccea..0dec0172989 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1133,14 +1133,8 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          }
          break;
       case GL_TEXTURE_RESIDENT:
-         {
-            GLboolean resident;
-            if (ctx->Driver.IsTextureResident)
-               resident = ctx->Driver.IsTextureResident(ctx, obj);
-            else
-               resident = GL_TRUE;
-            *params = ENUM_TO_FLOAT(resident);
-         }
+         *params = ctx->Driver.IsTextureResident ?
+            ctx->Driver.IsTextureResident(ctx, obj) : 1.0F;
          break;
       case GL_TEXTURE_PRIORITY:
          *params = obj->Priority;
@@ -1282,14 +1276,8 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
          }
          break;;
       case GL_TEXTURE_RESIDENT:
-         {
-            GLboolean resident;
-            if (ctx->Driver.IsTextureResident)
-               resident = ctx->Driver.IsTextureResident(ctx, obj);
-            else
-               resident = GL_TRUE;
-            *params = (GLint) resident;
-         }
+         *params = ctx->Driver.IsTextureResident ?
+            ctx->Driver.IsTextureResident(ctx, obj) : 1;
          break;;
       case GL_TEXTURE_PRIORITY:
          *params = FLOAT_TO_INT(obj->Priority);
-- 
cgit v1.2.3


From 02d81dfcaf073b5f7073d405e931b3d3e9f577ef Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: mesa: add null ptr checks in GetTexParameterI[u]iv() functions

---
 src/mesa/main/texparam.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 0dec0172989..97d0359f170 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1379,6 +1379,8 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params)
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
    texObj = get_texobj(ctx, target, GL_TRUE);
+   if (!texObj)
+      return;
    
    switch (pname) {
    case GL_TEXTURE_BORDER_COLOR:
@@ -1399,6 +1401,8 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params)
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
    texObj = get_texobj(ctx, target, GL_TRUE);
+   if (!texObj)
+      return;
    
    switch (pname) {
    case GL_TEXTURE_BORDER_COLOR:
-- 
cgit v1.2.3


From 1e89a526c6cd21852b440904711c5ee733ce1ad2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: mesa: whitespace, formatting fixes in GetTexParameter() code

---
 src/mesa/main/texparam.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 97d0359f170..bbbb306b2d9 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1115,17 +1115,15 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          *params = ENUM_TO_FLOAT(obj->Sampler.WrapR);
          break;
       case GL_TEXTURE_BORDER_COLOR:
-         if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
+         if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
             _mesa_update_state_locked(ctx);
-         if(ctx->Color._ClampFragmentColor)
-         {
+         if (ctx->Color._ClampFragmentColor) {
             params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F);
             params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F);
             params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F);
             params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F);
          }
-         else
-         {
+         else {
             params[0] = obj->Sampler.BorderColor.f[0];
             params[1] = obj->Sampler.BorderColor.f[1];
             params[2] = obj->Sampler.BorderColor.f[2];
@@ -1241,9 +1239,9 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-    obj = get_texobj(ctx, target, GL_TRUE);
-    if (!obj)
-       return;
+   obj = get_texobj(ctx, target, GL_TRUE);
+   if (!obj)
+      return;
 
    _mesa_lock_texture(ctx, obj);
    switch (pname) {
-- 
cgit v1.2.3


From 192baaac0fc4701e82dcc3e19b3033f81dd82a62 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: mesa: minor comment updates in enable.c

---
 src/mesa/main/enable.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index aac8b9c5eaf..3ba4df6342f 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -5,7 +5,6 @@
 
 /*
  * Mesa 3-D graphics library
- * Version:  7.0.3
  *
  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
  *
@@ -560,7 +559,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
          ctx->Polygon.OffsetLine = state;
          break;
       case GL_POLYGON_OFFSET_FILL:
-         /*case GL_POLYGON_OFFSET_EXT:*/
          if (ctx->Polygon.OffsetFill == state)
             return;
          FLUSH_VERTICES(ctx, _NEW_POLYGON);
@@ -643,9 +641,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
          break;
 #endif
 
-      /*
-       * CLIENT STATE!!!
-       */
+      /* client-side state */
       case GL_VERTEX_ARRAY:
       case GL_NORMAL_ARRAY:
       case GL_COLOR_ARRAY:
@@ -1174,7 +1170,6 @@ _mesa_IsEnabled( GLenum cap )
       case GL_POLYGON_OFFSET_LINE:
 	 return ctx->Polygon.OffsetLine;
       case GL_POLYGON_OFFSET_FILL:
-      /*case GL_POLYGON_OFFSET_EXT:*/
 	 return ctx->Polygon.OffsetFill;
       case GL_RESCALE_NORMAL_EXT:
          return ctx->Transform.RescaleNormals;
@@ -1213,9 +1208,7 @@ _mesa_IsEnabled( GLenum cap )
          }
 #endif
 
-      /*
-       * CLIENT STATE!!!
-       */
+      /* client-side state */
       case GL_VERTEX_ARRAY:
          return (ctx->Array.ArrayObj->Vertex.Enabled != 0);
       case GL_NORMAL_ARRAY:
-- 
cgit v1.2.3


From 09ba2527e885f6134002205716a44d01d83638c3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: st/mesa: move declaration before code

---
 src/mesa/state_tracker/st_cb_blit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 276d10fb557..626db12431d 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -90,13 +90,14 @@ st_BlitFramebuffer_resolve(struct gl_context *ctx,
    if (mask & depthStencil) {
       struct gl_renderbuffer_attachment *srcDepth, *srcStencil;
       struct gl_renderbuffer_attachment *dstDepth, *dstStencil;
+      boolean combined;
 
       srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH];
       dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH];
       srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL];
       dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL];
 
-      const boolean combined =
+      combined =
          st_is_depth_stencil_combined(srcDepth, srcStencil) &&
          st_is_depth_stencil_combined(dstDepth, dstStencil);
 
-- 
cgit v1.2.3


From 50073563b2bfe3716b3dc8b1ed2f91381ba24305 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: st/mesa: silence int/float and double/float conversion warnings

---
 src/mesa/state_tracker/st_cb_bitmap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 49b196032b9..067403f396b 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -328,8 +328,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
 
    if(!normalized)
    {
-      sRight = width;
-      tBot = height;
+      sRight = (GLfloat) width;
+      tBot = (GLfloat) height;
    }
 
    /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
@@ -381,7 +381,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
    /* same for all verts: */
    for (i = 0; i < 4; i++) {
       st->bitmap.vertices[i][0][2] = z;
-      st->bitmap.vertices[i][0][3] = 1.0;
+      st->bitmap.vertices[i][0][3] = 1.0f;
       st->bitmap.vertices[i][1][0] = color[0];
       st->bitmap.vertices[i][1][1] = color[1];
       st->bitmap.vertices[i][1][2] = color[2];
@@ -513,7 +513,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    cso_set_vertex_elements(cso, 3, st->velems_util_draw);
 
    /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
-   z = z * 2.0 - 1.0;
+   z = z * 2.0f - 1.0f;
 
    /* draw textured quad */
    offset = setup_bitmap_vertex_data(st,
-- 
cgit v1.2.3


From b7e89115310628310bf458a33f2df2bf23384cf3 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Tue, 2 Aug 2011 11:36:44 -0500
Subject: glsl_to_tgsi: remove debugging printf

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ba4074eecd5..b5f4253ea64 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -840,7 +840,6 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
       int index = 0;
       immediate_storage *entry;
       assert(file == PROGRAM_IMMEDIATE);
-      fprintf(stderr, "adding immediate\n");
 
       /* Search immediate storage to see if we already have an identical
        * immediate that we can use instead of adding a duplicate entry.
-- 
cgit v1.2.3


From 9adcab9cd464d659288e31e6767efb5dee3894ff Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 4 Aug 2011 10:15:54 -0500
Subject: st/mesa: replace duplicated create_color_map_texture() function with
 shared function

---
 src/mesa/state_tracker/st_atom_pixeltransfer.c | 22 +---------------------
 src/mesa/state_tracker/st_cb_drawpixels.c      | 23 +----------------------
 src/mesa/state_tracker/st_texture.c            | 20 ++++++++++++++++++++
 src/mesa/state_tracker/st_texture.h            |  4 ++++
 4 files changed, 26 insertions(+), 43 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index 95b706cb96c..12b5bc5ba79 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -84,26 +84,6 @@ make_state_key(struct gl_context *ctx,  struct state_key *key)
 }
 
 
-static struct pipe_resource *
-create_color_map_texture(struct gl_context *ctx)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_resource *pt;
-   enum pipe_format format;
-   const uint texSize = 256; /* simple, and usually perfect */
-
-   /* find an RGBA texture format */
-   format = st_choose_format(pipe->screen, GL_RGBA,
-                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
-
-   /* create texture for color map/table */
-   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
-                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
-   return pt;
-}
-
-
 /**
  * Update the pixelmap texture with the contents of the R/G/B/A pixel maps.
  */
@@ -219,7 +199,7 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key)
 
       /* create the colormap/texture now if not already done */
       if (!st->pixel_xfer.pixelmap_texture) {
-         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
          st->pixel_xfer.pixelmap_sampler_view =
             st_create_texture_sampler_view(st->pipe,
                                            st->pixel_xfer.pixelmap_texture);
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index f4dd2a42847..0c4dc23ccf7 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -94,27 +94,6 @@ is_passthrough_program(const struct gl_fragment_program *prog)
 }
 
 
-/* XXX copied verbatim from st_atom_pixeltransfer.c */
-static struct pipe_resource *
-create_color_map_texture(struct gl_context *ctx)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_resource *pt;
-   enum pipe_format format;
-   const uint texSize = 256; /* simple, and usually perfect */
-
-   /* find an RGBA texture format */
-   format = st_choose_format(pipe->screen, GL_RGBA,
-                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
-
-   /* create texture for color map/table */
-   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
-                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
-   return pt;
-}
-
-
 /**
  * Returns a fragment program which implements the current pixel transfer ops.
  */
@@ -142,7 +121,7 @@ get_glsl_pixel_transfer_program(struct st_context *st,
    if (pixelMaps) {
       /* create the colormap/texture now if not already done */
       if (!st->pixel_xfer.pixelmap_texture) {
-         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
          st->pixel_xfer.pixelmap_sampler_view =
             st_create_texture_sampler_view(st->pipe,
                                            st->pixel_xfer.pixelmap_texture);
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index ffe7e256a56..d8ba3ac9252 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -396,3 +396,23 @@ st_texture_image_copy(struct pipe_context *pipe,
    }
 }
 
+
+struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_resource *pt;
+   enum pipe_format format;
+   const uint texSize = 256; /* simple, and usually perfect */
+
+   /* find an RGBA texture format */
+   format = st_choose_format(pipe->screen, GL_RGBA,
+                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
+
+   /* create texture for color map/table */
+   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
+                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
+   return pt;
+}
+
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index d50c3c9af79..b822f47cf9e 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -232,4 +232,8 @@ st_texture_image_copy(struct pipe_context *pipe,
                       struct pipe_resource *src, GLuint srcLevel,
                       GLuint face);
 
+
+extern struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx);
+
 #endif
-- 
cgit v1.2.3


From d6a0692f9dc055c5e5f0e7c806537ad24aa13709 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 13:07:50 -0600
Subject: mesa: don't use K&R style function pointer calls

---
 src/mesa/main/texobj.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index 3021716a0b6..078a43ab153 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -842,7 +842,7 @@ _mesa_GenTextures( GLsizei n, GLuint *textures )
       struct gl_texture_object *texObj;
       GLuint name = first + i;
       GLenum target = 0;
-      texObj = (*ctx->Driver.NewTextureObject)( ctx, name, target);
+      texObj = ctx->Driver.NewTextureObject(ctx, name, target);
       if (!texObj) {
          _glthread_UNLOCK_MUTEX(ctx->Shared->Mutex);
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures");
@@ -1066,7 +1066,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
       }
       else {
          /* if this is a new texture id, allocate a texture object now */
-         newTexObj = (*ctx->Driver.NewTextureObject)(ctx, texName, target);
+         newTexObj = ctx->Driver.NewTextureObject(ctx, texName, target);
          if (!newTexObj) {
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindTexture");
             return;
@@ -1108,7 +1108,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
 
    /* Pass BindTexture call to device driver */
    if (ctx->Driver.BindTexture)
-      (*ctx->Driver.BindTexture)( ctx, target, newTexObj );
+      ctx->Driver.BindTexture(ctx, target, newTexObj);
 }
 
 
-- 
cgit v1.2.3


From 1c8d079e205919b24e04efdc2421c18d03f078ff Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 15:32:09 -0600
Subject: mesa: fix out of bounds array access in rgtc debug code

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39841
This would only be hit if someone set RGTC_DEBUG=1.
---
 src/mesa/main/texcompress_rgtc_tmp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/texcompress_rgtc_tmp.h b/src/mesa/main/texcompress_rgtc_tmp.h
index c8bf082a158..48bbd374e08 100644
--- a/src/mesa/main/texcompress_rgtc_tmp.h
+++ b/src/mesa/main/texcompress_rgtc_tmp.h
@@ -181,7 +181,7 @@ static void TAG(encode_rgtc_chan)(TYPE *blkaddr, TYPE srccolors[4][4],
       fprintf(stderr, "%d ", alphaenc1[i]);
    }
    fprintf(stderr, "cutVals ");
-   for (i = 0; i < 8; i++) {
+   for (i = 0; i < 7; i++) {
       fprintf(stderr, "%d ", acutValues[i]);
    }
    fprintf(stderr, "srcVals ");
-- 
cgit v1.2.3


From d7f2e38fca38a5521e930242be46be5a70a9cbd3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 15:55:13 -0600
Subject: mesa: add st_glsl_to_tgsi.cpp to SConscript

---
 src/mesa/SConscript | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/mesa')

diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 24e2155c387..cbd16625186 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -264,6 +264,7 @@ statetracker_sources = [
     'state_tracker/st_draw_feedback.c',
     'state_tracker/st_extensions.c',
     'state_tracker/st_format.c',
+    'state_tracker/st_glsl_to_tgsi.cpp',
     'state_tracker/st_gen_mipmap.c',
     'state_tracker/st_manager.c',
     'state_tracker/st_mesa_to_tgsi.c',
-- 
cgit v1.2.3


From a0eb83401ef599e597b72e70c8856e1bc0f59dcc Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 15:55:50 -0600
Subject: mesa: use gl_constant_value type in
 _mesa_[Get]ProgramNamedParameter4fNV()

---
 src/mesa/main/nvprogram.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/nvprogram.c b/src/mesa/main/nvprogram.c
index dd198b8141a..7ff7645b7b7 100644
--- a/src/mesa/main/nvprogram.c
+++ b/src/mesa/main/nvprogram.c
@@ -812,7 +812,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
 {
    struct gl_program *prog;
    struct gl_fragment_program *fragProg;
-   GLfloat *v;
+   gl_constant_value *v;
 
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -834,10 +834,10 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
    v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len,
                                     (char *) name);
    if (v) {
-      v[0] = x;
-      v[1] = y;
-      v[2] = z;
-      v[3] = w;
+      v[0].f = x;
+      v[1].f = y;
+      v[2].f = z;
+      v[3].f = w;
       return;
    }
 
@@ -878,7 +878,7 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name,
 {
    struct gl_program *prog;
    struct gl_fragment_program *fragProg;
-   const GLfloat *v;
+   const gl_constant_value *v;
 
    GET_CURRENT_CONTEXT(ctx);
 
@@ -899,10 +899,10 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name,
    v = _mesa_lookup_parameter_value(fragProg->Base.Parameters,
                                     len, (char *) name);
    if (v) {
-      params[0] = v[0];
-      params[1] = v[1];
-      params[2] = v[2];
-      params[3] = v[3];
+      params[0] = v[0].f;
+      params[1] = v[1].f;
+      params[2] = v[2].f;
+      params[3] = v[3].f;
       return;
    }
 
-- 
cgit v1.2.3


From 324857599b2a4735c86e54da9a1776c034dadf72 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 16:00:06 -0600
Subject: mesa: use gl_constant_value type in ARB program parser

---
 src/mesa/program/program_parse.y  | 56 +++++++++++++++++++--------------------
 src/mesa/program/program_parser.h |  3 ++-
 2 files changed, 30 insertions(+), 29 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y
index dbf5abaa617..dec35038be5 100644
--- a/src/mesa/program/program_parse.y
+++ b/src/mesa/program/program_parse.y
@@ -1854,64 +1854,64 @@ paramConstUse: paramConstScalarUse | paramConstVector;
 paramConstScalarDecl: signedFloatConstant
 	{
 	   $$.count = 4;
-	   $$.data[0] = $1;
-	   $$.data[1] = $1;
-	   $$.data[2] = $1;
-	   $$.data[3] = $1;
+	   $$.data[0].f = $1;
+	   $$.data[1].f = $1;
+	   $$.data[2].f = $1;
+	   $$.data[3].f = $1;
 	}
 	;
 
 paramConstScalarUse: REAL
 	{
 	   $$.count = 1;
-	   $$.data[0] = $1;
-	   $$.data[1] = $1;
-	   $$.data[2] = $1;
-	   $$.data[3] = $1;
+	   $$.data[0].f = $1;
+	   $$.data[1].f = $1;
+	   $$.data[2].f = $1;
+	   $$.data[3].f = $1;
 	}
 	| INTEGER
 	{
 	   $$.count = 1;
-	   $$.data[0] = (float) $1;
-	   $$.data[1] = (float) $1;
-	   $$.data[2] = (float) $1;
-	   $$.data[3] = (float) $1;
+	   $$.data[0].f = (float) $1;
+	   $$.data[1].f = (float) $1;
+	   $$.data[2].f = (float) $1;
+	   $$.data[3].f = (float) $1;
 	}
 	;
 
 paramConstVector: '{' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = 0.0f;
-	   $$.data[2] = 0.0f;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = 0.0f;
+	   $$.data[2].f = 0.0f;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = 0.0f;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = 0.0f;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant ','
               signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = $6;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = $6;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant ','
               signedFloatConstant ',' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = $6;
-	   $$.data[3] = $8;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = $6;
+	   $$.data[3].f = $8;
 	}
 	;
 
diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h
index 8e5aaee95e5..5637598f3b3 100644
--- a/src/mesa/program/program_parser.h
+++ b/src/mesa/program/program_parser.h
@@ -23,6 +23,7 @@
 #pragma once
 
 #include "main/config.h"
+#include "program/prog_parameter.h"
 
 struct gl_context;
 
@@ -96,7 +97,7 @@ struct asm_symbol {
 
 struct asm_vector {
    unsigned count;
-   float    data[4];
+   gl_constant_value data[4];
 };
 
 
-- 
cgit v1.2.3


From bf8d06c518a8e17e485b18ba03be3e1b45cc7327 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 16:01:27 -0600
Subject: mesa: pass correct constant type to _mesa_fetch_state()

Fixes assorted warnings about float vs. gl_constant_value pointers.
---
 src/mesa/program/prog_statevars.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 16f9690e865..6aa2409e85e 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -1111,7 +1111,7 @@ _mesa_load_state_parameters(struct gl_context *ctx,
       if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
          _mesa_fetch_state(ctx,
 			   paramList->Parameters[i].StateIndexes,
-                           paramList->ParameterValues[i]);
+                           &paramList->ParameterValues[i][0].f);
       }
    }
 }
-- 
cgit v1.2.3


From a48118e510fcbb57634a7869cb628123fa8c3f2e Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Thu, 4 Aug 2011 18:04:44 -0700
Subject: mesa: Remove MSVC stdint typedefs from compiler.h.

MSVC can now include the stdint.h at include/c99/stdint.h.
---
 src/mesa/main/compiler.h | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index d736fdfc58a..ee7d0b2f880 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -60,29 +60,7 @@ extern "C" {
 /**
  * Get standard integer types
  */
-#if defined(_MSC_VER)
-   typedef __int8             int8_t;
-   typedef unsigned __int8    uint8_t;
-   typedef __int16            int16_t;
-   typedef unsigned __int16   uint16_t;
-   typedef __int32            int32_t;
-   typedef unsigned __int32   uint32_t;
-   typedef __int64            int64_t;
-   typedef unsigned __int64   uint64_t;
-
-#  if defined(_WIN64)
-     typedef __int64            intptr_t;
-     typedef unsigned __int64   uintptr_t;
-#  else
-     typedef __int32            intptr_t;
-     typedef unsigned __int32   uintptr_t;
-#  endif
-
-#  define INT64_C(__val) __val##i64
-#  define UINT64_C(__val) __val##ui64
-#else
-#  include <stdint.h>
-#endif
+#include <stdint.h>
 
 
 /**
-- 
cgit v1.2.3


From c251d83d916336f95109363e919920a024947230 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 07:38:13 +0200
Subject: vbo: do not call _mesa_max_buffer_index in debug builds

That code drops performance in Unigine Heaven and Tropics
by a factor of 10. That's too crazy even for a debug build.

NOTE: This is a candidate for the 7.11 branch.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/vbo/vbo_exec_array.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index b908d5aea7e..32ce0e4a8ff 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -909,11 +909,10 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
       if (0)
          _mesa_print_arrays(ctx);
 
-#ifdef DEBUG
       /* 'end' was out of bounds, but now let's check the actual array
        * indexes to see if any of them are out of bounds.
        */
-      {
+      if (0) {
          GLuint max = _mesa_max_buffer_index(ctx, count, type, indices,
                                              ctx->Array.ElementArrayBufferObj);
          if (max >= ctx->Array.ArrayObj->_MaxElement) {
@@ -934,7 +933,6 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
           * upper bound wrong.
           */
       }
-#endif
 
       /* Set 'end' to the max possible legal value */
       assert(ctx->Array.ArrayObj->_MaxElement >= 1);
-- 
cgit v1.2.3


From 6e7942936c5de59f509779b6f7620d80d2fbc21a Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 5 Aug 2011 06:57:07 +0200
Subject: st/mesa: remove unused-but-set variables in st_glsl_to_tgsi.cpp

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 9c6a7ed738a..460bafb3821 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1725,7 +1725,6 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
    if (index) {
       src.index += index->value.i[0] * element_size;
    } else {
-      st_src_reg array_base = this->result;
       /* Variable index array dereference.  It eats the "vec4" of the
        * base of the array and an index that offsets the TGSI register
        * index.
@@ -2463,7 +2462,7 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir)
 void
 glsl_to_tgsi_visitor::visit(ir_if *ir)
 {
-   glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL;
+   glsl_to_tgsi_instruction *cond_inst, *if_inst;
    glsl_to_tgsi_instruction *prev_inst;
 
    prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
@@ -2495,7 +2494,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
    visit_exec_list(&ir->then_instructions, this);
 
    if (!ir->else_instructions.is_empty()) {
-      else_inst = emit(ir->condition, TGSI_OPCODE_ELSE);
+      emit(ir->condition, TGSI_OPCODE_ELSE);
       visit_exec_list(&ir->else_instructions, this);
    }
 
-- 
cgit v1.2.3


From 0722edc59cd526437c2d4bad474b934dad84d789 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 28 Jul 2011 09:57:19 -0700
Subject: i965/fs: Don't allocate the old backend's compile structs for our
 compile.

This saves some 35MB when the program only uses GLSL shaders.
---
 src/mesa/drivers/dri/i965/brw_wm.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index b0dfdd536aa..d13ac6124c8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -206,10 +206,6 @@ bool do_wm_prog(struct brw_context *brw,
           */
          return false;
       }
-      c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
-      c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
-      c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
-      c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
    } else {
       void *instruction = c->instruction;
       void *prog_instructions = c->prog_instructions;
@@ -232,6 +228,13 @@ bool do_wm_prog(struct brw_context *brw,
       if (!brw_wm_fs_emit(brw, c, prog))
 	 return false;
    } else {
+      if (!c->instruction) {
+	 c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
+	 c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
+	 c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
+	 c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
+      }
+
       /* Fallback for fixed function and ARB_fp shaders. */
       c->dispatch_width = 16;
       brw_wm_payload_setup(brw, c);
-- 
cgit v1.2.3


From ee0373b833155804bb8846c6f05f897b9ee5afa6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 18:13:04 -0700
Subject: i965/fs: Don't upload unused uniform components.

This saves both register space and upload bandwidth for unused values.

Note that previously we were relying on the visitor not initially
generating references to different sets of uniforms between the 8-wide
and 16-wide code generation, and now we're also relying on both passes
dead-code eliminating the same uniform references.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 89 ++++++++++++++++++++++++++++++++++--
 src/mesa/drivers/dri/i965/brw_fs.h   | 10 +++-
 2 files changed, 95 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 02041b3bc03..f55be022f72 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -242,11 +242,12 @@ import_uniforms_callback(const void *key,
  * This brings in those uniform definitions
  */
 void
-fs_visitor::import_uniforms(struct hash_table *src_variable_ht)
+fs_visitor::import_uniforms(fs_visitor *v)
 {
-   hash_table_call_foreach(src_variable_ht,
+   hash_table_call_foreach(v->variable_ht,
 			   import_uniforms_callback,
 			   variable_ht);
+   this->params_remap = v->params_remap;
 }
 
 /* Our support for uniforms is piggy-backed on the struct
@@ -798,6 +799,86 @@ fs_visitor::split_virtual_grfs()
    this->live_intervals_valid = false;
 }
 
+bool
+fs_visitor::remove_dead_constants()
+{
+   if (c->dispatch_width == 8) {
+      this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
+	 this->params_remap[i] = -1;
+
+      /* Find which params are still in use. */
+      foreach_list(node, &this->instructions) {
+	 fs_inst *inst = (fs_inst *)node;
+
+	 for (int i = 0; i < 3; i++) {
+	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+
+	    if (inst->src[i].file != UNIFORM)
+	       continue;
+
+	    assert(constant_nr < (int)c->prog_data.nr_params);
+
+	    /* For now, set this to non-negative.  We'll give it the
+	     * actual new number in a moment, in order to keep the
+	     * register numbers nicely ordered.
+	     */
+	    this->params_remap[constant_nr] = 0;
+	 }
+      }
+
+      /* Figure out what the new numbers for the params will be.  At some
+       * point when we're doing uniform array access, we're going to want
+       * to keep the distinction between .reg and .reg_offset, but for
+       * now we don't care.
+       */
+      unsigned int new_nr_params = 0;
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+	 if (this->params_remap[i] != -1) {
+	    this->params_remap[i] = new_nr_params++;
+	 }
+      }
+
+      /* Update the list of params to be uploaded to match our new numbering. */
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+	 int remapped = this->params_remap[i];
+
+	 if (remapped == -1)
+	    continue;
+
+	 /* We've already done setup_paramvalues_refs() so no need to worry
+	  * about param_index and param_offset.
+	  */
+	 c->prog_data.param[remapped] = c->prog_data.param[i];
+	 c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i];
+      }
+
+      c->prog_data.nr_params = new_nr_params;
+   } else {
+      /* This should have been generated in the 8-wide pass already. */
+      assert(this->params_remap);
+   }
+
+   /* Now do the renumbering of the shader to remove unused params. */
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      for (int i = 0; i < 3; i++) {
+	 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+
+	 if (inst->src[i].file != UNIFORM)
+	    continue;
+
+	 assert(this->params_remap[constant_nr] != -1);
+	 inst->src[i].hw_reg = this->params_remap[constant_nr];
+	 inst->src[i].reg_offset = 0;
+      }
+   }
+
+   return true;
+}
+
 /**
  * Choose accesses from the UNIFORM file to demote to using the pull
  * constant buffer.
@@ -1624,6 +1705,8 @@ fs_visitor::run()
 	 progress = dead_code_eliminate() || progress;
       } while (progress);
 
+      remove_dead_constants();
+
       schedule_instructions();
 
       assign_curb_setup();
@@ -1702,7 +1785,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
    if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
       c->dispatch_width = 16;
       fs_visitor v2(c, prog, shader);
-      v2.import_uniforms(v.variable_ht);
+      v2.import_uniforms(&v);
       v2.run();
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 89d6cda7e4f..96e1420038f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -421,7 +421,7 @@ public:
 
    fs_reg *variable_storage(ir_variable *var);
    int virtual_grf_alloc(int size);
-   void import_uniforms(struct hash_table *src_variable_ht);
+   void import_uniforms(fs_visitor *v);
 
    void visit(ir_variable *ir);
    void visit(ir_assignment *ir);
@@ -489,6 +489,7 @@ public:
    bool register_coalesce();
    bool compute_to_mrf();
    bool dead_code_eliminate();
+   bool remove_dead_constants();
    bool remove_duplicate_mrf_writes();
    bool virtual_grf_interferes(int a, int b);
    void schedule_instructions();
@@ -566,6 +567,13 @@ public:
    int *virtual_grf_use;
    bool live_intervals_valid;
 
+   /* This is the map from UNIFORM hw_reg + reg_offset as generated by
+    * the visitor to the packed uniform number after
+    * remove_dead_constants() that represents the actual uploaded
+    * uniform index.
+    */
+   int *params_remap;
+
    struct hash_table *variable_ht;
    ir_variable *frag_color, *frag_data, *frag_depth;
    int first_non_payload_grf;
-- 
cgit v1.2.3


From 69dc529da241747888efefdf0d3e58479dd6248c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 28 Jul 2011 09:52:03 -0700
Subject: mesa: Remove dead "MemPool" field of gl_shader_state.

---
 src/mesa/main/mtypes.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index b88118366b2..2d5f44c1e7b 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2252,8 +2252,6 @@ struct gl_shader_state
     */
    struct gl_shader_program *ActiveProgram;
 
-   void *MemPool;
-
    GLbitfield Flags;                    /**< Mask of GLSL_x flags */
 };
 
-- 
cgit v1.2.3


From 9998df36c271810ecf20041bf6bed28f3952a94f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 18:15:25 -0700
Subject: i965: Add dumping for gen6 WM constants too.

This looks just like the VS dump for now.
---
 src/mesa/drivers/dri/i965/brw_context.h    |  1 +
 src/mesa/drivers/dri/i965/brw_state_dump.c | 20 ++++++++++++++++++++
 src/mesa/drivers/dri/i965/gen6_wm_state.c  |  2 +-
 3 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 471015cf9d0..22baf978ad4 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -212,6 +212,7 @@ enum state_struct_type {
    AUB_TRACE_BINDING_TABLE =		0x101,
    AUB_TRACE_SURFACE_STATE =		0x102,
    AUB_TRACE_VS_CONSTANTS =		0x103,
+   AUB_TRACE_WM_CONSTANTS =		0x104,
 };
 
 /** Subclass of Mesa vertex program */
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index b9e5cc1a534..cb7a3ef73d3 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -455,6 +455,23 @@ dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
    }
 }
 
+static void
+dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
+{
+   const char *name = "WM_CONST";
+   struct intel_context *intel = &brw->intel;
+   uint32_t *as_uint = intel->batch.bo->virtual + offset;
+   float *as_float = intel->batch.bo->virtual + offset;
+   int i;
+
+   for (i = 0; i < size / 4; i += 4) {
+      batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n",
+		i / 4,
+		as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3],
+		as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]);
+   }
+}
+
 static void dump_binding_table(struct brw_context *brw, uint32_t offset,
 			       uint32_t size)
 {
@@ -602,6 +619,9 @@ dump_state_batch(struct brw_context *brw)
       case AUB_TRACE_VS_CONSTANTS:
 	 dump_vs_constants(brw, offset, size);
 	 break;
+      case AUB_TRACE_WM_CONSTANTS:
+	 dump_wm_constants(brw, offset, size);
+	 break;
       default:
 	 break;
       }
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 185da9c355f..3d525248f25 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -54,7 +54,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
       float *constants;
       unsigned int i;
 
-      constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE,
+      constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
 				  brw->wm.prog_data->nr_params *
 				  sizeof(float),
 				  32, &brw->wm.push_const_offset);
-- 
cgit v1.2.3


From 6bd5f43f212962a054a41290b0f8e350dae2f40d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 15:13:08 -0700
Subject: prog_optimize: Add support for saturates to
 _mesa_merge_mov_into_inst.

This fixes the remaining regression in Mesa IR instruction count from
ff_fragment_shader, which is now a 1.9% win overall.
---
 src/mesa/program/prog_optimize.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index f4a7a638d5f..3340ce0498b 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -472,8 +472,7 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov)
       mov->SrcReg[0].HasIndex2 == 0 &&
       mov->SrcReg[0].RelAddr2 == 0 &&
       mov->DstReg.RelAddr == 0 &&
-      mov->DstReg.CondMask == COND_TR &&
-      mov->SaturateMode == SATURATE_OFF;
+      mov->DstReg.CondMask == COND_TR;
 }
 
 
@@ -482,7 +481,8 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov)
 {
    return
       can_downward_mov_be_modifed(mov) &&
-      mov->DstReg.File == PROGRAM_TEMPORARY;
+      mov->DstReg.File == PROGRAM_TEMPORARY &&
+      mov->SaturateMode == SATURATE_OFF;
 }
 
 
@@ -657,6 +657,8 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst,
    if (mask != (inst->DstReg.WriteMask & mask))
       return GL_FALSE;
 
+   inst->SaturateMode |= mov->SaturateMode;
+
    /* Depending on the instruction, we may need to recompute the swizzles.
     * Also, some other instructions (like TEX) are not linear. We will only
     * consider completely active sources and destinations
-- 
cgit v1.2.3


From 62722d90af9d43d889af33b080a682f2004e049c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 13:54:15 -0700
Subject: ir_to_mesa: Try to avoid emitting a MOV_SAT to saturate an expression
 tree.

Fixes a regression in codegen quality for ff_fragment_shader
conversion to GLSL -- glean texCombine produces 7.5% fewer Mesa IR
instructions.
---
 src/mesa/program/ir_to_mesa.cpp | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index debadb9a398..9b615b68a23 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -915,10 +915,30 @@ ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
    sat_src->accept(this);
    src_reg src = this->result;
 
-   this->result = get_temp(ir->type);
-   ir_to_mesa_instruction *inst;
-   inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
-   inst->saturate = true;
+   /* If we generated an expression instruction into a temporary in
+    * processing the saturate's operand, apply the saturate to that
+    * instruction.  Otherwise, generate a MOV to do the saturate.
+    *
+    * Note that we have to be careful to only do this optimization if
+    * the instruction in question was what generated src->result.  For
+    * example, ir_dereference_array might generate a MUL instruction
+    * to create the reladdr, and return us a src reg using that
+    * reladdr.  That MUL result is not the value we're trying to
+    * saturate.
+    */
+   ir_expression *sat_src_expr = sat_src->as_expression();
+   ir_to_mesa_instruction *new_inst;
+   new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
+   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+			sat_src_expr->operation == ir_binop_add ||
+			sat_src_expr->operation == ir_binop_dot)) {
+      new_inst->saturate = true;
+   } else {
+      this->result = get_temp(ir->type);
+      ir_to_mesa_instruction *inst;
+      inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
+      inst->saturate = true;
+   }
 
    return true;
 }
-- 
cgit v1.2.3


From 4c7e215c7bb09f827df630cbfc80e87869351f18 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 2 May 2011 16:27:46 -0700
Subject: ir_to_mesa: Replace open-coded swizzle_for_size()

---
 src/mesa/program/ir_to_mesa.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 9b615b68a23..1ef609fe15d 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -641,8 +641,6 @@ src_reg
 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 {
    src_reg src;
-   int swizzle[4];
-   int i;
 
    src.file = PROGRAM_TEMPORARY;
    src.index = next_temp;
@@ -652,12 +650,7 @@ ir_to_mesa_visitor::get_temp(const glsl_type *type)
    if (type->is_array() || type->is_record()) {
       src.swizzle = SWIZZLE_NOOP;
    } else {
-      for (i = 0; i < type->vector_elements; i++)
-	 swizzle[i] = i;
-      for (; i < 4; i++)
-	 swizzle[i] = type->vector_elements - 1;
-      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
-				  swizzle[2], swizzle[3]);
+      src.swizzle = swizzle_for_size(type->vector_elements);
    }
    src.negate = 0;
 
-- 
cgit v1.2.3


From b44648c9186d403abaeeeb3190d6759f951a49e4 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 5 Aug 2011 14:09:37 -0500
Subject: glsl_to_tgsi: try to avoid emitting a MOV_SAT to saturate an
 expression tree

This is a port of commit 62722d9 to glsl_to_tgsi, with minor aesthetic
changes (moved the declaration and assignment of new_inst inside the if block).
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 32 ++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 460bafb3821..e10243add8a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1232,12 +1232,32 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
    sat_src->accept(this);
    st_src_reg src = this->result;
 
-   this->result = get_temp(ir->type);
-   st_dst_reg result_dst = st_dst_reg(this->result);
-   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
-   glsl_to_tgsi_instruction *inst;
-   inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
-   inst->saturate = true;
+   /* If we generated an expression instruction into a temporary in
+    * processing the saturate's operand, apply the saturate to that
+    * instruction.  Otherwise, generate a MOV to do the saturate.
+    *
+    * Note that we have to be careful to only do this optimization if
+    * the instruction in question was what generated src->result.  For
+    * example, ir_dereference_array might generate a MUL instruction
+    * to create the reladdr, and return us a src reg using that
+    * reladdr.  That MUL result is not the value we're trying to
+    * saturate.
+    */
+   ir_expression *sat_src_expr = sat_src->as_expression();
+   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+			sat_src_expr->operation == ir_binop_add ||
+			sat_src_expr->operation == ir_binop_dot)) {
+      glsl_to_tgsi_instruction *new_inst;
+      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      new_inst->saturate = true;
+   } else {
+      this->result = get_temp(ir->type);
+      st_dst_reg result_dst = st_dst_reg(this->result);
+      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+      glsl_to_tgsi_instruction *inst;
+      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
+      inst->saturate = true;
+   }
 
    return true;
 }
-- 
cgit v1.2.3


From 5164244df02f33d6ad9e0a286f4b6d6af2dfbc75 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 5 Aug 2011 14:37:33 -0500
Subject: glsl_to_tgsi: replace open-coded swizzle_for_size()

This is a port of commit 4c7e215c7bb to glsl_to_tgsi.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e10243add8a..d7a1ba80e1d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -949,8 +949,6 @@ st_src_reg
 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
 {
    st_src_reg src;
-   int swizzle[4];
-   int i;
 
    src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
    src.file = PROGRAM_TEMPORARY;
@@ -961,12 +959,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
    if (type->is_array() || type->is_record()) {
       src.swizzle = SWIZZLE_NOOP;
    } else {
-      for (i = 0; i < type->vector_elements; i++)
-         swizzle[i] = i;
-      for (; i < 4; i++)
-         swizzle[i] = type->vector_elements - 1;
-      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
-        			  swizzle[2], swizzle[3]);
+      src.swizzle = swizzle_for_size(type->vector_elements);
    }
    src.negate = 0;
 
-- 
cgit v1.2.3


From a9e97d022cb68266639eb54947517454c8ffe45e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 12:47:25 -0700
Subject: intel: Fix warnings from gl_constant_value changes.

---
 src/mesa/drivers/dri/i915/i915_fragprog.c |  6 ++----
 src/mesa/drivers/dri/i965/brw_wm_fp.c     | 10 +++++-----
 src/mesa/drivers/dri/i965/brw_wm_pass0.c  |  4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 32050cebf33..d155b85ffca 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -175,10 +175,8 @@ src_vector(struct i915_fragment_program *p,
    case PROGRAM_STATE_VAR:
    case PROGRAM_NAMED_PARAM:
    case PROGRAM_UNIFORM:
-      src =
-         i915_emit_param4fv(p,
-                            program->Base.Parameters->ParameterValues[source->
-                                                                      Index]);
+      src = i915_emit_param4fv(p,
+	 &program->Base.Parameters->ParameterValues[source->Index][0].f);
       break;
 
    default:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 7cd3edad235..d52a9581f5e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -535,15 +535,15 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
 						     GLfloat s3)
 {
    struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
-   GLfloat values[4];
+   gl_constant_value values[4];
    GLuint idx;
    GLuint swizzle;
    struct prog_src_register reg;
 
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
+   values[0].f = s0;
+   values[1].f = s1;
+   values[2].f = s2;
+   values[3].f = s3;
 
    idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
    reg = src_reg(PROGRAM_STATE_VAR, idx);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index f78bdc31866..ccf9dc2bc18 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -205,14 +205,14 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 	 case PROGRAM_CONSTANT:
 	    /* These are invarient:
 	     */
-	    ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+	    ref = get_const_ref(c, &plist->ParameterValues[idx][component].f);
 	    break;
 
 	 case PROGRAM_STATE_VAR:
 	 case PROGRAM_UNIFORM:
 	    /* These may change from run to run:
 	     */
-	    ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+	    ref = get_param_ref(c, &plist->ParameterValues[idx][component].f );
 	    break;
 
 	 default:
-- 
cgit v1.2.3


From db726b048e8858af226dbd0f0fda72d0be01394e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 30 Jul 2011 21:26:26 -0700
Subject: mesa: In validate_program(), initialize errMsg for safety.

validate_program relies on validate_shader_program to fill in errMsg;
empirically, there exist cases where that doesn't happen.

While tracking those down may be worthwhile, initializing the string so
we don't try to ralloc_strdup random garbage also seems wise.

Fixes issues caught by valgrind while running some test case.

NOTE: This is a candidate for stable release branches.

Reviewed-by: Chad Versace <chad@chad-versace.us>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/main/shaderapi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 8df25c3f988..74997eaaa77 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1125,7 +1125,7 @@ static void
 validate_program(struct gl_context *ctx, GLuint program)
 {
    struct gl_shader_program *shProg;
-   char errMsg[100];
+   char errMsg[100] = "";
 
    shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram");
    if (!shProg) {
-- 
cgit v1.2.3


From 425b179fafe93ddf4abacbccb67ed6aecbef6a7e Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 5 Aug 2011 20:10:04 +0200
Subject: st/mesa: don't resolve stencil twice

---
 src/mesa/state_tracker/st_cb_blit.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 626db12431d..750f541b5dd 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -107,8 +107,10 @@ st_BlitFramebuffer_resolve(struct gl_context *ctx,
          dstRb = st_renderbuffer(dstDepth->Renderbuffer);
 
          info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0;
-         if (combined && (mask & GL_STENCIL_BUFFER_BIT))
+         if (combined && (mask & GL_STENCIL_BUFFER_BIT)) {
+            mask &= ~GL_STENCIL_BUFFER_BIT;
             info->mask |= PIPE_MASK_S;
+         }
 
          info->src.res = srcRb->texture;
          info->src.layer = srcRb->surface->u.tex.first_layer;
-- 
cgit v1.2.3


From 8488112d20d49d3dc7fefef19c6e550e4b71661c Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 5 Aug 2011 15:01:41 -0600
Subject: mesa: whitespace changes

---
 src/mesa/program/prog_parameter.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index f858cf0fa0d..1a5ed343937 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -46,16 +46,19 @@
 #define PROG_PARAM_BIT_CYL_WRAP  0x10  /**< XXX gallium debug */
 /*@}*/
 
+
 /**
  * Actual data for constant values of parameters.
  */
-typedef union gl_constant_value {
-	GLfloat f;
-	GLboolean b;
-	GLint i;
-	GLuint u;
+typedef union gl_constant_value
+{
+   GLfloat f;
+   GLboolean b;
+   GLint i;
+   GLuint u;
 } gl_constant_value;
 
+
 /**
  * Program parameter.
  * Used by shaders/programs for uniforms, constants, varying vars, etc.
-- 
cgit v1.2.3


From ffb7d02154186402f64e0b628998485309774bb8 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 7 Aug 2011 14:15:35 -0500
Subject: st/mesa: inline st_prepare_fragment_program in
 st_translate_fragment_program

This reverts an unnecessary part of commit 4683529048ee and fixes misrendering
and an assertion failure in Cogs.

Fixes freedesktop.org bug 39888.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/state_tracker/st_program.c | 326 ++++++++++++++++++------------------
 src/mesa/state_tracker/st_program.h |  15 --
 2 files changed, 162 insertions(+), 179 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index ca01d2e1976..a4f47edfcd3 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -416,151 +416,6 @@ st_get_vp_variant(struct st_context *st,
    return vpv;
 }
 
-/**
- * Translate Mesa fragment shader attributes to TGSI attributes.
- * \return GL_TRUE if color output should be written to all render targets, 
- *         GL_FALSE if not
- */
-GLboolean
-st_prepare_fragment_program(struct gl_context *ctx,
-                            struct st_fragment_program *stfp)
-{
-   GLuint attr;
-   const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
-   GLboolean write_all = GL_FALSE;
-
-   /*
-    * Convert Mesa program inputs to TGSI input register semantics.
-    */
-   for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
-      if (inputsRead & (1 << attr)) {
-         const GLuint slot = stfp->num_inputs++;
-
-         stfp->input_to_index[attr] = slot;
-
-         switch (attr) {
-         case FRAG_ATTRIB_WPOS:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
-            stfp->input_semantic_index[slot] = 0;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
-            break;
-         case FRAG_ATTRIB_COL0:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            stfp->input_semantic_index[slot] = 0;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
-            break;
-         case FRAG_ATTRIB_COL1:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            stfp->input_semantic_index[slot] = 1;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
-            break;
-         case FRAG_ATTRIB_FOGC:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
-            stfp->input_semantic_index[slot] = 0;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-            break;
-         case FRAG_ATTRIB_FACE:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
-            stfp->input_semantic_index[slot] = 0;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT;
-            break;
-            /* In most cases, there is nothing special about these
-             * inputs, so adopt a convention to use the generic
-             * semantic name and the mesa FRAG_ATTRIB_ number as the
-             * index. 
-             * 
-             * All that is required is that the vertex shader labels
-             * its own outputs similarly, and that the vertex shader
-             * generates at least every output required by the
-             * fragment shader plus fixed-function hardware (such as
-             * BFC).
-             * 
-             * There is no requirement that semantic indexes start at
-             * zero or be restricted to a particular range -- nobody
-             * should be building tables based on semantic index.
-             */
-         case FRAG_ATTRIB_PNTC:
-         case FRAG_ATTRIB_TEX0:
-         case FRAG_ATTRIB_TEX1:
-         case FRAG_ATTRIB_TEX2:
-         case FRAG_ATTRIB_TEX3:
-         case FRAG_ATTRIB_TEX4:
-         case FRAG_ATTRIB_TEX5:
-         case FRAG_ATTRIB_TEX6:
-         case FRAG_ATTRIB_TEX7:
-         case FRAG_ATTRIB_VAR0:
-         default:
-            /* Actually, let's try and zero-base this just for
-             * readability of the generated TGSI.
-             */
-            assert(attr >= FRAG_ATTRIB_TEX0);
-            stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            if (attr == FRAG_ATTRIB_PNTC)
-               stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
-            else
-               stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-            break;
-         }
-      }
-      else {
-         stfp->input_to_index[attr] = -1;
-      }
-   }
-
-   /*
-    * Semantics and mapping for outputs
-    */
-   {
-      uint numColors = 0;
-      GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
-
-      /* if z is written, emit that first */
-      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
-         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION;
-         stfp->output_semantic_index[stfp->num_outputs] = 0;
-         stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs;
-         stfp->num_outputs++;
-         outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
-      }
-
-      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
-         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL;
-         stfp->output_semantic_index[stfp->num_outputs] = 0;
-         stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs;
-         stfp->num_outputs++;
-         outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
-      }
-
-      /* handle remaning outputs (color) */
-      for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
-         if (outputsWritten & BITFIELD64_BIT(attr)) {
-            switch (attr) {
-            case FRAG_RESULT_DEPTH:
-            case FRAG_RESULT_STENCIL:
-               /* handled above */
-               assert(0);
-               break;
-            case FRAG_RESULT_COLOR:
-               write_all = GL_TRUE; /* fallthrough */
-            default:
-               assert(attr == FRAG_RESULT_COLOR ||
-                      (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
-               stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR;
-               stfp->output_semantic_index[stfp->num_outputs] = numColors;
-               stfp->result_to_output[attr] = stfp->num_outputs;
-               numColors++;
-               break;
-            }
-
-            stfp->num_outputs++;
-         }
-      }
-   }
-   
-   return write_all;
-}
-
 
 /**
  * Translate a Mesa fragment shader into a TGSI shader using extra info in
@@ -613,12 +468,155 @@ st_translate_fragment_program(struct st_context *st,
 
    if (!stfp->tgsi.tokens) {
       /* need to translate Mesa instructions to TGSI now */
+      GLuint outputMapping[FRAG_RESULT_MAX];
+      GLuint inputMapping[FRAG_ATTRIB_MAX];
+      GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
+      GLuint attr;
+      const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
       struct ureg_program *ureg;
-      GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp);
+
+      GLboolean write_all = GL_FALSE;
+
+      ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
+      ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+      uint fs_num_inputs = 0;
+
+      ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
+      ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+      uint fs_num_outputs = 0;
       
       if (!stfp->glsl_to_tgsi)
          _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
 
+      /*
+       * Convert Mesa program inputs to TGSI input register semantics.
+       */
+      for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
+         if (inputsRead & (1 << attr)) {
+            const GLuint slot = fs_num_inputs++;
+
+            inputMapping[attr] = slot;
+
+            switch (attr) {
+            case FRAG_ATTRIB_WPOS:
+               input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+               input_semantic_index[slot] = 0;
+               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+               break;
+            case FRAG_ATTRIB_COL0:
+               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+               input_semantic_index[slot] = 0;
+               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+               break;
+            case FRAG_ATTRIB_COL1:
+               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+               input_semantic_index[slot] = 1;
+               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+               break;
+            case FRAG_ATTRIB_FOGC:
+               input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+               input_semantic_index[slot] = 0;
+               interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+               break;
+            case FRAG_ATTRIB_FACE:
+               input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
+               input_semantic_index[slot] = 0;
+               interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
+               break;
+               /* In most cases, there is nothing special about these
+                * inputs, so adopt a convention to use the generic
+                * semantic name and the mesa FRAG_ATTRIB_ number as the
+                * index. 
+                * 
+                * All that is required is that the vertex shader labels
+                * its own outputs similarly, and that the vertex shader
+                * generates at least every output required by the
+                * fragment shader plus fixed-function hardware (such as
+                * BFC).
+                * 
+                * There is no requirement that semantic indexes start at
+                * zero or be restricted to a particular range -- nobody
+                * should be building tables based on semantic index.
+                */
+            case FRAG_ATTRIB_PNTC:
+            case FRAG_ATTRIB_TEX0:
+            case FRAG_ATTRIB_TEX1:
+            case FRAG_ATTRIB_TEX2:
+            case FRAG_ATTRIB_TEX3:
+            case FRAG_ATTRIB_TEX4:
+            case FRAG_ATTRIB_TEX5:
+            case FRAG_ATTRIB_TEX6:
+            case FRAG_ATTRIB_TEX7:
+            case FRAG_ATTRIB_VAR0:
+            default:
+               /* Actually, let's try and zero-base this just for
+                * readability of the generated TGSI.
+                */
+               assert(attr >= FRAG_ATTRIB_TEX0);
+               input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
+               input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+               if (attr == FRAG_ATTRIB_PNTC)
+                  interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+               else
+                  interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+               break;
+            }
+         }
+         else {
+            inputMapping[attr] = -1;
+         }
+      }
+
+      /*
+       * Semantics and mapping for outputs
+       */
+      {
+         uint numColors = 0;
+         GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
+
+         /* if z is written, emit that first */
+         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
+            fs_output_semantic_index[fs_num_outputs] = 0;
+            outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
+            fs_num_outputs++;
+            outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
+         }
+
+         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
+            fs_output_semantic_index[fs_num_outputs] = 0;
+            outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
+            fs_num_outputs++;
+            outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
+         }
+
+         /* handle remaning outputs (color) */
+         for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
+            if (outputsWritten & BITFIELD64_BIT(attr)) {
+               switch (attr) {
+               case FRAG_RESULT_DEPTH:
+               case FRAG_RESULT_STENCIL:
+                  /* handled above */
+                  assert(0);
+                  break;
+               case FRAG_RESULT_COLOR:
+                  write_all = GL_TRUE; /* fallthrough */
+               default:
+                  assert(attr == FRAG_RESULT_COLOR ||
+                         (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
+                  fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
+                  fs_output_semantic_index[fs_num_outputs] = numColors;
+                  outputMapping[attr] = fs_num_outputs;
+                  numColors++;
+                  break;
+               }
+
+               fs_num_outputs++;
+            }
+         }
+      }
+
       ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
       if (ureg == NULL)
          return NULL;
@@ -638,32 +636,32 @@ st_translate_fragment_program(struct st_context *st,
                               stfp->glsl_to_tgsi,
                               &stfp->Base.Base,
                               /* inputs */
-                              stfp->num_inputs,
-                              stfp->input_to_index,
-                              stfp->input_semantic_name,
-                              stfp->input_semantic_index,
-                              stfp->interp_mode,
+                              fs_num_inputs,
+                              inputMapping,
+                              input_semantic_name,
+                              input_semantic_index,
+                              interpMode,
                               /* outputs */
-                              stfp->num_outputs,
-                              stfp->result_to_output,
-                              stfp->output_semantic_name,
-                              stfp->output_semantic_index, FALSE );
+                              fs_num_outputs,
+                              outputMapping,
+                              fs_output_semantic_name,
+                              fs_output_semantic_index, FALSE );
       else
          st_translate_mesa_program(st->ctx,
                                    TGSI_PROCESSOR_FRAGMENT,
                                    ureg,
                                    &stfp->Base.Base,
                                    /* inputs */
-                                   stfp->num_inputs,
-                                   stfp->input_to_index,
-                                   stfp->input_semantic_name,
-                                   stfp->input_semantic_index,
-                                   stfp->interp_mode,
+                                   fs_num_inputs,
+                                   inputMapping,
+                                   input_semantic_name,
+                                   input_semantic_index,
+                                   interpMode,
                                    /* outputs */
-                                   stfp->num_outputs,
-                                   stfp->result_to_output,
-                                   stfp->output_semantic_name,
-                                   stfp->output_semantic_index, FALSE );
+                                   fs_num_outputs,
+                                   outputMapping,
+                                   fs_output_semantic_name,
+                                   fs_output_semantic_index, FALSE );
 
       stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
       ureg_destroy( ureg );
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index 67723de6d53..699b6e8ccb7 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -85,21 +85,6 @@ struct st_fragment_program
 {
    struct gl_fragment_program Base;
    struct glsl_to_tgsi_visitor* glsl_to_tgsi;
-   
-   /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */
-   GLuint input_to_index[FRAG_ATTRIB_MAX];
-   /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */
-   GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
-   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
-   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
-   GLuint num_inputs;
-   GLuint interp_mode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
-
-   /** Maps FRAG_RESULT_x to slot */
-   GLuint result_to_output[FRAG_RESULT_MAX];
-   ubyte output_semantic_name[FRAG_RESULT_MAX];
-   ubyte output_semantic_index[FRAG_RESULT_MAX];
-   GLuint num_outputs;
 
    struct pipe_shader_state tgsi;
 
-- 
cgit v1.2.3


From afd1d857752b5c30a3082068f8bb9002e0c69699 Mon Sep 17 00:00:00 2001
From: Fabio Pedretti <fabio.ped@libero.it>
Date: Tue, 9 Aug 2011 08:08:59 -0600
Subject: swrast: silence unused var warnings

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/swrast/s_span.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index db102ac7946..9a91be39970 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -212,10 +212,10 @@ interpolate_active_attribs(struct gl_context *ctx, SWspan *span, GLbitfield attr
 static INLINE void
 interpolate_int_colors(struct gl_context *ctx, SWspan *span)
 {
+#if CHAN_BITS != 32
    const GLuint n = span->end;
    GLuint i;
 
-#if CHAN_BITS != 32
    ASSERT(!(span->arrayMask & SPAN_RGBA));
 #endif
 
-- 
cgit v1.2.3


From e6c64800cc8833fb4083a556c839b51e8ac84a8b Mon Sep 17 00:00:00 2001
From: Henri Verbeet <hverbeet@gmail.com>
Date: Tue, 9 Aug 2011 12:23:47 -0500
Subject: glsl_to_tgsi: improve assignment hack

Fixes StarCraft 2 and Fallout 3 in Wine.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d7a1ba80e1d..aef23e7d207 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1994,15 +1994,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
    } else if (ir->rhs->as_expression() &&
               this->instructions.get_tail() &&
               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
-              type_size(ir->lhs->type) == 1) {
+              type_size(ir->lhs->type) == 1 &&
+              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
       /* To avoid emitting an extra MOV when assigning an expression to a 
        * variable, emit the last instruction of the expression again, but
        * replace the destination register with the target of the assignment.
        * Dead code elimination will remove the original instruction.
        */
-      glsl_to_tgsi_instruction *inst;
+      glsl_to_tgsi_instruction *inst, *new_inst;
       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst->saturate = inst->saturate;
    } else {
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          emit(ir, TGSI_OPCODE_MOV, l, r);
-- 
cgit v1.2.3


From fa43477fa33c068915283d511b64e3d6470ccd73 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 May 2011 13:27:33 -0700
Subject: mesa: Add a convenience interface for register allocator conflicts
 setup.

---
 src/mesa/program/register_allocate.c | 21 +++++++++++++++++++++
 src/mesa/program/register_allocate.h |  2 ++
 2 files changed, 23 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index de96eb42c9b..f5b5174fc18 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
    }
 }
 
+/**
+ * Adds a conflict between base_reg and reg, and also between reg and
+ * anything that base_reg conflicts with.
+ *
+ * This can simplify code for setting up multiple register classes
+ * which are aggregates of some base hardware registers, compared to
+ * explicitly using ra_add_reg_conflict.
+ */
+void
+ra_add_transitive_reg_conflict(struct ra_regs *regs,
+			       unsigned int base_reg, unsigned int reg)
+{
+   int i;
+
+   ra_add_reg_conflict(regs, reg, base_reg);
+
+   for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
+      ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
+   }
+}
+
 unsigned int
 ra_alloc_reg_class(struct ra_regs *regs)
 {
diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h
index 5b95833f394..ee2e58a4756 100644
--- a/src/mesa/program/register_allocate.h
+++ b/src/mesa/program/register_allocate.h
@@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count);
 unsigned int ra_alloc_reg_class(struct ra_regs *regs);
 void ra_add_reg_conflict(struct ra_regs *regs,
 			 unsigned int r1, unsigned int r2);
+void ra_add_transitive_reg_conflict(struct ra_regs *regs,
+				    unsigned int base_reg, unsigned int reg);
 void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
 void ra_set_finalize(struct ra_regs *regs);
 /** @} */
-- 
cgit v1.2.3


From bbcf13adbe525bd389a65ba15dd7831a56b8b13c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 May 2011 13:31:01 -0700
Subject: i965/fs: Use the new convenience interface for setting up reg
 conflicts.

That code I wrote was impenetrable, and hard to write the first time.
This makes things a lot more obvious.
---
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 29 ++++++-----------------
 1 file changed, 7 insertions(+), 22 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 78daa491156..f246ac49660 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -157,29 +157,14 @@ fs_visitor::assign_regs()
       classes[i] = ra_alloc_reg_class(regs);
 
       for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
-	 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
-      }
+	 int class_reg = class_base_reg[i] + i_r;
 
-      /* Add conflicts between our contiguous registers aliasing
-       * base regs and other register classes' contiguous registers
-       * that alias base regs, or the base regs themselves for classes[0].
-       */
-      for (int c = 0; c <= i; c++) {
-	 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
-	    for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
-		 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
-		 c_r++) {
-
-	       if (0) {
-		  printf("%d/%d conflicts %d/%d\n",
-			 class_sizes[i], first_assigned_grf + i_r,
-			 class_sizes[c], first_assigned_grf + c_r);
-	       }
-
-	       ra_add_reg_conflict(regs,
-				   class_base_reg[i] + i_r,
-				   class_base_reg[c] + c_r);
-	    }
+	 ra_class_add_reg(regs, classes[i], class_reg);
+
+	 for (int base_reg = i_r;
+	      base_reg < i_r + class_sizes[i];
+	      base_reg++) {
+	    ra_add_transitive_reg_conflict(regs, base_reg, class_reg);
 	 }
       }
    }
-- 
cgit v1.2.3


From b76378d46a211521582cfab56dc05031a57502a6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 May 2011 13:50:13 -0700
Subject: i965/fs: Eliminate the magic nature of virtual GRF 0.

This was a debugging aid at one point -- virtual grf 0 should never be
allocated, and it would be used if undefined register access occurred
in codegen.  However, it made the confusing register allocation code
even more confusing by indexing things off of 1 all over.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp              |  9 +++----
 src/mesa/drivers/dri/i965/brw_fs.h                |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 33 +++++++++--------------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp      |  4 +--
 4 files changed, 17 insertions(+), 31 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f55be022f72..d57a67cc4fc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -181,9 +181,6 @@ fs_visitor::virtual_grf_alloc(int size)
 	 virtual_grf_array_size *= 2;
       virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
 				   virtual_grf_array_size);
-
-      /* This slot is always unused. */
-      virtual_grf_sizes[0] = 0;
    }
    virtual_grf_sizes[virtual_grf_next] = size;
    return virtual_grf_next++;
@@ -985,7 +982,7 @@ fs_visitor::calculate_live_intervals()
 	 }
       } else {
 	 for (unsigned int i = 0; i < 3; i++) {
-	    if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
+	    if (inst->src[i].file == GRF) {
 	       int reg = inst->src[i].reg;
 
 	       if (!loop_depth) {
@@ -1001,7 +998,7 @@ fs_visitor::calculate_live_intervals()
 	       }
 	    }
 	 }
-	 if (inst->dst.file == GRF && inst->dst.reg != 0) {
+	 if (inst->dst.file == GRF) {
 	    int reg = inst->dst.reg;
 
 	    if (!loop_depth) {
@@ -1715,7 +1712,7 @@ fs_visitor::run()
       if (0) {
 	 /* Debug of register spilling: Go spill everything. */
 	 int virtual_grf_count = virtual_grf_next;
-	 for (int i = 1; i < virtual_grf_count; i++) {
+	 for (int i = 0; i < virtual_grf_count; i++) {
 	    spill_reg(i);
 	 }
       }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 96e1420038f..0375f672bec 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -402,7 +402,7 @@ public:
       this->base_ir = NULL;
 
       this->virtual_grf_sizes = NULL;
-      this->virtual_grf_next = 1;
+      this->virtual_grf_next = 0;
       this->virtual_grf_array_size = 0;
       this->virtual_grf_def = NULL;
       this->virtual_grf_use = NULL;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index f246ac49660..83dd629aafb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -50,7 +50,7 @@ extern "C" {
 static void
 assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
 {
-   if (reg->file == GRF && reg->reg != 0) {
+   if (reg->file == GRF) {
       assert(reg->reg_offset >= 0);
       reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
       reg->reg = 0;
@@ -60,20 +60,17 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
 void
 fs_visitor::assign_regs_trivial()
 {
-   int last_grf = 0;
-   int hw_reg_mapping[this->virtual_grf_next];
+   int hw_reg_mapping[this->virtual_grf_next + 1];
    int i;
    int reg_width = c->dispatch_width / 8;
 
-   hw_reg_mapping[0] = 0;
    /* Note that compressed instructions require alignment to 2 registers. */
-   hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width);
-   for (i = 2; i < this->virtual_grf_next; i++) {
+   hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
+   for (i = 1; i <= this->virtual_grf_next; i++) {
       hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
 			   this->virtual_grf_sizes[i - 1] * reg_width);
    }
-   last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] *
-				       reg_width);
+   this->grf_used = hw_reg_mapping[this->virtual_grf_next];
 
    foreach_list(node, &this->instructions) {
       fs_inst *inst = (fs_inst *)node;
@@ -83,12 +80,11 @@ fs_visitor::assign_regs_trivial()
       assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
    }
 
-   if (last_grf >= BRW_MAX_GRF) {
+   if (this->grf_used >= BRW_MAX_GRF) {
       fail("Ran out of regs on trivial allocator (%d/%d)\n",
-	   last_grf, BRW_MAX_GRF);
+	   this->grf_used, BRW_MAX_GRF);
    }
 
-   this->grf_used = last_grf + reg_width;
 }
 
 bool
@@ -101,7 +97,7 @@ fs_visitor::assign_regs()
     * for reg_width == 2.
     */
    int reg_width = c->dispatch_width / 8;
-   int hw_reg_mapping[this->virtual_grf_next + 1];
+   int hw_reg_mapping[this->virtual_grf_next];
    int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
    int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
    int class_sizes[base_reg_count];
@@ -125,7 +121,7 @@ fs_visitor::assign_regs()
        */
       class_sizes[class_count++] = 2;
    }
-   for (int r = 1; r < this->virtual_grf_next; r++) {
+   for (int r = 0; r < this->virtual_grf_next; r++) {
       int i;
 
       for (i = 0; i < class_count; i++) {
@@ -195,12 +191,8 @@ fs_visitor::assign_regs()
 
    struct ra_graph *g = ra_alloc_interference_graph(regs,
 						    this->virtual_grf_next);
-   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
-    * with nodes.
-    */
-   ra_set_node_class(g, 0, classes[0]);
 
-   for (int i = 1; i < this->virtual_grf_next; i++) {
+   for (int i = 0; i < this->virtual_grf_next; i++) {
       for (int c = 0; c < class_count; c++) {
 	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
 	    if (aligned_pair_class >= 0 &&
@@ -213,7 +205,7 @@ fs_visitor::assign_regs()
 	 }
       }
 
-      for (int j = 1; j < i; j++) {
+      for (int j = 0; j < i; j++) {
 	 if (virtual_grf_interferes(i, j)) {
 	    ra_add_node_interference(g, i, j);
 	 }
@@ -248,8 +240,7 @@ fs_visitor::assign_regs()
     * numbers.
     */
    this->grf_used = first_assigned_grf;
-   hw_reg_mapping[0] = 0; /* unused */
-   for (int i = 1; i < this->virtual_grf_next; i++) {
+   for (int i = 0; i < this->virtual_grf_next; i++) {
       int reg = ra_get_node_reg(g, i);
       int hw_reg = -1;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2b769ccbba1..2e3f9be75b4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir)
    this->result.type = brw_type_for_base_type(ir->type);
 
    if (index) {
-      assert(this->result.file == UNIFORM ||
-	     (this->result.file == GRF &&
-	      this->result.reg != 0));
+      assert(this->result.file == UNIFORM || this->result.file == GRF);
       this->result.reg_offset += index->value.i[0] * element_size;
    } else {
       assert(!"FINISHME: non-constant array element");
-- 
cgit v1.2.3


From 4e10d5825b31d2c58c0af3e29b7fc2eacb2b4709 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 5 May 2011 19:37:10 -0700
Subject: i965/fs: Simplify the register allocator using a map from RA reg to
 GRF.

It's fewer pointers to track, and when we start caching the register
set, it should be algorithmically better in the cache hit case (lookup
in a byte-per-register array, instead of a linear walk through the
description of register classes to find how to translate that class).
---
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 79 +++++++++++------------
 1 file changed, 38 insertions(+), 41 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 83dd629aafb..42ab66df6d8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -102,7 +102,7 @@ fs_visitor::assign_regs()
    int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
    int class_sizes[base_reg_count];
    int class_count = 0;
-   int aligned_pair_class = -1;
+   int aligned_pairs_class = -1;
 
    calculate_live_intervals();
 
@@ -137,52 +137,59 @@ fs_visitor::assign_regs()
       }
    }
 
+   /* Compute the total number of registers across all classes. */
    int ra_reg_count = 0;
-   int class_base_reg[class_count];
-   int class_reg_count[class_count];
-   int classes[class_count + 1];
-
    for (int i = 0; i < class_count; i++) {
-      class_base_reg[i] = ra_reg_count;
-      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
-      ra_reg_count += class_reg_count[i];
+      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
    }
 
    struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
+   uint8_t ra_reg_to_grf[ra_reg_count];
+   int classes[class_count + 1];
+
+   /* Now, add the registers to their classes, and add the conflicts
+    * between them and the base GRF registers (and also each other).
+    */
+   int reg = 0;
+   int pairs_base_reg = 0;
+   int pairs_reg_count = 0;
    for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
       classes[i] = ra_alloc_reg_class(regs);
 
-      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
-	 int class_reg = class_base_reg[i] + i_r;
+      /* Save this off for the aligned pair class at the end. */
+      if (class_sizes[i] == 2) {
+	 pairs_base_reg = reg;
+	 pairs_reg_count = class_reg_count;
+      }
+
+      for (int j = 0; j < class_reg_count; j++) {
+	 ra_class_add_reg(regs, classes[i], reg);
 
-	 ra_class_add_reg(regs, classes[i], class_reg);
+	 ra_reg_to_grf[reg] = j;
 
-	 for (int base_reg = i_r;
-	      base_reg < i_r + class_sizes[i];
+	 for (int base_reg = j;
+	      base_reg < j + class_sizes[i];
 	      base_reg++) {
-	    ra_add_transitive_reg_conflict(regs, base_reg, class_reg);
+	    ra_add_transitive_reg_conflict(regs, base_reg, reg);
 	 }
+
+	 reg++;
       }
    }
+   assert(reg == ra_reg_count);
 
    /* Add a special class for aligned pairs, which we'll put delta_x/y
     * in on gen5 so that we can do PLN.
     */
    if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
-      int reg_count = (base_reg_count - 1) / 2;
-      int unaligned_pair_class = 1;
-      assert(class_sizes[unaligned_pair_class] == 2);
-
-      aligned_pair_class = class_count;
-      classes[aligned_pair_class] = ra_alloc_reg_class(regs);
-      class_sizes[aligned_pair_class] = 2;
-      class_base_reg[aligned_pair_class] = 0;
-      class_reg_count[aligned_pair_class] = 0;
-      int start = (first_assigned_grf & 1) ? 1 : 0;
-
-      for (int i = 0; i < reg_count; i++) {
-	 ra_class_add_reg(regs, classes[aligned_pair_class],
-			  class_base_reg[unaligned_pair_class] + i * 2 + start);
+      aligned_pairs_class = ra_alloc_reg_class(regs);
+
+      for (int i = 0; i < pairs_reg_count; i++) {
+	 if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+	    ra_class_add_reg(regs, aligned_pairs_class,
+			     pairs_base_reg + i);
+	 }
       }
       class_count++;
    }
@@ -195,9 +202,9 @@ fs_visitor::assign_regs()
    for (int i = 0; i < this->virtual_grf_next; i++) {
       for (int c = 0; c < class_count; c++) {
 	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
-	    if (aligned_pair_class >= 0 &&
+	    if (aligned_pairs_class >= 0 &&
 		this->delta_x.reg == i) {
-	       ra_set_node_class(g, i, classes[aligned_pair_class]);
+	       ra_set_node_class(g, i, aligned_pairs_class);
 	    } else {
 	       ra_set_node_class(g, i, classes[c]);
 	    }
@@ -242,18 +249,8 @@ fs_visitor::assign_regs()
    this->grf_used = first_assigned_grf;
    for (int i = 0; i < this->virtual_grf_next; i++) {
       int reg = ra_get_node_reg(g, i);
-      int hw_reg = -1;
-
-      for (int c = 0; c < class_count; c++) {
-	 if (reg >= class_base_reg[c] &&
-	     reg < class_base_reg[c] + class_reg_count[c]) {
-	    hw_reg = reg - class_base_reg[c];
-	    break;
-	 }
-      }
 
-      assert(hw_reg >= 0);
-      hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width;
+      hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width;
       this->grf_used = MAX2(this->grf_used,
 			    hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
 			    reg_width);
-- 
cgit v1.2.3


From b1f0bffd399f377a19b0541e1d834afad8b9dad0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 9 May 2011 09:56:18 -0700
Subject: i965/fs: Factor out the register allocator setup to a separate
 function.

Besides separating out a logical step of the giant register allocator
function, this now communicates a bunch of the allocator information
through entries in brw_context, which will make this code partially
reusable for caching the expensive allocator setup.
---
 src/mesa/drivers/dri/i965/brw_context.h           |  23 ++++
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 148 ++++++++++++----------
 2 files changed, 105 insertions(+), 66 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 22baf978ad4..cc11d06874d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -748,6 +748,29 @@ struct brw_context
        * Pre-gen6, push constants live in the CURBE.
        */
       uint32_t push_const_offset;
+
+      /** @{ register allocator */
+
+      struct ra_regs *regs;
+
+      /** Array of the ra classes for the unaligned contiguous
+       * register block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+      */
+      uint8_t *ra_reg_to_grf;
+
+      /**
+       * ra class for the aligned pairs we use for PLN, which doesn't
+       * appear in *classes.
+       */
+      int aligned_pairs_class;
+
+      /** @} */
    } wm;
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 42ab66df6d8..8e44a010576 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -87,55 +87,14 @@ fs_visitor::assign_regs_trivial()
 
 }
 
-bool
-fs_visitor::assign_regs()
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+			      int *class_sizes,
+			      int class_count,
+			      int reg_width,
+			      int base_reg_count)
 {
-   /* Most of this allocation was written for a reg_width of 1
-    * (dispatch_width == 8).  In extending to 16-wide, the code was
-    * left in place and it was converted to have the hardware
-    * registers it's allocating be contiguous physical pairs of regs
-    * for reg_width == 2.
-    */
-   int reg_width = c->dispatch_width / 8;
-   int hw_reg_mapping[this->virtual_grf_next];
-   int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
-   int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
-   int class_sizes[base_reg_count];
-   int class_count = 0;
-   int aligned_pairs_class = -1;
-
-   calculate_live_intervals();
-
-   /* Set up the register classes.
-    *
-    * The base registers store a scalar value.  For texture samples,
-    * we get virtual GRFs composed of 4 contiguous hw register.  For
-    * structures and arrays, we store them as contiguous larger things
-    * than that, though we should be able to do better most of the
-    * time.
-    */
-   class_sizes[class_count++] = 1;
-   if (brw->has_pln && intel->gen < 6) {
-      /* Always set up the (unaligned) pairs for gen5, so we can find
-       * them for making the aligned pair class.
-       */
-      class_sizes[class_count++] = 2;
-   }
-   for (int r = 0; r < this->virtual_grf_next; r++) {
-      int i;
-
-      for (i = 0; i < class_count; i++) {
-	 if (class_sizes[i] == this->virtual_grf_sizes[r])
-	    break;
-      }
-      if (i == class_count) {
-	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
-	    fail("Object too large to register allocate.\n");
-	 }
-
-	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
-      }
-   }
+   struct intel_context *intel = &brw->intel;
 
    /* Compute the total number of registers across all classes. */
    int ra_reg_count = 0;
@@ -143,9 +102,14 @@ fs_visitor::assign_regs()
       ra_reg_count += base_reg_count - (class_sizes[i] - 1);
    }
 
-   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
-   uint8_t ra_reg_to_grf[ra_reg_count];
-   int classes[class_count + 1];
+   ralloc_free(brw->wm.ra_reg_to_grf);
+   brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->wm.regs);
+   brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->wm.classes);
+   brw->wm.classes = ralloc_array(brw, int, class_count + 1);
+
+   brw->wm.aligned_pairs_class = -1;
 
    /* Now, add the registers to their classes, and add the conflicts
     * between them and the base GRF registers (and also each other).
@@ -155,7 +119,7 @@ fs_visitor::assign_regs()
    int pairs_reg_count = 0;
    for (int i = 0; i < class_count; i++) {
       int class_reg_count = base_reg_count - (class_sizes[i] - 1);
-      classes[i] = ra_alloc_reg_class(regs);
+      brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);
 
       /* Save this off for the aligned pair class at the end. */
       if (class_sizes[i] == 2) {
@@ -164,14 +128,14 @@ fs_visitor::assign_regs()
       }
 
       for (int j = 0; j < class_reg_count; j++) {
-	 ra_class_add_reg(regs, classes[i], reg);
+	 ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);
 
-	 ra_reg_to_grf[reg] = j;
+	 brw->wm.ra_reg_to_grf[reg] = j;
 
 	 for (int base_reg = j;
 	      base_reg < j + class_sizes[i];
 	      base_reg++) {
-	    ra_add_transitive_reg_conflict(regs, base_reg, reg);
+	    ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
 	 }
 
 	 reg++;
@@ -183,30 +147,83 @@ fs_visitor::assign_regs()
     * in on gen5 so that we can do PLN.
     */
    if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
-      aligned_pairs_class = ra_alloc_reg_class(regs);
+      brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);
 
       for (int i = 0; i < pairs_reg_count; i++) {
-	 if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
-	    ra_class_add_reg(regs, aligned_pairs_class,
+	 if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+	    ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
 			     pairs_base_reg + i);
 	 }
       }
       class_count++;
    }
 
-   ra_set_finalize(regs);
+   ra_set_finalize(brw->wm.regs);
+}
+
+bool
+fs_visitor::assign_regs()
+{
+   /* Most of this allocation was written for a reg_width of 1
+    * (dispatch_width == 8).  In extending to 16-wide, the code was
+    * left in place and it was converted to have the hardware
+    * registers it's allocating be contiguous physical pairs of regs
+    * for reg_width == 2.
+    */
+   int reg_width = c->dispatch_width / 8;
+   int hw_reg_mapping[this->virtual_grf_next];
+   int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
+   int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
+   int class_sizes[base_reg_count];
+   int class_count = 0;
+
+   calculate_live_intervals();
+
+   /* Set up the register classes.
+    *
+    * The base registers store a scalar value.  For texture samples,
+    * we get virtual GRFs composed of 4 contiguous hw register.  For
+    * structures and arrays, we store them as contiguous larger things
+    * than that, though we should be able to do better most of the
+    * time.
+    */
+   class_sizes[class_count++] = 1;
+   if (brw->has_pln && intel->gen < 6) {
+      /* Always set up the (unaligned) pairs for gen5, so we can find
+       * them for making the aligned pair class.
+       */
+      class_sizes[class_count++] = 2;
+   }
+   for (int r = 0; r < this->virtual_grf_next; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+	 if (class_sizes[i] == this->virtual_grf_sizes[r])
+	    break;
+      }
+      if (i == class_count) {
+	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
+	    fail("Object too large to register allocate.\n");
+	 }
+
+	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
+				 reg_width, base_reg_count);
 
-   struct ra_graph *g = ra_alloc_interference_graph(regs,
+   struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
 						    this->virtual_grf_next);
 
    for (int i = 0; i < this->virtual_grf_next; i++) {
       for (int c = 0; c < class_count; c++) {
 	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
-	    if (aligned_pairs_class >= 0 &&
+	    if (brw->wm.aligned_pairs_class >= 0 &&
 		this->delta_x.reg == i) {
-	       ra_set_node_class(g, i, aligned_pairs_class);
+	       ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
 	    } else {
-	       ra_set_node_class(g, i, classes[c]);
+	       ra_set_node_class(g, i, brw->wm.classes[c]);
 	    }
 	    break;
 	 }
@@ -237,7 +254,6 @@ fs_visitor::assign_regs()
 
 
       ralloc_free(g);
-      ralloc_free(regs);
 
       return false;
    }
@@ -250,7 +266,8 @@ fs_visitor::assign_regs()
    for (int i = 0; i < this->virtual_grf_next; i++) {
       int reg = ra_get_node_reg(g, i);
 
-      hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width;
+      hw_reg_mapping[i] = (first_assigned_grf +
+			   brw->wm.ra_reg_to_grf[reg] * reg_width);
       this->grf_used = MAX2(this->grf_used,
 			    hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
 			    reg_width);
@@ -265,7 +282,6 @@ fs_visitor::assign_regs()
    }
 
    ralloc_free(g);
-   ralloc_free(regs);
 
    return true;
 }
-- 
cgit v1.2.3


From c9e81fe14f36933617c862efb15ae09194485eab Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 15 May 2011 09:36:19 -0700
Subject: i965: Drop the reg/hw_reg distinction.

"reg" was set in only one case, virtual GRFs pre register allocation,
and would be unset and have hw_reg set after allocation.  Since we
never bothered with looking at virtual GRF number after allocation
anyway, just use the same storage and avoid confusion.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp               | 34 +++++++++++-----------
 src/mesa/drivers/dri/i965/brw_fs.h                 | 21 +++++++------
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          |  6 ++--
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  4 +--
 .../dri/i965/brw_fs_schedule_instructions.cpp      |  8 ++---
 5 files changed, 37 insertions(+), 36 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d57a67cc4fc..cafb7092ac8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -187,20 +187,20 @@ fs_visitor::virtual_grf_alloc(int size)
 }
 
 /** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg)
+fs_reg::fs_reg(enum register_file file, int reg)
 {
    init();
    this->file = file;
-   this->hw_reg = hw_reg;
+   this->reg = reg;
    this->type = BRW_REGISTER_TYPE_F;
 }
 
 /** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
+fs_reg::fs_reg(enum register_file file, int reg, uint32_t type)
 {
    init();
    this->file = file;
-   this->hw_reg = hw_reg;
+   this->reg = reg;
    this->type = type;
 }
 
@@ -636,7 +636,7 @@ fs_visitor::assign_curb_setup()
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == UNIFORM) {
-	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	    int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
 	    struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
 						  constant_nr / 8,
 						  constant_nr % 8);
@@ -810,7 +810,7 @@ fs_visitor::remove_dead_constants()
 	 fs_inst *inst = (fs_inst *)node;
 
 	 for (int i = 0; i < 3; i++) {
-	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	    int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
 
 	    if (inst->src[i].file != UNIFORM)
 	       continue;
@@ -862,13 +862,13 @@ fs_visitor::remove_dead_constants()
       fs_inst *inst = (fs_inst *)node;
 
       for (int i = 0; i < 3; i++) {
-	 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	 int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
 
 	 if (inst->src[i].file != UNIFORM)
 	    continue;
 
 	 assert(this->params_remap[constant_nr] != -1);
-	 inst->src[i].hw_reg = this->params_remap[constant_nr];
+	 inst->src[i].reg = this->params_remap[constant_nr];
 	 inst->src[i].reg_offset = 0;
       }
    }
@@ -912,7 +912,7 @@ fs_visitor::setup_pull_constants()
 	 if (inst->src[i].file != UNIFORM)
 	    continue;
 
-	 int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	 int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
 	 if (uniform_nr < pull_uniform_base)
 	    continue;
 
@@ -1374,9 +1374,9 @@ fs_visitor::compute_to_mrf()
       /* Work out which hardware MRF registers are written by this
        * instruction.
        */
-      int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+      int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
       int mrf_high;
-      if (inst->dst.hw_reg & BRW_MRF_COMPR4) {
+      if (inst->dst.reg & BRW_MRF_COMPR4) {
 	 mrf_high = mrf_low + 4;
       } else if (c->dispatch_width == 16 &&
 		 (!inst->force_uncompressed && !inst->force_sechalf)) {
@@ -1443,7 +1443,7 @@ fs_visitor::compute_to_mrf()
 	    if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
 	       /* Found the creator of our MRF's source value. */
 	       scan_inst->dst.file = MRF;
-	       scan_inst->dst.hw_reg = inst->dst.hw_reg;
+	       scan_inst->dst.reg = inst->dst.reg;
 	       scan_inst->saturate |= inst->saturate;
 	       inst->remove();
 	       progress = true;
@@ -1480,10 +1480,10 @@ fs_visitor::compute_to_mrf()
 	    /* If somebody else writes our MRF here, we can't
 	     * compute-to-MRF before that.
 	     */
-	    int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+	    int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
 	    int scan_mrf_high;
 
-	    if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) {
+	    if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
 	       scan_mrf_high = scan_mrf_low + 4;
 	    } else if (c->dispatch_width == 16 &&
 		       (!scan_inst->force_uncompressed &&
@@ -1555,7 +1555,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 
       if (inst->opcode == BRW_OPCODE_MOV &&
 	  inst->dst.file == MRF) {
-	 fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+	 fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
 	 if (prev_inst && inst->equals(prev_inst)) {
 	    inst->remove();
 	    progress = true;
@@ -1565,7 +1565,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 
       /* Clear out the last-write records for MRFs that were overwritten. */
       if (inst->dst.file == MRF) {
-	 last_mrf_move[inst->dst.hw_reg] = NULL;
+	 last_mrf_move[inst->dst.reg] = NULL;
       }
 
       if (inst->mlen > 0) {
@@ -1591,7 +1591,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 	  inst->dst.file == MRF &&
 	  inst->src[0].file == GRF &&
 	  !inst->predicated) {
-	 last_mrf_move[inst->dst.hw_reg] = inst;
+	 last_mrf_move[inst->dst.reg] = inst;
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 0375f672bec..4ec649014de 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -51,7 +51,7 @@ enum register_file {
    MRF = BRW_MESSAGE_REGISTER_FILE,
    IMM = BRW_IMMEDIATE_VALUE,
    FIXED_HW_REG, /* a struct brw_reg */
-   UNIFORM, /* prog_data->params[hw_reg] */
+   UNIFORM, /* prog_data->params[reg] */
    BAD_FILE
 };
 
@@ -99,7 +99,6 @@ public:
    void init()
    {
       memset(this, 0, sizeof(*this));
-      this->hw_reg = -1;
       this->smear = -1;
    }
 
@@ -146,8 +145,8 @@ public:
       this->type = fixed_hw_reg.type;
    }
 
-   fs_reg(enum register_file file, int hw_reg);
-   fs_reg(enum register_file file, int hw_reg, uint32_t type);
+   fs_reg(enum register_file file, int reg);
+   fs_reg(enum register_file file, int reg, uint32_t type);
    fs_reg(class fs_visitor *v, const struct glsl_type *type);
 
    bool equals(fs_reg *r)
@@ -155,7 +154,6 @@ public:
       return (file == r->file &&
 	      reg == r->reg &&
 	      reg_offset == r->reg_offset &&
-	      hw_reg == r->hw_reg &&
 	      type == r->type &&
 	      negate == r->negate &&
 	      abs == r->abs &&
@@ -167,12 +165,17 @@ public:
 
    /** Register file: ARF, GRF, MRF, IMM. */
    enum register_file file;
-   /** virtual register number.  0 = fixed hw reg */
+   /**
+    * Register number.  For ARF/MRF, it's the hardware register.  For
+    * GRF, it's a virtual register number until register allocation
+    */
    int reg;
-   /** Offset within the virtual register. */
+   /**
+    * For virtual registers, this is a hardware register offset from
+    * the start of the register block (for example, a constant index
+    * in an array access).
+    */
    int reg_offset;
-   /** HW register number.  Generally unset until register allocation. */
-   int hw_reg;
    /** Register type.  BRW_REGISTER_TYPE_* */
    int type;
    bool negate;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 9fb0153d1f8..e168e541bef 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -538,11 +538,9 @@ brw_reg_from_fs_reg(fs_reg *reg)
    case ARF:
    case MRF:
       if (reg->smear == -1) {
-	 brw_reg = brw_vec8_reg(reg->file,
-				reg->hw_reg, 0);
+	 brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
       } else {
-	 brw_reg = brw_vec1_reg(reg->file,
-				reg->hw_reg, reg->smear);
+	 brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
       }
       brw_reg = retype(brw_reg, reg->type);
       if (reg->sechalf)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 8e44a010576..5c9cba99ae5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -52,8 +52,8 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
 {
    if (reg->file == GRF) {
       assert(reg->reg_offset >= 0);
-      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
-      reg->reg = 0;
+      reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
+      reg->reg_offset = 0;
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 9ec3f502764..f1a88fcfa79 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -321,12 +321,12 @@ instruction_scheduler::calculate_deps()
 	 add_dep(last_grf_write[inst->dst.reg], n);
 	 last_grf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == MRF) {
-	 int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+	 int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
 	 add_dep(last_mrf_write[reg], n);
 	 last_mrf_write[reg] = n;
 	 if (is_compressed(inst)) {
-	    if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+	    if (inst->dst.reg & BRW_MRF_COMPR4)
 	       reg += 4;
 	    else
 	       reg++;
@@ -401,12 +401,12 @@ instruction_scheduler::calculate_deps()
       if (inst->dst.file == GRF) {
 	 last_grf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == MRF) {
-	 int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+	 int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
 	 last_mrf_write[reg] = n;
 
 	 if (is_compressed(inst)) {
-	    if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+	    if (inst->dst.reg & BRW_MRF_COMPR4)
 	       reg += 4;
 	    else
 	       reg++;
-- 
cgit v1.2.3


From 09eeb0ff27005c0ffccd5cdbe46862e181a4ee6c Mon Sep 17 00:00:00 2001
From: Carl Simonson <simonsonc@gmail.com>
Date: Wed, 10 Aug 2011 11:10:43 -0700
Subject: i830: Add missing vtable entry for i830 from the hiz work.

---
 src/mesa/drivers/dri/i915/i830_vtbl.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
index 6d43726beb1..ed5286fd7d9 100644
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -881,6 +881,12 @@ i830_invalidate_state(struct intel_context *intel, GLuint new_state)
       i830_update_provoking_vertex(&intel->ctx);
 }
 
+static bool
+i830_is_hiz_depth_format(struct intel_context *intel, gl_format format)
+{
+   return false;
+}
+
 void
 i830InitVtbl(struct i830_context *i830)
 {
@@ -898,4 +904,5 @@ i830InitVtbl(struct i830_context *i830)
    i830->intel.vtbl.finish_batch = intel_finish_vb;
    i830->intel.vtbl.invalidate_state = i830_invalidate_state;
    i830->intel.vtbl.render_target_supported = i830_render_target_supported;
+   i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format;
 }
-- 
cgit v1.2.3


From fa351bd2e0aecccd5ed6ef8744d5ba4a6dbf5d2c Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben@bwidawsk.net>
Date: Sun, 7 Aug 2011 17:04:04 -0700
Subject: intel: GetBuffer fix

After copy buffer on preGEN6, it is necessary to wait for the blit to
complete before returning data to the user.

This should fix the piglit test: copy_buffer_coherency (pre-GEN6).

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 439d6fc8247..703300b31af 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -282,12 +282,17 @@ intel_bufferobj_get_subdata(struct gl_context * ctx,
                             GLvoid * data, struct gl_buffer_object *obj)
 {
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+   struct intel_context *intel = intel_context(ctx);
 
    assert(intel_obj);
    if (intel_obj->sys_buffer)
       memcpy(data, (char *)intel_obj->sys_buffer + offset, size);
-   else
+   else {
+      if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
+	 intel_batchbuffer_flush(intel);
+      }
       drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
+   }
 }
 
 
-- 
cgit v1.2.3


From e411cd7b0a54d2f9b9f4cda4918aa7742ed5c2a6 Mon Sep 17 00:00:00 2001
From: Andreas Fänger <a.faenger@e-sign.com>
Date: Wed, 10 Aug 2011 08:07:29 +0000
Subject: swrast: initial multi-threaded span rendering

Optional parallel rendering of spans using OpenMP.
Initial implementation for aa triangles. A new option for scons is
also provided to activate the openmp support (off by default).

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 common.py                      |  1 +
 scons/gallium.py               | 12 +++++++
 src/mesa/swrast/s_aatritemp.h  | 72 ++++++++++++++++++++++++------------------
 src/mesa/swrast/s_context.c    | 26 +++++++++++----
 src/mesa/swrast/s_texcombine.c |  4 +++
 src/mesa/tnl/t_pipeline.c      | 12 +++++++
 6 files changed, 91 insertions(+), 36 deletions(-)

(limited to 'src/mesa')

diff --git a/common.py b/common.py
index 8657030ea3f..cfee1b5dc2e 100644
--- a/common.py
+++ b/common.py
@@ -88,6 +88,7 @@ def AddOptions(opts):
 	opts.Add('toolchain', 'compiler toolchain', default_toolchain)
 	opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no'))
 	opts.Add(BoolOption('llvm', 'use LLVM', default_llvm))
+	opts.Add(BoolOption('openmp', 'EXPERIMENTAL: compile with openmp (swrast)', 'no'))
 	opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes'))
 	opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no'))
 	opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes'))
diff --git a/scons/gallium.py b/scons/gallium.py
index 8cd3bc7f6e0..7135251d7a3 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -596,6 +596,18 @@ def generate(env):
         libs += ['m', 'pthread', 'dl']
     env.Append(LIBS = libs)
 
+    # OpenMP
+    if env['openmp']:
+        if env['msvc']:
+            env.Append(CCFLAGS = ['/openmp'])
+            # When building openmp release VS2008 link.exe crashes with LNK1103 error.
+            # Workaround: overwrite PDB flags with empty value as it isn't required anyways
+            if env['build'] == 'release':
+                env['PDB'] = ''
+        if env['gcc']:
+            env.Append(CCFLAGS = ['-fopenmp'])
+            env.Append(LIBS = ['gomp'])
+
     # Load tools
     env.Tool('lex')
     env.Tool('yacc')
diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h
index 91d4f7a10ab..77b3ae6ec7a 100644
--- a/src/mesa/swrast/s_aatritemp.h
+++ b/src/mesa/swrast/s_aatritemp.h
@@ -181,13 +181,20 @@
       const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
       const GLfloat dxdy = majDx / majDy;
       const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F;
-      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
       GLint iy;
-      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+      for (iy = iyMin; iy < iyMax; iy++) {
+         GLfloat x = pMin[0] - (yMin - iy) * dxdy;
          GLint ix, startX = (GLint) (x - xAdj);
          GLuint count;
          GLfloat coverage = 0.0F;
 
+#ifdef _OPENMP
+         /* each thread needs to use a different (global) SpanArrays variable */
+         span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
          /* skip over fragments with zero coverage */
          while (startX < MAX_WIDTH) {
             coverage = compute_coveragef(pMin, pMid, pMax, startX, iy);
@@ -228,13 +235,12 @@
             coverage = compute_coveragef(pMin, pMid, pMax, ix, iy);
          }
          
-         if (ix <= startX)
-            continue;
-         
-         span.x = startX;
-         span.y = iy;
-         span.end = (GLuint) ix - (GLuint) startX;
-         _swrast_write_rgba_span(ctx, &span);
+         if (ix > startX) {
+            span.x = startX;
+            span.y = iy;
+            span.end = (GLuint) ix - (GLuint) startX;
+            _swrast_write_rgba_span(ctx, &span);
+         }
       }
    }
    else {
@@ -244,13 +250,20 @@
       const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
       const GLfloat dxdy = majDx / majDy;
       const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F;
-      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
       GLint iy;
-      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+      for (iy = iyMin; iy < iyMax; iy++) {
+         GLfloat x = pMin[0] - (yMin - iy) * dxdy;
          GLint ix, left, startX = (GLint) (x + xAdj);
          GLuint count, n;
          GLfloat coverage = 0.0F;
          
+#ifdef _OPENMP
+         /* each thread needs to use a different (global) SpanArrays variable */
+         span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
          /* make sure we're not past the window edge */
          if (startX >= ctx->DrawBuffer->_Xmax) {
             startX = ctx->DrawBuffer->_Xmax - 1;
@@ -296,31 +309,30 @@
          ATTRIB_LOOP_END
 #endif
 
-         if (startX <= ix)
-            continue;
-
-         n = (GLuint) startX - (GLuint) ix;
+         if (startX > ix) {
+            n = (GLuint) startX - (GLuint) ix;
 
-         left = ix + 1;
+            left = ix + 1;
 
-         /* shift all values to the left */
-         /* XXX this is temporary */
-         {
-            SWspanarrays *array = span.array;
-            GLint j;
-            for (j = 0; j < (GLint) n; j++) {
-               array->coverage[j] = array->coverage[j + left];
-               COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
+            /* shift all values to the left */
+            /* XXX this is temporary */
+            {
+               SWspanarrays *array = span.array;
+               GLint j;
+               for (j = 0; j < (GLint) n; j++) {
+                  array->coverage[j] = array->coverage[j + left];
+                  COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
 #ifdef DO_Z
-               array->z[j] = array->z[j + left];
+                  array->z[j] = array->z[j + left];
 #endif
+               }
             }
-         }
 
-         span.x = left;
-         span.y = iy;
-         span.end = n;
-         _swrast_write_rgba_span(ctx, &span);
+            span.x = left;
+            span.y = iy;
+            span.end = n;
+            _swrast_write_rgba_span(ctx, &span);
+         }
       }
    }
 }
diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index def1531d7ff..4434f11b990 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -772,6 +772,11 @@ _swrast_CreateContext( struct gl_context *ctx )
 {
    GLuint i;
    SWcontext *swrast = (SWcontext *)CALLOC(sizeof(SWcontext));
+#ifdef _OPENMP
+   const GLint maxThreads = omp_get_max_threads();
+#else
+   const GLint maxThreads = 1;
+#endif
 
    if (SWRAST_DEBUG) {
       _mesa_debug(ctx, "_swrast_CreateContext\n");
@@ -806,19 +811,25 @@ _swrast_CreateContext( struct gl_context *ctx )
    for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++)
       swrast->TextureSample[i] = NULL;
 
-   swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays);
+   /* SpanArrays is global and shared by all SWspan instances. However, when
+    * using multiple threads, it is necessary to have one SpanArrays instance
+    * per thread.
+    */
+   swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays));
    if (!swrast->SpanArrays) {
       FREE(swrast);
       return GL_FALSE;
    }
-   swrast->SpanArrays->ChanType = CHAN_TYPE;
+   for(i = 0; i < maxThreads; i++) {
+      swrast->SpanArrays[i].ChanType = CHAN_TYPE;
 #if CHAN_TYPE == GL_UNSIGNED_BYTE
-   swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8;
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8;
 #elif CHAN_TYPE == GL_UNSIGNED_SHORT
-   swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16;
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16;
 #else
-   swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0];
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0];
 #endif
+   }
 
    /* init point span buffer */
    swrast->PointSpan.primitive = GL_POINT;
@@ -826,7 +837,10 @@ _swrast_CreateContext( struct gl_context *ctx )
    swrast->PointSpan.facing = 0;
    swrast->PointSpan.array = swrast->SpanArrays;
 
-   swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits *
+   /* TexelBuffer is also global and normally shared by all SWspan instances;
+    * when running with multiple threads, create one per thread.
+    */
+   swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
                                            MAX_WIDTH * 4 * sizeof(GLfloat));
    if (!swrast->TexelBuffer) {
       FREE(swrast->SpanArrays);
diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c
index 086ed0b33d7..80b9dff3cc2 100644
--- a/src/mesa/swrast/s_texcombine.c
+++ b/src/mesa/swrast/s_texcombine.c
@@ -48,7 +48,11 @@ typedef float (*float4_array)[4];
 static INLINE float4_array
 get_texel_array(SWcontext *swrast, GLuint unit)
 {
+#ifdef _OPENMP
+   return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num()));
+#else
    return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
+#endif
 }
 
 
diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c
index 18f095f0d4b..881d5d5f535 100644
--- a/src/mesa/tnl/t_pipeline.c
+++ b/src/mesa/tnl/t_pipeline.c
@@ -146,7 +146,17 @@ void _tnl_run_pipeline( struct gl_context *ctx )
 	 _tnl_notify_pipeline_output_change( ctx );
    }
 
+#ifndef _OPENMP
+   /* Don't adjust the FPU precision mode when multiple threads may be used.
+    * Doing so would require the additional threads to change the FPU mode as
+    * well, which is quite a mess because it would have to be done in every
+    * parallelized section; otherwise the master thread and the other threads
+    * would run in different modes, producing inconsistent results.
+    * Note that no x64 implementation defines/uses START_FAST_MATH, so
+    * this "hack" is only used in i386 mode.
+    */
    START_FAST_MATH(__tmp);
+#endif
 
    for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
       struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
@@ -154,7 +164,9 @@ void _tnl_run_pipeline( struct gl_context *ctx )
 	 break;
    }
 
+#ifndef _OPENMP
    END_FAST_MATH(__tmp);
+#endif
 }
 
 
-- 
cgit v1.2.3


From 37a64baea87c470a68f9b2582af86783eb3509c4 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 11 Aug 2011 08:52:41 -0600
Subject: swrast: don't try to do depth testing if there's no depth buffer

Fixes piglit hiz-depth-stencil-test-fbo-d0-s8 crash.
See http://bugs.freedesktop.org/show_bug.cgi?id=37907

NOTE: This is a candidate for the 7.11 branch.
---
 src/mesa/swrast/s_stencil.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c
index 5bec71c057b..fa5093a3407 100644
--- a/src/mesa/swrast/s_stencil.c
+++ b/src/mesa/swrast/s_stencil.c
@@ -462,7 +462,8 @@ stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face)
     * Some fragments passed the stencil test, apply depth test to them
     * and apply Zpass and Zfail stencil ops.
     */
-   if (ctx->Depth.Test == GL_FALSE) {
+   if (ctx->Depth.Test == GL_FALSE ||
+       ctx->DrawBuffer->_DepthBuffer == NULL) {
       /*
        * No depth buffer, just apply zpass stencil function to active pixels.
        */
-- 
cgit v1.2.3


From 9b8287f8f5398647ced3a52885233d58e548c2b7 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 11 Aug 2011 08:58:08 -0600
Subject: mesa: fix ColorMask array index in _mesa_init_driver_state()

This doesn't really make any difference because all the colormasks
are the same upon context set-up, but it makes more sense.
---
 src/mesa/drivers/common/driverfuncs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 76630264bf7..70f8727a092 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -248,10 +248,10 @@ _mesa_init_driver_state(struct gl_context *ctx)
       GLuint i;
       for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
          ctx->Driver.ColorMaskIndexed(ctx, i,
-                                      ctx->Color.ColorMask[0][RCOMP],
-                                      ctx->Color.ColorMask[0][GCOMP],
-                                      ctx->Color.ColorMask[0][BCOMP],
-                                      ctx->Color.ColorMask[0][ACOMP]);
+                                      ctx->Color.ColorMask[i][RCOMP],
+                                      ctx->Color.ColorMask[i][GCOMP],
+                                      ctx->Color.ColorMask[i][BCOMP],
+                                      ctx->Color.ColorMask[i][ACOMP]);
       }
    }
    else {
-- 
cgit v1.2.3


From 099aad2fb0dba8baff61dc7a6803c6c976c08069 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 11 Aug 2011 09:02:16 -0600
Subject: mesa: fix initialization of GL_FOG_MODE in _mesa_init_driver_state()

---
 src/mesa/drivers/common/driverfuncs.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 70f8727a092..a6174ee2f56 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -286,7 +286,10 @@ _mesa_init_driver_state(struct gl_context *ctx)
    ctx->Driver.Enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE);
 
    ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
-   ctx->Driver.Fogfv(ctx, GL_FOG_MODE, 0);
+   {
+      GLfloat mode = (GLfloat) ctx->Fog.Mode;
+      ctx->Driver.Fogfv(ctx, GL_FOG_MODE, &mode);
+   }
    ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
    ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
    ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
-- 
cgit v1.2.3


From 9cd64ec35acd54cbe0be4d03236d2c5a9d4be6fe Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Wed, 10 Aug 2011 15:46:14 -0700
Subject: x86-64: Fix compile error with clang

Remove the 'f' suffix from a float literal.
    - .float 0f+1.0
    + .float 1.0

This fixes the following compile error with clang:
    error: unexpected token in directive
    .float 0f+1.0
            ^

Note: This is a candidate for the stable branches.
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/x86-64/xform4.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S
index 6141e434679..5abd5a25de5 100644
--- a/src/mesa/x86-64/xform4.S
+++ b/src/mesa/x86-64/xform4.S
@@ -118,7 +118,7 @@ p4_constants:
 .byte  0x00, 0x00, 0x00, 0x00
 .byte  0x00, 0x00, 0x00, 0x00
 .byte  0x00, 0x00, 0x00, 0x00
-.float 0f+1.0
+.float 1.0
 
 .text
 .align 16
-- 
cgit v1.2.3


From 5076561b35b9c2c78f277ab03bf1e642094ee20e Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Mon, 8 Aug 2011 10:14:44 +0900
Subject: glapi: use gl_and_es_API.xml to generate GLES headers

glapi/gen-es/ defines two sets of GLAPI XMLs for OpenGL ES 1.1
(es1_API.xml) and 2.0 (es2_API.xml) respectively.  They are used to
generate dispatch.h and remap_helper.h for GLES.  Together with
gl_and_es_API.xml, we have to maintain three sets of GLAPI XMLs.

This commit makes dispatch.h and remap_helper.h for GLES be generated
from gl_and_es_API.xml.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mapi/glapi/gen-es/Makefile     | 10 ++++++----
 src/mapi/glapi/gen/Makefile        |  6 ++----
 src/mapi/glapi/gen/gl_table.py     | 20 +++++++++++++++-----
 src/mapi/glapi/gen/remap_helper.py | 18 ++++++++++++++++--
 src/mesa/SConscript                | 16 ++++++++--------
 5 files changed, 47 insertions(+), 23 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mapi/glapi/gen-es/Makefile b/src/mapi/glapi/gen-es/Makefile
index bf66ec037cf..3fd539d26d5 100644
--- a/src/mapi/glapi/gen-es/Makefile
+++ b/src/mapi/glapi/gen-es/Makefile
@@ -11,8 +11,8 @@ OUTPUTS :=			\
 COMMON = gl_and_es_API.xml gl_XML.py glX_XML.py license.py typeexpr.py
 COMMON := $(addprefix $(GLAPI)/, $(COMMON))
 
-ES1_APIXML := es1_API.xml
-ES2_APIXML := es2_API.xml
+ES1_APIXML := $(GLAPI)/gl_and_es_API.xml
+ES2_APIXML := $(GLAPI)/gl_and_es_API.xml
 ES1_OUTPUT_DIR := $(TOP)/src/mapi/es1api
 ES2_OUTPUT_DIR := $(TOP)/src/mapi/es2api
 
@@ -37,10 +37,12 @@ shared-glapi: $(SHARED_GLAPI_OUTPUTS)
 
 $(ES1_OUTPUTS): APIXML := $(ES1_APIXML)
 $(ES1_OUTPUTS): PRINTER := es1api
+$(ES1_OUTPUTS): GLES_VER := es1
 $(ES1_OUTPUTS): $(ES1_DEPS)
 
 $(ES2_OUTPUTS): APIXML := $(ES2_APIXML)
 $(ES2_OUTPUTS): PRINTER := es2api
+$(ES2_OUTPUTS): GLES_VER := es2
 $(ES2_OUTPUTS): $(ES2_DEPS)
 
 $(SHARED_GLAPI_OUTPUTS): APIXML := $(SHARED_GLAPI_APIXML)
@@ -49,7 +51,7 @@ $(SHARED_GLAPI_OUTPUTS): $(SHARED_GLAPI_DEPS)
 
 define gen-glapi
 	@mkdir -p $(dir $@)
-	$(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) $(1) > $@
+	$(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) -c $(GLES_VER) $(1) > $@
 endef
 
 %/glapi_mapi_tmp.h: $(MAPI)/mapi_abi.py $(COMMON)
@@ -58,7 +60,7 @@ endef
 		--printer $(PRINTER) --mode lib $(GLAPI)/gl_and_es_API.xml > $@
 
 %/main/dispatch.h: $(GLAPI)/gl_table.py $(COMMON)
-	$(call gen-glapi,-c -m remap_table)
+	$(call gen-glapi,-m remap_table)
 
 %/main/remap_helper.h: $(GLAPI)/remap_helper.py $(COMMON)
 	$(call gen-glapi)
diff --git a/src/mapi/glapi/gen/Makefile b/src/mapi/glapi/gen/Makefile
index 3e101f3a10f..c386b8766c4 100644
--- a/src/mapi/glapi/gen/Makefile
+++ b/src/mapi/glapi/gen/Makefile
@@ -180,10 +180,8 @@ $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON)
 
 ######################################################################
 
-$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON) $(ES_API)
-	$(PYTHON2) $(PYTHON_FLAGS) $< -f gl_API.xml \
-		-f $(MESA_GLAPI_DIR)/gen-es/es1_API.xml \
-		-f $(MESA_GLAPI_DIR)/gen-es/es2_API.xml > $@
+$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON_ES)
+	$(PYTHON2) $(PYTHON_FLAGS) $< -f gl_and_es_API.xml > $@
 
 $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
 	$(PYTHON2) $(PYTHON_FLAGS) $< -m remap_table > $@
diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py
index 05979e3813f..2cbbd971a86 100644
--- a/src/mapi/glapi/gen/gl_table.py
+++ b/src/mapi/glapi/gen/gl_table.py
@@ -211,28 +211,28 @@ class PrintRemapTable(gl_XML.gl_print_base):
 
 
 def show_usage():
-	print "Usage: %s [-f input_file_name] [-m mode] [-c]" % sys.argv[0]
+	print "Usage: %s [-f input_file_name] [-m mode] [-c ver]" % sys.argv[0]
 	print "    -m mode   Mode can be 'table' or 'remap_table'."
-	print "    -c        Enable compatibility with OpenGL ES."
+	print "    -c ver    Version can be 'es1' or 'es2'."
 	sys.exit(1)
 
 if __name__ == '__main__':
 	file_name = "gl_API.xml"
     
 	try:
-		(args, trail) = getopt.getopt(sys.argv[1:], "f:m:c")
+		(args, trail) = getopt.getopt(sys.argv[1:], "f:m:c:")
 	except Exception,e:
 		show_usage()
 
 	mode = "table"
-	es = False
+	es = None
 	for (arg,val) in args:
 		if arg == "-f":
 			file_name = val
 		elif arg == "-m":
 			mode = val
 		elif arg == "-c":
-			es = True
+			es = val
 
 	if mode == "table":
 		printer = PrintGlTable(es)
@@ -243,4 +243,14 @@ if __name__ == '__main__':
 
 	api = gl_XML.parse_GL_API( file_name )
 
+	if es is not None:
+		import gles_api
+
+		api_map = {
+			'es1': gles_api.es1_api,
+			'es2': gles_api.es2_api,
+		}
+
+		api.filter_functions(api_map[es])
+
 	printer.Print( api )
diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py
index 69b8e5e9d02..367ae24c75c 100644
--- a/src/mapi/glapi/gen/remap_helper.py
+++ b/src/mapi/glapi/gen/remap_helper.py
@@ -197,22 +197,36 @@ class PrintGlRemap(gl_XML.gl_print_base):
 
 
 def show_usage():
-	print "Usage: %s [-f input_file_name]" % sys.argv[0]
+	print "Usage: %s [-f input_file_name] [-c ver]" % sys.argv[0]
+	print "    -c ver    Version can be 'es1' or 'es2'."
 	sys.exit(1)
 
 if __name__ == '__main__':
 	file_name = "gl_API.xml"
 
 	try:
-		(args, trail) = getopt.getopt(sys.argv[1:], "f:")
+		(args, trail) = getopt.getopt(sys.argv[1:], "f:c:")
 	except Exception,e:
 		show_usage()
 
+	es = None
 	for (arg,val) in args:
 		if arg == "-f":
 			file_name = val
+		elif arg == "-c":
+			es = val
 
 	api = gl_XML.parse_GL_API( file_name )
 
+	if es is not None:
+		import gles_api
+
+		api_map = {
+			'es1': gles_api.es1_api,
+			'es2': gles_api.es2_api,
+		}
+
+		api.filter_functions(api_map[es])
+
 	printer = PrintGlRemap()
 	printer.Print( api )
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index cbd16625186..ac85a3eeb05 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -349,26 +349,26 @@ if env['gles']:
     gles_headers += env.CodeGenerate(
         target = 'es1api/main/dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
-        source = GLAPI + 'gen-es/es1_API.xml',
-        command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
         target = 'es1api/main/remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
-        source = GLAPI + 'gen-es/es1_API.xml',
-        command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
         target = 'es2api/main/dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
-        source = GLAPI + 'gen-es/es2_API.xml',
-        command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
         target = 'es2api/main/remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
-        source = GLAPI + 'gen-es/es2_API.xml',
-        command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET',
     )
 
     env.Depends(gles_sources, gles_headers)
-- 
cgit v1.2.3


From 6eff33dc7f2cd6e1430bd8dcaef4a7eb9fe3f6ee Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Thu, 11 Aug 2011 16:41:09 +0800
Subject: glapi: generate ES dispatch headers from core mesa

GLESv1 and GLESv2 have their own dispatch.h and remap_helper.h.  These
headers are only used by api_exec_es1.c and api_exec_es2.c in core mesa.
Move the rules to generate them from glapi to core mesa.

Reviewed-by: Brian Paul <brianp@vmware.com>

[olv: updated after reviewing to fix SCons build]
---
 src/mapi/es1api/Makefile      |  9 +--------
 src/mesa/Makefile             | 20 ++++++++++++++++++++
 src/mesa/SConscript           |  8 ++++----
 src/mesa/main/es_generator.py |  4 ++--
 4 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mapi/es1api/Makefile b/src/mapi/es1api/Makefile
index da5aa45806c..0a0449b10a3 100644
--- a/src/mapi/es1api/Makefile
+++ b/src/mapi/es1api/Makefile
@@ -48,7 +48,7 @@ esapi_CPPFLAGS += -DMAPI_MODE_BRIDGE
 esapi_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(esapi_LIB_DEPS)
 
 .PHONY: default
-default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME) main/dispatch.h main/remap_helper.h
+default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME)
 
 $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME): $(esapi_OBJECTS)
 	$(MKLIB) -o $(esapi_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
@@ -72,12 +72,6 @@ include $(GLAPI)/gen/glapi_gen.mk
 glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
 	$(call glapi_gen_mapi,$<,$(ESAPI))
 
-main/dispatch.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
-	$(call glapi_gen_dispatch,$<,$(ES))
-
-main/remap_helper.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_remap_deps)
-	$(call glapi_gen_remap,$<,$(ES))
-
 .PHONY: clean
 clean:
 	-rm -f $(esapi_PC)
@@ -86,7 +80,6 @@ clean:
 	-rm -f $(esapi_OBJECTS)
 	-rm -f depend depend.bak
 	-rm -f glapi_mapi_tmp.h
-	-rm -rf main
 
 pcedit = \
 	-e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index 88f31b68695..0e15d61bd8d 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -67,6 +67,26 @@ $(DRICORE_OBJ_DIR)/%.o: %.S
 # then convenience libs (.a) and finally the device drivers:
 default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs
 
+# include glapi_gen.mk for generating glapi headers for GLES
+GLAPI := $(TOP)/src/mapi/glapi/gen
+include $(GLAPI)/glapi_gen.mk
+
+main/api_exec_es1_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
+	$(call glapi_gen_dispatch,$<,es1)
+
+main/api_exec_es1_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps)
+	$(call glapi_gen_remap,$<,es1)
+
+main/api_exec_es1.o: main/api_exec_es1_dispatch.h main/api_exec_es1_remap_helper.h 
+
+main/api_exec_es2_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
+	$(call glapi_gen_dispatch,$<,es2)
+
+main/api_exec_es2_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps)
+	$(call glapi_gen_remap,$<,es2)
+
+main/api_exec_es2.o: main/api_exec_es2_dispatch.h main/api_exec_es2_remap_helper.h 
+
 main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
 	$(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@
 
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index ac85a3eeb05..05aa0e8010e 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -347,25 +347,25 @@ if env['gles']:
     GLAPI = '#src/mapi/glapi/'
     gles_headers = []
     gles_headers += env.CodeGenerate(
-        target = 'es1api/main/dispatch.h',
+        target = 'main/api_exec_es1_dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
         source = GLAPI + 'gen/gl_and_es_API.xml',
         command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
-        target = 'es1api/main/remap_helper.h',
+        target = 'main/api_exec_es1_remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
         source = GLAPI + 'gen/gl_and_es_API.xml',
         command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
-        target = 'es2api/main/dispatch.h',
+        target = 'main/api_exec_es2_dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
         source = GLAPI + 'gen/gl_and_es_API.xml',
         command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
-        target = 'es2api/main/remap_helper.h',
+        target = 'main/api_exec_es2_remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
         source = GLAPI + 'gen/gl_and_es_API.xml',
         command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET',
diff --git a/src/mesa/main/es_generator.py b/src/mesa/main/es_generator.py
index c0b0a445806..cad3deaef94 100644
--- a/src/mesa/main/es_generator.py
+++ b/src/mesa/main/es_generator.py
@@ -681,10 +681,10 @@ print """
 #if FEATURE_remap_table
 
 /* define esLocalRemapTable */
-#include "%sapi/main/dispatch.h"
+#include "main/api_exec_%s_dispatch.h"
 
 #define need_MESA_remap_table
-#include "%sapi/main/remap_helper.h"
+#include "main/api_exec_%s_remap_helper.h"
 
 static void
 init_remap_table(void)
-- 
cgit v1.2.3


From 5880a9a4a7247e4c31df606bef089c45b4052aaa Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 3 Aug 2011 16:36:42 -0700
Subject: radeon: Explain to the user what went wrong when built without
 libdrm.

Before this commit, even LIBGL_DEBUG=verbose would just fail with:
libGL error: failed to create dri screen
---
 src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
index 607b7470d4b..a74c6c7a575 100644
--- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
+++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
@@ -78,6 +78,9 @@ static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy)
 
 static inline void *radeon_bo_manager_gem_ctor(int fd)
 {
+  fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+	  __func__, __LINE__);
+
   return NULL;
 }
 
@@ -87,6 +90,9 @@ static inline void radeon_bo_manager_gem_dtor(void *dummy)
 
 static inline void *radeon_cs_manager_gem_ctor(int fd)
 {
+  fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+	  __func__, __LINE__);
+
   return NULL;
 }
 
-- 
cgit v1.2.3


From 11e4ea0010c3a756cfdaf427c14e104c9a11a645 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 16 Aug 2011 13:05:26 -0600
Subject: mesa: ChooseTextureFormat() returns gl_format, not GLuint

---
 src/mesa/main/dd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index e0c5844e193..b5ed9a40c70 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -194,7 +194,7 @@ struct dd_function_table {
     * cases, srcFormat and srcType can be GL_NONE.
     * Called by glTexImage(), etc.
     */
-   GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
+   gl_format (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
                                      GLenum srcFormat, GLenum srcType );
 
    /**
-- 
cgit v1.2.3


From c1f00731fd48dde68b67f157c27eb20982e82193 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 3 May 2011 15:27:38 -0700
Subject: i965: Generate driver-specific IR for non-fragment shaders as well.

This will be used by the new vertex shader backend.  The scalarizing
passes are skipped for non-fragment shaders, since vertex and geometry threads
are based on vec4s.
---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 9471883fb2b..f4005f80055 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -75,10 +75,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 {
    struct brw_context *brw = brw_context(ctx);
    struct intel_context *intel = &brw->intel;
+   unsigned int stage;
+
+   for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) {
+      struct brw_shader *shader =
+	 (struct brw_shader *)prog->_LinkedShaders[stage];
+
+      if (!shader)
+	 continue;
 
-   struct brw_shader *shader =
-      (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-   if (shader != NULL) {
       void *mem_ctx = ralloc_context(NULL);
       bool progress;
 
@@ -116,8 +121,10 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       do {
 	 progress = false;
 
-	 brw_do_channel_expressions(shader->ir);
-	 brw_do_vector_splitting(shader->ir);
+	 if (stage == MESA_SHADER_FRAGMENT) {
+	    brw_do_channel_expressions(shader->ir);
+	    brw_do_vector_splitting(shader->ir);
+	 }
 
 	 progress = do_lower_jumps(shader->ir, true, true,
 				   true, /* main return */
-- 
cgit v1.2.3


From 6034b9a5124475d300d0678bd2fb6160865fa972 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 3 May 2011 10:55:50 -0700
Subject: i965: Create a shared enum for hardware and compiler-internal
 opcodes.

This should make gdbing more pleasant, and it might be used in sharing
part of the codegen between the VS and FS backends.
---
 src/mesa/drivers/dri/i965/brw_defines.h            | 134 +++++++++++++--------
 src/mesa/drivers/dri/i965/brw_fs.cpp               |  11 +-
 src/mesa/drivers/dri/i965/brw_fs.h                 |  56 +++------
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          |   6 +
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  20 +--
 .../dri/i965/brw_fs_schedule_instructions.cpp      |  15 ---
 src/mesa/drivers/dri/i965/brw_shader.h             |   4 +
 7 files changed, 120 insertions(+), 126 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0a3027d04ad..fe5d29c4328 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -557,58 +557,88 @@
 #define BRW_WE_ALL		1
 /** @} */
 
-#define BRW_OPCODE_MOV        1
-#define BRW_OPCODE_SEL        2
-#define BRW_OPCODE_NOT        4
-#define BRW_OPCODE_AND        5
-#define BRW_OPCODE_OR         6
-#define BRW_OPCODE_XOR        7
-#define BRW_OPCODE_SHR        8
-#define BRW_OPCODE_SHL        9
-#define BRW_OPCODE_RSR        10
-#define BRW_OPCODE_RSL        11
-#define BRW_OPCODE_ASR        12
-#define BRW_OPCODE_CMP        16
-#define BRW_OPCODE_CMPN       17
-#define BRW_OPCODE_JMPI       32
-#define BRW_OPCODE_IF         34
-#define BRW_OPCODE_IFF        35
-#define BRW_OPCODE_ELSE       36
-#define BRW_OPCODE_ENDIF      37
-#define BRW_OPCODE_DO         38
-#define BRW_OPCODE_WHILE      39
-#define BRW_OPCODE_BREAK      40
-#define BRW_OPCODE_CONTINUE   41
-#define BRW_OPCODE_HALT       42
-#define BRW_OPCODE_MSAVE      44
-#define BRW_OPCODE_MRESTORE   45
-#define BRW_OPCODE_PUSH       46
-#define BRW_OPCODE_POP        47
-#define BRW_OPCODE_WAIT       48
-#define BRW_OPCODE_SEND       49
-#define BRW_OPCODE_SENDC      50
-#define BRW_OPCODE_MATH       56
-#define BRW_OPCODE_ADD        64
-#define BRW_OPCODE_MUL        65
-#define BRW_OPCODE_AVG        66
-#define BRW_OPCODE_FRC        67
-#define BRW_OPCODE_RNDU       68
-#define BRW_OPCODE_RNDD       69
-#define BRW_OPCODE_RNDE       70
-#define BRW_OPCODE_RNDZ       71
-#define BRW_OPCODE_MAC        72
-#define BRW_OPCODE_MACH       73
-#define BRW_OPCODE_LZD        74
-#define BRW_OPCODE_SAD2       80
-#define BRW_OPCODE_SADA2      81
-#define BRW_OPCODE_DP4        84
-#define BRW_OPCODE_DPH        85
-#define BRW_OPCODE_DP3        86
-#define BRW_OPCODE_DP2        87
-#define BRW_OPCODE_DPA2       88
-#define BRW_OPCODE_LINE       89
-#define BRW_OPCODE_PLN        90
-#define BRW_OPCODE_NOP        126
+enum opcode {
+   /* These are the actual hardware opcodes. */
+   BRW_OPCODE_MOV =	1,
+   BRW_OPCODE_SEL =	2,
+   BRW_OPCODE_NOT =	4,
+   BRW_OPCODE_AND =	5,
+   BRW_OPCODE_OR =	6,
+   BRW_OPCODE_XOR =	7,
+   BRW_OPCODE_SHR =	8,
+   BRW_OPCODE_SHL =	9,
+   BRW_OPCODE_RSR =	10,
+   BRW_OPCODE_RSL =	11,
+   BRW_OPCODE_ASR =	12,
+   BRW_OPCODE_CMP =	16,
+   BRW_OPCODE_CMPN =	17,
+   BRW_OPCODE_JMPI =	32,
+   BRW_OPCODE_IF =	34,
+   BRW_OPCODE_IFF =	35,
+   BRW_OPCODE_ELSE =	36,
+   BRW_OPCODE_ENDIF =	37,
+   BRW_OPCODE_DO =	38,
+   BRW_OPCODE_WHILE =	39,
+   BRW_OPCODE_BREAK =	40,
+   BRW_OPCODE_CONTINUE = 41,
+   BRW_OPCODE_HALT =	42,
+   BRW_OPCODE_MSAVE =	44,
+   BRW_OPCODE_MRESTORE = 45,
+   BRW_OPCODE_PUSH =	46,
+   BRW_OPCODE_POP =	47,
+   BRW_OPCODE_WAIT =	48,
+   BRW_OPCODE_SEND =	49,
+   BRW_OPCODE_SENDC =	50,
+   BRW_OPCODE_MATH =	56,
+   BRW_OPCODE_ADD =	64,
+   BRW_OPCODE_MUL =	65,
+   BRW_OPCODE_AVG =	66,
+   BRW_OPCODE_FRC =	67,
+   BRW_OPCODE_RNDU =	68,
+   BRW_OPCODE_RNDD =	69,
+   BRW_OPCODE_RNDE =	70,
+   BRW_OPCODE_RNDZ =	71,
+   BRW_OPCODE_MAC =	72,
+   BRW_OPCODE_MACH =	73,
+   BRW_OPCODE_LZD =	74,
+   BRW_OPCODE_SAD2 =	80,
+   BRW_OPCODE_SADA2 =	81,
+   BRW_OPCODE_DP4 =	84,
+   BRW_OPCODE_DPH =	85,
+   BRW_OPCODE_DP3 =	86,
+   BRW_OPCODE_DP2 =	87,
+   BRW_OPCODE_DPA2 =	88,
+   BRW_OPCODE_LINE =	89,
+   BRW_OPCODE_PLN =	90,
+   BRW_OPCODE_NOP =	126,
+
+   /* These are compiler backend opcodes that get translated into other
+    * instructions.
+    */
+   FS_OPCODE_FB_WRITE = 128,
+   FS_OPCODE_RCP,
+   FS_OPCODE_RSQ,
+   FS_OPCODE_SQRT,
+   FS_OPCODE_EXP2,
+   FS_OPCODE_LOG2,
+   FS_OPCODE_POW,
+   FS_OPCODE_SIN,
+   FS_OPCODE_COS,
+   FS_OPCODE_DDX,
+   FS_OPCODE_DDY,
+   FS_OPCODE_PIXEL_X,
+   FS_OPCODE_PIXEL_Y,
+   FS_OPCODE_CINTERP,
+   FS_OPCODE_LINTERP,
+   FS_OPCODE_TEX,
+   FS_OPCODE_TXB,
+   FS_OPCODE_TXD,
+   FS_OPCODE_TXL,
+   FS_OPCODE_DISCARD,
+   FS_OPCODE_SPILL,
+   FS_OPCODE_UNSPILL,
+   FS_OPCODE_PULL_CONSTANT_LOAD,
+};
 
 #define BRW_PREDICATE_NONE             0
 #define BRW_PREDICATE_NORMAL           1
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index cafb7092ac8..a0d75cc6f96 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -522,7 +522,7 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
 }
 
 fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
 {
    switch (opcode) {
    case FS_OPCODE_RCP:
@@ -565,7 +565,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
 }
 
 fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
 {
    int base_mrf = 2;
    fs_inst *inst;
@@ -1149,6 +1149,9 @@ fs_visitor::propagate_constants()
 		  progress = true;
 	       }
 	       break;
+
+	    default:
+	       break;
 	    }
 	 }
 
@@ -1200,6 +1203,8 @@ fs_visitor::opt_algebraic()
 	    break;
 	 }
 
+	 break;
+      default:
 	 break;
       }
    }
@@ -1267,6 +1272,8 @@ fs_visitor::register_coalesce()
       case BRW_OPCODE_ENDIF:
 	 if_depth--;
 	 break;
+      default:
+	 break;
       }
       if (loop_depth || if_depth)
 	 continue;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 4ec649014de..d207ac27aa2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -25,6 +25,8 @@
  *
  */
 
+#include "brw_shader.h"
+
 extern "C" {
 
 #include <sys/types.h>
@@ -55,33 +57,6 @@ enum register_file {
    BAD_FILE
 };
 
-enum fs_opcodes {
-   FS_OPCODE_FB_WRITE = 256,
-   FS_OPCODE_RCP,
-   FS_OPCODE_RSQ,
-   FS_OPCODE_SQRT,
-   FS_OPCODE_EXP2,
-   FS_OPCODE_LOG2,
-   FS_OPCODE_POW,
-   FS_OPCODE_SIN,
-   FS_OPCODE_COS,
-   FS_OPCODE_DDX,
-   FS_OPCODE_DDY,
-   FS_OPCODE_PIXEL_X,
-   FS_OPCODE_PIXEL_Y,
-   FS_OPCODE_CINTERP,
-   FS_OPCODE_LINTERP,
-   FS_OPCODE_TEX,
-   FS_OPCODE_TXB,
-   FS_OPCODE_TXD,
-   FS_OPCODE_TXL,
-   FS_OPCODE_DISCARD,
-   FS_OPCODE_SPILL,
-   FS_OPCODE_UNSPILL,
-   FS_OPCODE_PULL_CONSTANT_LOAD,
-};
-
-
 class fs_reg {
 public:
    /* Callers of this ralloc-based new need not call delete. It's
@@ -227,13 +202,13 @@ public:
       init();
    }
 
-   fs_inst(int opcode)
+   fs_inst(enum opcode opcode)
    {
       init();
       this->opcode = opcode;
    }
 
-   fs_inst(int opcode, fs_reg dst)
+   fs_inst(enum opcode opcode, fs_reg dst)
    {
       init();
       this->opcode = opcode;
@@ -243,7 +218,7 @@ public:
 	 assert(dst.reg_offset >= 0);
    }
 
-   fs_inst(int opcode, fs_reg dst, fs_reg src0)
+   fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0)
    {
       init();
       this->opcode = opcode;
@@ -256,7 +231,7 @@ public:
 	 assert(src[0].reg_offset >= 0);
    }
 
-   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+   fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    {
       init();
       this->opcode = opcode;
@@ -272,7 +247,7 @@ public:
 	 assert(src[1].reg_offset >= 0);
    }
 
-   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+   fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
    {
       init();
       this->opcode = opcode;
@@ -331,7 +306,7 @@ public:
 	      opcode == FS_OPCODE_POW);
    }
 
-   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+   enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
    fs_reg dst;
    fs_reg src[3];
    bool saturate;
@@ -448,27 +423,28 @@ public:
 
    fs_inst *emit(fs_inst inst);
 
-   fs_inst *emit(int opcode)
+   fs_inst *emit(enum opcode opcode)
    {
       return emit(fs_inst(opcode));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst)
+   fs_inst *emit(enum opcode opcode, fs_reg dst)
    {
       return emit(fs_inst(opcode, dst));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst, fs_reg src0)
+   fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0)
    {
       return emit(fs_inst(opcode, dst, src0));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+   fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    {
       return emit(fs_inst(opcode, dst, src0, src1));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+   fs_inst *emit(enum opcode opcode, fs_reg dst,
+		 fs_reg src0, fs_reg src1, fs_reg src2)
    {
       return emit(fs_inst(opcode, dst, src0, src1, src2));
    }
@@ -529,8 +505,8 @@ public:
 			      int sampler);
    fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 			      int sampler);
-   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
-   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
+   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
+   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
    bool try_emit_saturate(ir_expression *ir);
    void emit_bool_to_cond_code(ir_rvalue *condition);
    void emit_if_gen6(ir_if *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index e168e541bef..529df0880f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -277,6 +277,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	 /* There is no sample_d_c message; comparisons are done manually */
 	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
 	 break;
+      default:
+	 assert(!"not reached");
+	 break;
       }
    } else {
       switch (inst->opcode) {
@@ -317,6 +320,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	 assert(inst->mlen == 7 || inst->mlen == 10);
 	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
 	 break;
+      default:
+	 assert(!"not reached");
+	 break;
       }
    }
    assert(msg_type != -1);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 5c9cba99ae5..7c5414ac26c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -25,23 +25,6 @@
  *
  */
 
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
 #include "brw_fs.h"
 #include "../glsl/glsl_types.h"
 #include "../glsl/ir_optimization.h"
@@ -359,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
 	 if (inst->dst.file == GRF)
 	    no_spill[inst->dst.reg] = true;
 	 break;
+
+      default:
+	 break;
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index f1a88fcfa79..965a5b333a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -25,21 +25,6 @@
  *
  */
 
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
 #include "brw_fs.h"
 #include "../glsl/glsl_types.h"
 #include "../glsl/ir_optimization.h"
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 4c568a26caa..21671d1c8d6 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -21,5 +21,9 @@
  * IN THE SOFTWARE.
  */
 
+#include <stdint.h>
+
+#pragma once
+
 int brw_type_for_base_type(const struct glsl_type *type);
 uint32_t brw_conditional_for_comparison(unsigned int op);
-- 
cgit v1.2.3


From 65b5cbbcf783f6c668ab5b31a0734680dd396794 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 12:38:58 -0700
Subject: i965: Rename math FS_OPCODE_* to SHADER_OPCODE_*.

I want to just use the same enums in the VS.
---
 src/mesa/drivers/dri/i965/brw_defines.h            | 16 +++++-----
 src/mesa/drivers/dri/i965/brw_fs.cpp               | 34 +++++++++++-----------
 src/mesa/drivers/dri/i965/brw_fs.h                 | 16 +++++-----
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          | 34 +++++++++++-----------
 .../dri/i965/brw_fs_schedule_instructions.cpp      | 16 +++++-----
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       | 20 ++++++-------
 6 files changed, 68 insertions(+), 68 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index fe5d29c4328..da8d016da42 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -616,14 +616,14 @@ enum opcode {
     * instructions.
     */
    FS_OPCODE_FB_WRITE = 128,
-   FS_OPCODE_RCP,
-   FS_OPCODE_RSQ,
-   FS_OPCODE_SQRT,
-   FS_OPCODE_EXP2,
-   FS_OPCODE_LOG2,
-   FS_OPCODE_POW,
-   FS_OPCODE_SIN,
-   FS_OPCODE_COS,
+   SHADER_OPCODE_RCP,
+   SHADER_OPCODE_RSQ,
+   SHADER_OPCODE_SQRT,
+   SHADER_OPCODE_EXP2,
+   SHADER_OPCODE_LOG2,
+   SHADER_OPCODE_POW,
+   SHADER_OPCODE_SIN,
+   SHADER_OPCODE_COS,
    FS_OPCODE_DDX,
    FS_OPCODE_DDY,
    FS_OPCODE_PIXEL_X,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a0d75cc6f96..693ef0ce31a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -143,15 +143,15 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
       return 0;
 
    switch (inst->opcode) {
-   case FS_OPCODE_RCP:
-   case FS_OPCODE_RSQ:
-   case FS_OPCODE_SQRT:
-   case FS_OPCODE_EXP2:
-   case FS_OPCODE_LOG2:
-   case FS_OPCODE_SIN:
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
       return 1 * c->dispatch_width / 8;
-   case FS_OPCODE_POW:
+   case SHADER_OPCODE_POW:
       return 2 * c->dispatch_width / 8;
    case FS_OPCODE_TEX:
    case FS_OPCODE_TXB:
@@ -525,13 +525,13 @@ fs_inst *
 fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
 {
    switch (opcode) {
-   case FS_OPCODE_RCP:
-   case FS_OPCODE_RSQ:
-   case FS_OPCODE_SQRT:
-   case FS_OPCODE_EXP2:
-   case FS_OPCODE_LOG2:
-   case FS_OPCODE_SIN:
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
       break;
    default:
       assert(!"not reached: bad math opcode");
@@ -570,7 +570,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    int base_mrf = 2;
    fs_inst *inst;
 
-   assert(opcode == FS_OPCODE_POW);
+   assert(opcode == SHADER_OPCODE_POW);
 
    if (intel->gen >= 6) {
       /* Can't do hstride == 0 args to gen6 math, so expand it out.
@@ -1135,7 +1135,7 @@ fs_visitor::propagate_constants()
 	       }
 	       break;
 
-	    case FS_OPCODE_RCP:
+	    case SHADER_OPCODE_RCP:
 	       /* The hardware doesn't do math on immediate values
 		* (because why are you doing that, seriously?), but
 		* the correct answer is to just constant fold it
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d207ac27aa2..94af0e1af16 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -296,14 +296,14 @@ public:
 
    bool is_math()
    {
-      return (opcode == FS_OPCODE_RCP ||
-	      opcode == FS_OPCODE_RSQ ||
-	      opcode == FS_OPCODE_SQRT ||
-	      opcode == FS_OPCODE_EXP2 ||
-	      opcode == FS_OPCODE_LOG2 ||
-	      opcode == FS_OPCODE_SIN ||
-	      opcode == FS_OPCODE_COS ||
-	      opcode == FS_OPCODE_POW);
+      return (opcode == SHADER_OPCODE_RCP ||
+	      opcode == SHADER_OPCODE_RSQ ||
+	      opcode == SHADER_OPCODE_SQRT ||
+	      opcode == SHADER_OPCODE_EXP2 ||
+	      opcode == SHADER_OPCODE_LOG2 ||
+	      opcode == SHADER_OPCODE_SIN ||
+	      opcode == SHADER_OPCODE_COS ||
+	      opcode == SHADER_OPCODE_POW);
    }
 
    enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 529df0880f0..285ba46bd46 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -149,28 +149,28 @@ fs_visitor::generate_math(fs_inst *inst,
    int op;
 
    switch (inst->opcode) {
-   case FS_OPCODE_RCP:
+   case SHADER_OPCODE_RCP:
       op = BRW_MATH_FUNCTION_INV;
       break;
-   case FS_OPCODE_RSQ:
+   case SHADER_OPCODE_RSQ:
       op = BRW_MATH_FUNCTION_RSQ;
       break;
-   case FS_OPCODE_SQRT:
+   case SHADER_OPCODE_SQRT:
       op = BRW_MATH_FUNCTION_SQRT;
       break;
-   case FS_OPCODE_EXP2:
+   case SHADER_OPCODE_EXP2:
       op = BRW_MATH_FUNCTION_EXP;
       break;
-   case FS_OPCODE_LOG2:
+   case SHADER_OPCODE_LOG2:
       op = BRW_MATH_FUNCTION_LOG;
       break;
-   case FS_OPCODE_POW:
+   case SHADER_OPCODE_POW:
       op = BRW_MATH_FUNCTION_POW;
       break;
-   case FS_OPCODE_SIN:
+   case SHADER_OPCODE_SIN:
       op = BRW_MATH_FUNCTION_SIN;
       break;
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_COS:
       op = BRW_MATH_FUNCTION_COS;
       break;
    default:
@@ -182,7 +182,7 @@ fs_visitor::generate_math(fs_inst *inst,
    if (intel->gen >= 6) {
       assert(inst->mlen == 0);
 
-      if (inst->opcode == FS_OPCODE_POW) {
+      if (inst->opcode == SHADER_OPCODE_POW) {
 	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 	 brw_math2(p, dst, op, src[0], src[1]);
 
@@ -775,14 +775,14 @@ fs_visitor::generate_code()
       }
 	 break;
 
-      case FS_OPCODE_RCP:
-      case FS_OPCODE_RSQ:
-      case FS_OPCODE_SQRT:
-      case FS_OPCODE_EXP2:
-      case FS_OPCODE_LOG2:
-      case FS_OPCODE_POW:
-      case FS_OPCODE_SIN:
-      case FS_OPCODE_COS:
+      case SHADER_OPCODE_RCP:
+      case SHADER_OPCODE_RSQ:
+      case SHADER_OPCODE_SQRT:
+      case SHADER_OPCODE_EXP2:
+      case SHADER_OPCODE_LOG2:
+      case SHADER_OPCODE_POW:
+      case SHADER_OPCODE_SIN:
+      case SHADER_OPCODE_COS:
 	 generate_math(inst, dst, src);
 	 break;
       case FS_OPCODE_PIXEL_X:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 965a5b333a2..0ea4e5c36f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -69,26 +69,26 @@ public:
       int math_latency = 22;
 
       switch (inst->opcode) {
-      case FS_OPCODE_RCP:
+      case SHADER_OPCODE_RCP:
 	 this->latency = 1 * chans * math_latency;
 	 break;
-      case FS_OPCODE_RSQ:
+      case SHADER_OPCODE_RSQ:
 	 this->latency = 2 * chans * math_latency;
 	 break;
-      case FS_OPCODE_SQRT:
-      case FS_OPCODE_LOG2:
+      case SHADER_OPCODE_SQRT:
+      case SHADER_OPCODE_LOG2:
 	 /* full precision log.  partial is 2. */
 	 this->latency = 3 * chans * math_latency;
 	 break;
-      case FS_OPCODE_EXP2:
+      case SHADER_OPCODE_EXP2:
 	 /* full precision.  partial is 3, same throughput. */
 	 this->latency = 4 * chans * math_latency;
 	 break;
-      case FS_OPCODE_POW:
+      case SHADER_OPCODE_POW:
 	 this->latency = 8 * chans * math_latency;
 	 break;
-      case FS_OPCODE_SIN:
-      case FS_OPCODE_COS:
+      case SHADER_OPCODE_SIN:
+      case SHADER_OPCODE_COS:
 	 /* minimum latency, max is 12 rounds. */
 	 this->latency = 5 * chans * math_latency;
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2e3f9be75b4..8b4f5bbac15 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -250,14 +250,14 @@ fs_visitor::visit(ir_expression *ir)
 
       break;
    case ir_unop_rcp:
-      emit_math(FS_OPCODE_RCP, this->result, op[0]);
+      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
       break;
 
    case ir_unop_exp2:
-      emit_math(FS_OPCODE_EXP2, this->result, op[0]);
+      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
       break;
    case ir_unop_log2:
-      emit_math(FS_OPCODE_LOG2, this->result, op[0]);
+      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
       break;
    case ir_unop_exp:
    case ir_unop_log:
@@ -265,11 +265,11 @@ fs_visitor::visit(ir_expression *ir)
       break;
    case ir_unop_sin:
    case ir_unop_sin_reduced:
-      emit_math(FS_OPCODE_SIN, this->result, op[0]);
+      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
       break;
    case ir_unop_cos:
    case ir_unop_cos_reduced:
-      emit_math(FS_OPCODE_COS, this->result, op[0]);
+      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
       break;
 
    case ir_unop_dFdx:
@@ -340,11 +340,11 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_sqrt:
-      emit_math(FS_OPCODE_SQRT, this->result, op[0]);
+      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
       break;
 
    case ir_unop_rsq:
-      emit_math(FS_OPCODE_RSQ, this->result, op[0]);
+      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
       break;
 
    case ir_unop_i2u:
@@ -423,7 +423,7 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_pow:
-      emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
+      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
       break;
 
    case ir_unop_bit_not:
@@ -1694,7 +1694,7 @@ fs_visitor::emit_interpolation_setup_gen4()
 	interp_reg(FRAG_ATTRIB_WPOS, 3));
    /* Compute the pixel 1/W value from wpos.w. */
    this->pixel_w = fs_reg(this, glsl_type::float_type);
-   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
+   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
    this->current_annotation = NULL;
 }
 
@@ -1731,7 +1731,7 @@ fs_visitor::emit_interpolation_setup_gen6()
    this->current_annotation = "compute pos.w";
    this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
    this->wpos_w = fs_reg(this, glsl_type::float_type);
-   emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w);
+   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
 
    this->delta_x = fs_reg(brw_vec8_grf(2, 0));
    this->delta_y = fs_reg(brw_vec8_grf(3, 0));
-- 
cgit v1.2.3


From af3c9803d818fd33139f1247a387d64b967b8992 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 2 May 2011 09:45:40 -0700
Subject: i965: Start adding the VS visitor and codegen.

The low-level IR is a mashup of brw_fs.cpp and ir_to_mesa.cpp.  It's
currently controlled by the INTEL_NEW_VS=1 environment variable, and
only tested for the trivial "gl_Position = gl_Vertex;" shader so far.
---
 src/mesa/drivers/dri/i965/Makefile                 |    5 +-
 src/mesa/drivers/dri/i965/brw_context.h            |    2 +-
 src/mesa/drivers/dri/i965/brw_defines.h            |    2 +
 src/mesa/drivers/dri/i965/brw_eu.h                 |    3 +
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          |   33 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp           |   26 +
 src/mesa/drivers/dri/i965/brw_shader.h             |    2 +
 src/mesa/drivers/dri/i965/brw_vec4.h               |  434 ++++++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp        |  568 +++++++
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp     |   77 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp     | 1649 ++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vs.c                 |   12 +-
 src/mesa/drivers/dri/i965/brw_vs.h                 |    3 +-
 src/mesa/drivers/dri/i965/brw_vs_emit.c            |    2 +-
 14 files changed, 2781 insertions(+), 37 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4.h
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 44f28cd9d15..45a5350a383 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -124,7 +124,10 @@ CXX_SOURCES = \
 	brw_fs_reg_allocate.cpp \
 	brw_fs_schedule_instructions.cpp \
 	brw_fs_vector_splitting.cpp \
-	brw_shader.cpp
+	brw_shader.cpp \
+	brw_vec4_emit.cpp \
+	brw_vec4_reg_allocate.cpp \
+	brw_vec4_visitor.cpp
 
 ASM_SOURCES = 
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index cc11d06874d..7b6b64c1a5c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -529,7 +529,7 @@ struct brw_context
        * the CURBE, the depth buffer, and a query BO.
        */
       drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
-      int validated_bo_count;
+      unsigned int validated_bo_count;
    } state;
 
    struct brw_cache cache;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index da8d016da42..e3823c65d1a 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -638,6 +638,8 @@ enum opcode {
    FS_OPCODE_SPILL,
    FS_OPCODE_UNSPILL,
    FS_OPCODE_PULL_CONSTANT_LOAD,
+
+   VS_OPCODE_URB_WRITE,
 };
 
 #define BRW_PREDICATE_NONE             0
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 72d50eadbce..38dd99b693d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -44,6 +44,9 @@
 #define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
 #define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
 #define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 285ba46bd46..7367ccaa7e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -146,38 +146,7 @@ void
 fs_visitor::generate_math(fs_inst *inst,
 			  struct brw_reg dst, struct brw_reg *src)
 {
-   int op;
-
-   switch (inst->opcode) {
-   case SHADER_OPCODE_RCP:
-      op = BRW_MATH_FUNCTION_INV;
-      break;
-   case SHADER_OPCODE_RSQ:
-      op = BRW_MATH_FUNCTION_RSQ;
-      break;
-   case SHADER_OPCODE_SQRT:
-      op = BRW_MATH_FUNCTION_SQRT;
-      break;
-   case SHADER_OPCODE_EXP2:
-      op = BRW_MATH_FUNCTION_EXP;
-      break;
-   case SHADER_OPCODE_LOG2:
-      op = BRW_MATH_FUNCTION_LOG;
-      break;
-   case SHADER_OPCODE_POW:
-      op = BRW_MATH_FUNCTION_POW;
-      break;
-   case SHADER_OPCODE_SIN:
-      op = BRW_MATH_FUNCTION_SIN;
-      break;
-   case SHADER_OPCODE_COS:
-      op = BRW_MATH_FUNCTION_COS;
-      break;
-   default:
-      assert(!"not reached: unknown math function");
-      op = 0;
-      break;
-   }
+   int op = brw_math_function(inst->opcode);
 
    if (intel->gen >= 6) {
       assert(inst->mlen == 0);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index f4005f80055..2eeeec25cac 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -199,3 +199,29 @@ brw_conditional_for_comparison(unsigned int op)
       return BRW_CONDITIONAL_NZ;
    }
 }
+
+/**
+ * Maps a SHADER_OPCODE_* math opcode to its BRW_MATH_FUNCTION_* value.
+ *
+ * Any other opcode trips the assert; where assert() compiles to nothing
+ * the function returns 0 instead.
+ */
+uint32_t
+brw_math_function(enum opcode op)
+{
+   switch (op) {
+   case SHADER_OPCODE_RCP:  return BRW_MATH_FUNCTION_INV;
+   case SHADER_OPCODE_RSQ:  return BRW_MATH_FUNCTION_RSQ;
+   case SHADER_OPCODE_SQRT: return BRW_MATH_FUNCTION_SQRT;
+   case SHADER_OPCODE_EXP2: return BRW_MATH_FUNCTION_EXP;
+   case SHADER_OPCODE_LOG2: return BRW_MATH_FUNCTION_LOG;
+   case SHADER_OPCODE_POW:  return BRW_MATH_FUNCTION_POW;
+   case SHADER_OPCODE_SIN:  return BRW_MATH_FUNCTION_SIN;
+   case SHADER_OPCODE_COS:  return BRW_MATH_FUNCTION_COS;
+   default:
+      break;
+   }
+
+   assert(!"not reached: unknown math function");
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 21671d1c8d6..1054d7a589e 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -22,8 +22,10 @@
  */
 
 #include <stdint.h>
+#include "brw_defines.h"
 
 #pragma once
 
 int brw_type_for_base_type(const struct glsl_type *type);
 uint32_t brw_conditional_for_comparison(unsigned int op);
+uint32_t brw_math_function(enum opcode op);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
new file mode 100644
index 00000000000..10168fc1cb0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -0,0 +1,434 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_H
+#define BRW_VEC4_H
+
+#include <stdint.h>
+#include "brw_shader.h"
+#include "main/compiler.h"
+#include "program/hash_table.h"
+
+extern "C" {
+#include "brw_vs.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+};
+
+#include "../glsl/ir.h"
+
+namespace brw {
+
+class dst_reg;
+
+/**
+ * Common helper for constructing swizzles.  When only a subset of
+ * channels of a vec4 are used, we don't want to reference the other
+ * channels, as that will tell optimization passes that those other
+ * channels are used.
+ */
+static int
+swizzle_for_size(int size)
+{
+   /* Row N-1 serves N channels; the last used channel fills the rest. */
+   const int swizzle_by_size[4] = {
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+   assert(1 <= size && size <= 4);
+   return swizzle_by_size[size - 1];
+}
+
+enum register_file {
+   ARF = BRW_ARCHITECTURE_REGISTER_FILE, /* ARF..IMM reuse the BRW_* constants */
+   GRF = BRW_GENERAL_REGISTER_FILE,
+   MRF = BRW_MESSAGE_REGISTER_FILE,
+   IMM = BRW_IMMEDIATE_VALUE,
+   HW_REG, /* a struct brw_reg */
+   ATTR, /* presumably a vertex attribute input -- TODO confirm */
+   UNIFORM, /* prog_data->params[hw_reg] */
+   BAD_FILE /* file that init() gives a register before it is set up */
+};
+
+class reg
+{
+public:
+   /** Register file; any enum register_file value. */
+   enum register_file file;
+   /** virtual register number.  0 = fixed hw reg */
+   int reg;
+   /** Offset within the virtual register. */
+   int reg_offset;
+   /** Register type.  BRW_REGISTER_TYPE_* */
+   int type;
+   bool sechalf;
+   struct brw_reg fixed_hw_reg; /* payload for file == HW_REG */
+   int smear; /* -1, or a channel of the reg to smear to all channels. */
+
+   /** Immediate value, used when file == IMM. */
+   union {
+      int32_t i;
+      uint32_t u;
+      float f;
+   } imm;
+};
+
+class src_reg : public reg
+{
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+   /** Zeroes the whole object, then marks the register unset (BAD_FILE). */
+   void init()
+   {
+      memset(this, 0, sizeof(*this));
+
+      this->file = BAD_FILE;
+   }
+   /** Register 'reg' of 'file'; swizzle sized from 'type' when it has one. */
+   src_reg(register_file file, int reg, const glsl_type *type)
+   {
+      init();
+
+      this->file = file;
+      this->reg = reg;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+	 this->swizzle = swizzle_for_size(type->vector_elements);
+      else /* NOTE(review): encoding may differ from swizzle_for_size() -- confirm */
+	 this->swizzle = SWIZZLE_XYZW;
+   }
+
+   /** Generic unset register constructor. */
+   src_reg()
+   {
+      init();
+   }
+   /** Float immediate (type F, value in imm.f). */
+   src_reg(float f)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_F;
+      this->imm.f = f;
+   }
+   /** Unsigned integer immediate (type UD, value in imm.u). */
+   src_reg(uint32_t u)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_UD;
+      this->imm.u = u; /* not imm.f: that would convert u to a float */
+   }
+   /** Signed integer immediate (type D, value in imm.i). */
+   src_reg(int32_t i)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_D;
+      this->imm.i = i;
+   }
+
+   src_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit src_reg(dst_reg reg);
+
+   GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
+   bool negate;
+   bool abs;
+};
+
+class dst_reg : public reg
+{
+public:
+   /* Allocates from the ralloc context 'ctx'.  No delete is needed:
+    * ralloc_free() on 'ctx' (or any of its ancestors) releases it. */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *const mem = ralloc_size(ctx, size);
+
+      assert(mem != NULL);
+      return mem;
+   }
+
+   /** Zeroes the object, then sets file BAD_FILE and a full XYZW writemask. */
+   void init()
+   {
+      memset(this, 0, sizeof(dst_reg));
+      writemask = WRITEMASK_XYZW;
+      file = BAD_FILE;
+   }
+
+   /** Unset destination. */
+   dst_reg()
+   {
+      init();
+   }
+
+   /** Register number 'reg' in register file 'file'. */
+   dst_reg(register_file file, int reg)
+   {
+      init();
+      this->reg = reg;
+      this->file = file;
+   }
+
+   /** Destination backed by the given struct brw_reg (file HW_REG). */
+   dst_reg(struct brw_reg reg)
+   {
+      init();
+      this->fixed_hw_reg = reg;
+      this->file = HW_REG;
+   }
+
+   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit dst_reg(src_reg reg);
+
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+};
+
+class vec4_instruction : public exec_node {
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   struct brw_reg get_dst(void);
+   struct brw_reg get_src(int i);
+
+   enum opcode opcode; /* BRW_, SHADER_, FS_ or VS_OPCODE_* from enum opcode */
+   dst_reg dst;
+   src_reg src[3];
+
+   bool saturate;
+   bool predicate_inverse;
+   uint32_t predicate;
+
+   int conditional_mod; /**< BRW_CONDITIONAL_* */
+
+   int sampler;
+   int target; /**< MRT target. */
+   bool shadow_compare;
+
+   bool eot;
+   bool header_present;
+   int mlen; /**< SEND message length */
+   int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
+
+   uint32_t offset; /* spill/unspill offset */
+   /** @{
+    * Annotation for the generated IR.  One of the two can be set.
+    */
+   ir_instruction *ir;
+   const char *annotation;
+};
+
+class vec4_visitor : public ir_visitor
+{
+public:
+   vec4_visitor(struct brw_vs_compile *c,
+		struct gl_shader_program *prog, struct brw_shader *shader);
+   ~vec4_visitor();
+
+   dst_reg dst_null_f()
+   {
+      return dst_reg(brw_null_reg());
+   }
+
+   dst_reg dst_null_d()
+   {
+      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   }
+
+   dst_reg dst_null_cmp()
+   {
+      if (intel->gen > 4)
+	 return dst_null_d();
+      else
+	 return dst_null_f();
+   }
+
+   struct brw_context *brw;
+   const struct gl_vertex_program *vp;
+   struct intel_context *intel;
+   struct gl_context *ctx;
+   struct brw_vs_compile *c;
+   struct brw_vs_prog_data *prog_data;
+   struct brw_compile *p;
+   struct brw_shader *shader;
+   struct gl_shader_program *prog;
+   void *mem_ctx;
+   exec_list instructions; /**< vec4_instructions, in emission order */
+
+   char *fail_msg;
+   bool failed; /**< checked by run() to abandon the compile */
+
+   /**
+    * GLSL IR currently being processed, which is associated with our
+    * driver IR instructions for debugging purposes.
+    */
+   ir_instruction *base_ir;
+   const char *current_annotation;
+
+   int *virtual_grf_sizes; /**< size of each virtual GRF, by number */
+   int virtual_grf_count; /**< virtual GRFs handed out so far */
+   int virtual_grf_array_size; /**< allocated length of virtual_grf_sizes */
+   int first_non_payload_grf; /**< set by setup_payload() */
+
+   dst_reg *variable_storage(ir_variable *var);
+
+   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
+
+   src_reg src_reg_for_float(float val);
+
+   /**
+    * \name Visit methods
+    *
+    * As typical for the visitor pattern, there must be one \c visit method for
+    * each concrete subclass of \c ir_instruction.  Virtual base classes within
+    * the hierarchy should not have \c visit methods.
+    */
+   /*@{*/
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_dereference_variable  *);
+   virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_if *);
+   /*@}*/
+
+   src_reg result; /**< value left behind by the most recent rvalue visit */
+
+   /* Regs for vertex results.  Generated at ir_variable visiting time
+    * for each ir->location that is used.
+    */
+   dst_reg output_reg[VERT_RESULT_MAX];
+
+   struct hash_table *variable_ht; /**< searched by variable_storage() */
+
+   bool run(void);
+   void fail(const char *msg, ...);
+
+   int virtual_grf_alloc(int size);
+   int setup_attributes(int payload_reg);
+   void setup_payload();
+   void reg_allocate_trivial();
+   void reg_allocate();
+
+   vec4_instruction *emit(enum opcode opcode);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+			  src_reg src0, src_reg src1);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+			  src_reg src0, src_reg src1, src_reg src2);
+
+   /** Walks an exec_list of ir_instruction and sends it through this visitor. */
+   void visit_instructions(const exec_list *list);
+
+   void emit_bool_to_cond_code(ir_rvalue *ir);
+   void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
+   void emit_if_gen6(ir_if *ir);
+
+   void emit_block_move(ir_assignment *ir);
+
+   /**
+    * Emit the correct dot-product instruction for the type of arguments
+    */
+   void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+		    dst_reg dst, src_reg src0);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+		    dst_reg dst, src_reg src0, src_reg src1);
+
+   void emit_scs(ir_instruction *ir, enum prog_opcode op,
+		 dst_reg dst, const src_reg &src);
+
+   void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
+   void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
+   void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
+   void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+   void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+   void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+
+   int emit_vue_header_gen6(int header_mrf);
+   int emit_vue_header_gen4(int header_mrf);
+   void emit_urb_writes(void);
+
+   GLboolean try_emit_sat(ir_expression *ir);
+
+   bool process_move_condition(ir_rvalue *ir);
+
+   void generate_code();
+   void generate_vs_instruction(vec4_instruction *inst,
+				struct brw_reg dst,
+				struct brw_reg *src);
+   void generate_math1_gen4(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src);
+   void generate_math1_gen6(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src);
+   void generate_urb_write(vec4_instruction *inst);
+};
+
+} /* namespace brw */
+
+#endif /* BRW_VEC4_H */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
new file mode 100644
index 00000000000..bdc7a79d83d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -0,0 +1,568 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+extern "C" {
+#include "brw_eu.h"
+};
+
+using namespace brw;
+
+namespace brw {
+
+/* Map each attribute set in inputs_read to a consecutive payload GRF starting
+ * at payload_reg, rewrite ATTR sources to those GRFs, set urb_read_length and
+ * return the number of registers consumed (never less than one).
+ */
+int
+vec4_visitor::setup_attributes(int payload_reg)
+{
+   int attribute_map[VERT_ATTRIB_MAX];
+   int num_attrs = 0;
+
+   for (int attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
+      if (!(prog_data->inputs_read & BITFIELD64_BIT(attr)))
+	 continue;
+      attribute_map[attr] = payload_reg + num_attrs;
+      num_attrs++;
+   }
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      for (int s = 0; s < 3; s++) {
+	 src_reg *src = &inst->src[s];
+	 if (src->file == ATTR) {
+	    src->file = HW_REG;
+	    src->fixed_hw_reg = brw_vec8_grf(attribute_map[src->reg], 0);
+	    src->fixed_hw_reg.dw1.bits.swizzle = src->swizzle;
+	 }
+      }
+   }
+
+   /* The BSpec says at least one thing must always be read from the VF;
+    * the hardware appears to wedge otherwise. */
+   if (num_attrs == 0)
+      num_attrs = 1;
+   prog_data->urb_read_length = (num_attrs + 1) / 2;
+   return num_attrs;
+}
+
+/* Lay out the thread payload: r0, then the user clip planes read from the
+ * curbe, then the vertex attributes.  Records the curbe read length and the
+ * first GRF that is left over for register allocation.
+ */
+void
+vec4_visitor::setup_payload(void)
+{
+   /* r0 is always reserved, as it contains the payload with the URB
+    * handles that are passed on to the URB write at the end of the
+    * thread.  Everything else starts at r1.
+    */
+   int reg = 1;
+
+   /* User clip planes from curbe: two planes per register; before gen6
+    * they start six plane slots further in.
+    */
+   if (c->key.nr_userclip) {
+      const bool gen6_plus = intel->gen >= 6;
+      const int first_slot = gen6_plus ? 0 : 6;
+
+      for (int i = 0; i < c->key.nr_userclip; i++) {
+	 const int slot = first_slot + i;
+	 c->userplane[i] = stride(brw_vec4_grf(reg + slot / 2, (i % 2) * 4),
+				  0, 4, 1);
+      }
+
+      if (gen6_plus)
+	 reg += ALIGN(c->key.nr_userclip, 2) / 2;
+      else
+	 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
+   }
+
+   /* FINISHME: push constants */
+   c->prog_data.curb_read_length = reg - 1;
+   /* XXX 0 causes a bug elsewhere... */
+   c->prog_data.nr_params = (intel->gen < 6) ? 4 : 0;
+
+   reg += setup_attributes(reg);
+   this->first_non_payload_grf = reg;
+}
+
+/* Lower this instruction's destination to a hardware brw_reg.  GRF
+ * destinations get the dst type and writemask applied; a BAD_FILE
+ * destination becomes the null register.
+ */
+struct brw_reg
+vec4_instruction::get_dst(void)
+{
+   switch (dst.file) {
+   case GRF: {
+      assert(dst.reg_offset == 0);
+
+      struct brw_reg grf = retype(brw_vec8_grf(dst.reg + dst.reg_offset, 0),
+				  dst.type);
+      grf.dw1.bits.writemask = dst.writemask;
+      return grf;
+   }
+
+   case HW_REG:
+      return dst.fixed_hw_reg;
+
+   case BAD_FILE:
+      return brw_null_reg();
+
+   default:
+      assert(!"not reached");
+      return brw_null_reg();
+   }
+}
+
+/* Lower source operand i to a hardware brw_reg: GRFs pick up type, swizzle
+ * and abs/negate modifiers, immediates are built according to their type,
+ * and a BAD_FILE source becomes the null register.  ATTR sources are not
+ * expected here any more.
+ */
+struct brw_reg
+vec4_instruction::get_src(int i)
+{
+   const src_reg &s = src[i];
+
+   switch (s.file) {
+   case GRF: {
+      struct brw_reg grf = retype(brw_vec8_grf(s.reg + s.reg_offset, 0),
+				  s.type);
+
+      grf.dw1.bits.swizzle = s.swizzle;
+      if (s.abs)
+	 grf = brw_abs(grf);
+      if (s.negate)
+	 grf = negate(grf);
+      return grf;
+   }
+
+   case IMM:
+      /* Only F, D and UD immediates are handled. */
+      switch (s.type) {
+      case BRW_REGISTER_TYPE_F:
+	 return brw_imm_f(s.imm.f);
+      case BRW_REGISTER_TYPE_D:
+	 return brw_imm_d(s.imm.i);
+      case BRW_REGISTER_TYPE_UD:
+	 return brw_imm_ud(s.imm.u);
+      default:
+	 assert(!"not reached");
+	 return brw_null_reg();
+      }
+
+   case HW_REG:
+      return s.fixed_hw_reg;
+
+   case BAD_FILE:
+      /* Probably unused. */
+      return brw_null_reg();
+
+   /* setup_attributes() turns ATTR sources into HW_REG ones. */
+   case ATTR:
+   default:
+      assert(!"not reached");
+      return brw_null_reg();
+   }
+}
+
+/* Emit a single-operand math instruction on pre-gen6 hardware.
+ * The operation is looked up from inst->opcode and issued through
+ * brw_math() as scalar data, full precision, no saturate, with
+ * inst->base_mrf passed along.
+ */
+void
+vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src)
+{
+   brw_math(p, dst, brw_math_function(inst->opcode),
+	    BRW_MATH_SATURATE_NONE, inst->base_mrf, src,
+	    BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL);
+}
+
+/* Emit a single-operand math instruction on gen6 and later hardware.
+ * The operation is looked up from inst->opcode and issued through
+ * brw_math() as scalar data, full precision, no saturate, with
+ * inst->base_mrf passed along.
+ */
+void
+vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src)
+{
+   brw_math(p, dst, brw_math_function(inst->opcode),
+	    BRW_MATH_SATURATE_NONE, inst->base_mrf, src,
+	    BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL);
+}
+
+/* Emit the URB write for a VS_OPCODE_URB_WRITE instruction. */
+void
+vec4_visitor::generate_urb_write(vec4_instruction *inst)
+{
+   const bool allocate = false;
+   const bool used = true;
+   const int response_len = 0;
+
+   brw_urb_WRITE(p, brw_null_reg() /* dest */,
+		 inst->base_mrf /* starting mrf reg nr */,
+		 brw_vec8_grf(0, 0) /* src */,
+		 allocate, used, inst->mlen, response_len,
+		 inst->eot /* eot */, inst->eot /* writes complete */,
+		 inst->offset /* urb destination offset */,
+		 BRW_URB_SWIZZLE_INTERLEAVE);
+}
+
+/* Emit native code for the opcodes generate_code() leaves to us: one-operand
+ * math and the URB write.  Any other opcode is reported through fail().
+ */
+void
+vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
+				      struct brw_reg dst,
+				      struct brw_reg *src)
+{
+   const enum opcode op = instruction->opcode;
+
+   switch (op) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      if (intel->gen < 6)
+	 generate_math1_gen4(instruction, dst, src[0]);
+      else
+	 generate_math1_gen6(instruction, dst, src[0]);
+      break;
+
+   case SHADER_OPCODE_POW:
+      assert(!"finishme");
+      break;
+
+   case VS_OPCODE_URB_WRITE:
+      generate_urb_write(instruction);
+      break;
+
+   default:
+      if (op >= (int)ARRAY_SIZE(brw_opcodes))
+	 fail("Unsupported opcode %d in VS", op);
+      else
+	 fail("unsupported opcode in `%s' in VS\n", brw_opcodes[op].name);
+   }
+}
+
+/* Translate the shader's GLSL IR into vec4 instructions, lay out the
+ * payload, assign registers and generate native code.  Returns false as
+ * soon as a stage has left the 'failed' flag set.
+ */
+bool
+vec4_visitor::run()
+{
+   /* Generate VS IR for main().  (the visitor only descends into
+    * functions called "main"). */
+   visit_instructions(shader->ir);
+
+   emit_urb_writes();
+   if (failed)
+      return false;
+
+   setup_payload();
+   reg_allocate();
+   /* Never generate code from a failed register assignment. */
+   if (failed)
+      return false;
+
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   generate_code();
+
+   return !failed;
+}
+
+void
+vec4_visitor::generate_code()
+{
+   int last_native_inst = p->nr_insn; /* first native insn not yet dumped */
+   const char *last_annotation_string = NULL;
+   ir_instruction *last_annotation_ir = NULL;
+
+   int loop_stack_array_size = 16;
+   int loop_stack_depth = 0;
+   brw_instruction **loop_stack =
+      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
+   int *if_depth_in_loop =
+      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
+
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("Native code for vertex shader %d:\n", prog->Name);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+      struct brw_reg src[3], dst;
+
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+	 if (last_annotation_ir != inst->ir) {
+	    last_annotation_ir = inst->ir;
+	    if (last_annotation_ir) {
+	       printf("   ");
+	       last_annotation_ir->print();
+	       printf("\n");
+	    }
+	 }
+	 if (last_annotation_string != inst->annotation) {
+	    last_annotation_string = inst->annotation;
+	    if (last_annotation_string)
+	       printf("   %s\n", last_annotation_string);
+	 }
+      }
+
+      for (unsigned int i = 0; i < 3; i++) {
+	 src[i] = inst->get_src(i);
+      }
+      dst = inst->get_dst();
+
+      brw_set_conditionalmod(p, inst->conditional_mod);
+      brw_set_predicate_control(p, inst->predicate);
+      brw_set_predicate_inverse(p, inst->predicate_inverse);
+      brw_set_saturate(p, inst->saturate);
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MOV:
+	 brw_MOV(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_ADD:
+	 brw_ADD(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_MUL:
+	 brw_MUL(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_FRC:
+	 brw_FRC(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_RNDD:
+	 brw_RNDD(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_RNDE:
+	 brw_RNDE(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_RNDZ:
+	 brw_RNDZ(p, dst, src[0]);
+	 break;
+
+      case BRW_OPCODE_AND:
+	 brw_AND(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_OR:
+	 brw_OR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_XOR:
+	 brw_XOR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_NOT:
+	 brw_NOT(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_ASR:
+	 brw_ASR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_SHR:
+	 brw_SHR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_SHL:
+	 brw_SHL(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_CMP:
+	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_SEL:
+	 brw_SEL(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_IF:
+	 if (inst->src[0].file != BAD_FILE) {
+	    /* The instruction has an embedded compare (only allowed on gen6) */
+	    assert(intel->gen == 6);
+	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
+	 } else {
+	    brw_IF(p, BRW_EXECUTE_8);
+	 }
+	 if_depth_in_loop[loop_stack_depth]++; /* IF nesting in current loop */
+	 break;
+
+      case BRW_OPCODE_ELSE:
+	 brw_ELSE(p);
+	 break;
+      case BRW_OPCODE_ENDIF:
+	 brw_ENDIF(p);
+	 if_depth_in_loop[loop_stack_depth]--;
+	 break;
+
+      case BRW_OPCODE_DO:
+	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+	 if (loop_stack_array_size <= loop_stack_depth) {
+	    loop_stack_array_size *= 2;
+	    loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
+				  loop_stack_array_size);
+	    if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
+				        loop_stack_array_size);
+	 }
+	 if_depth_in_loop[loop_stack_depth] = 0;
+	 break;
+
+      case BRW_OPCODE_BREAK:
+	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 break;
+      case BRW_OPCODE_CONTINUE:
+	 /* FINISHME: We need to write the loop instruction support still. */
+	 if (intel->gen >= 6)
+	    gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+	 else
+	    brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 break;
+
+      case BRW_OPCODE_WHILE: {
+	 struct brw_instruction *inst0, *inst1;
+	 GLuint br = 1; /* jump count scale factor, 2 from gen5 on */
+
+	 if (intel->gen >= 5)
+	    br = 2;
+
+	 assert(loop_stack_depth > 0);
+	 loop_stack_depth--;
+	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+	 if (intel->gen < 6) {
+	    /* walk back to the matching DO, patching unset BREAK/CONT jumps */
+	    while (inst0 > loop_stack[loop_stack_depth]) {
+	       inst0--;
+	       if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+		   inst0->bits3.if_else.jump_count == 0) {
+		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+	    }
+	       else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			inst0->bits3.if_else.jump_count == 0) {
+		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+	       }
+	    }
+	 }
+      }
+	 break;
+
+      default:
+	 generate_vs_instruction(inst, dst, src);
+	 break;
+      }
+
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+	    if (0) { /* switch on to also dump the raw instruction words */
+	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+		      ((uint32_t *)&p->store[i])[3],
+		      ((uint32_t *)&p->store[i])[2],
+		      ((uint32_t *)&p->store[i])[1],
+		      ((uint32_t *)&p->store[i])[0]);
+	    }
+	    brw_disasm(stdout, &p->store[i], intel->gen);
+	 }
+      }
+
+      last_native_inst = p->nr_insn;
+   }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("\n");
+   }
+
+   ralloc_free(loop_stack);
+   ralloc_free(if_depth_in_loop);
+
+   brw_set_uip_jip(p);
+
+   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
+    * emit issues, it doesn't get the jump distances into the output,
+    * which is often something we want to debug.  So this is here in
+    * case you're doing that.
+    */
+   if (0) {
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+	 for (unsigned int i = 0; i < p->nr_insn; i++) {
+	    printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+		   ((uint32_t *)&p->store[i])[3],
+		   ((uint32_t *)&p->store[i])[2],
+		   ((uint32_t *)&p->store[i])[1],
+		   ((uint32_t *)&p->store[i])[0]);
+	    brw_disasm(stdout, &p->store[i], intel->gen);
+	 }
+      }
+   }
+}
+
+extern "C" {
+
+/* C entry point: compile the currently bound GLSL vertex program with
+ * vec4_visitor.  Returns false when there is no program, no linked vertex
+ * shader, or the visitor reported failure.
+ */
+bool
+brw_vs_emit(struct brw_vs_compile *c)
+{
+   struct brw_context *brw = c->func.brw;
+   struct intel_context *intel = &brw->intel;
+   struct gl_shader_program *prog = intel->ctx.Shader.CurrentVertexProgram;
+   struct brw_shader *shader = NULL;
+
+   if (prog)
+      shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (!shader)
+      return false;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n\n");
+   }
+
+   vec4_visitor v(c, prog, shader);
+
+   if (v.run())
+      return true;
+
+   /* FINISHME: Cleanly fail, test at link time, etc. */
+   assert(!"not reached");
+   return false;
+}
+
+} /* extern "C" */
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
new file mode 100644
index 00000000000..e7f6b28a536
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+using namespace brw;
+
+namespace brw {
+
+/* Map a virtual GRF number to its hardware register; other files are kept. */
+static void
+assign(int *reg_hw_locations, reg *reg)
+{
+   if (reg->file == GRF)
+      reg->reg = reg_hw_locations[reg->reg];
+}
+
+/* Trivial allocator: pack every virtual GRF back to back after the payload
+ * and rewrite all operands.  fail()s if more than BRW_MAX_GRF are needed.
+ */
+void
+vec4_visitor::reg_allocate_trivial()
+{
+   int hw_reg_mapping[this->virtual_grf_count + 1];
+   int next = this->first_non_payload_grf;
+
+   /* Walking from 0 (and the +1 above) keeps a GRF-less shader safe. */
+   for (int i = 0; i < this->virtual_grf_count; i++) {
+      hw_reg_mapping[i] = next;
+      next += this->virtual_grf_sizes[i];
+   }
+   prog_data->total_grf = next;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      assign(hw_reg_mapping, &inst->dst);
+      assign(hw_reg_mapping, &inst->src[0]);
+      assign(hw_reg_mapping, &inst->src[1]);
+      assign(hw_reg_mapping, &inst->src[2]);
+   }
+
+   /* 'next' is the number of GRFs used, i.e. one past the last one. */
+   if (next > BRW_MAX_GRF) {
+      fail("Ran out of regs on trivial allocator (%d/%d)\n",
+	   next, BRW_MAX_GRF);
+   }
+}
+
+void
+vec4_visitor::reg_allocate()
+{
+   reg_allocate_trivial(); /* the only allocator this class declares */
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
new file mode 100644
index 00000000000..bba1d810f19
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -0,0 +1,1649 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "main/macros.h"
+
+namespace brw {
+
+/* Build a source reading back what a destination writes: the write-masked
+ * channels in order, with the last one repeated to fill the swizzle.
+ */
+src_reg::src_reg(dst_reg reg)
+{
+   init();
+   this->file = reg.file;
+   this->reg = reg.reg;
+   this->reg_offset = reg.reg_offset;
+   this->type = reg.type;
+   /* HW_REG operands are identified by fixed_hw_reg, not by 'reg'. */
+   this->fixed_hw_reg = reg.fixed_hw_reg;
+
+   int swizzles[4];
+   int next_chan = 0;
+   int last = 0;
+
+   for (int i = 0; i < 4; i++) {
+      if (reg.writemask & (1 << i))
+	 swizzles[next_chan++] = last = i;
+   }
+   for (; next_chan < 4; next_chan++)
+      swizzles[next_chan] = last;
+
+   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+				swizzles[2], swizzles[3]);
+}
+
+/* Build the destination for the register a source names, all channels on. */
+dst_reg::dst_reg(src_reg reg)
+{
+   init(); /* also sets writemask to WRITEMASK_XYZW */
+   this->file = reg.file;
+   this->reg = reg.reg;
+   this->reg_offset = reg.reg_offset;
+   this->type = reg.type;
+   this->fixed_hw_reg = reg.fixed_hw_reg; /* identifies HW_REG operands */
+}
+
+/* Append an instruction with up to three sources to the instruction list,
+ * tagged with the IR node and annotation currently being processed. */
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst,
+		   src_reg src0, src_reg src1, src_reg src2)
+{
+   vec4_instruction *added = new(mem_ctx) vec4_instruction();
+
+   added->ir = this->base_ir;
+   added->annotation = this->current_annotation;
+   added->opcode = opcode;
+   added->dst = dst;
+   added->src[0] = src0;
+   added->src[1] = src1;
+   added->src[2] = src2;
+   this->instructions.push_tail(added);
+   return added;
+}
+
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
+{
+   return emit(opcode, dst, src0, src1, src_reg()); /* default-built src2 */
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
+{
+   assert(dst.writemask != 0); /* must write at least one channel */
+   return emit(opcode, dst, src0, src_reg(), src_reg()); /* no src1/src2 */
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode)
+{
+   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); /* no operands */
+}
+
+/* Emit DP2, DP3 or DP4 for operands of 2, 3 or 4 elements respectively. */
+void
+vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
+{
+   static const enum opcode dot_opcodes[] =
+      { BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 };
+   assert(elements >= 2 && elements <= 4); /* elements - 2 indexes the table */
+   emit(dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+void
+vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description.  Resolve them with a MOV into a temporary and
+    * feed the math instruction from that temporary instead.
+    */
+   src_reg expanded = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src);
+   src = expanded;
+
+   emit(opcode, dst, src);
+}
+
+void
+vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   vec4_instruction *inst = emit(opcode, dst, src);
+   inst->base_mrf = 1; /* first MRF of the message */
+   inst->mlen = 1; /* message length: one register */
+}
+
+/* Emit a one-operand math opcode through the generation-specific helper;
+ * any other opcode asserts and emits nothing. */
+void
+vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
+{
+   switch (opcode) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      if (intel->gen < 6)
+	 emit_math1_gen4(opcode, dst, src);
+      else
+	 emit_math1_gen6(opcode, dst, src);
+      return;
+   default:
+      assert(!"not reached: bad math opcode");
+      return;
+   }
+}
+
+void
+vec4_visitor::emit_math2_gen6(enum opcode opcode,
+			      dst_reg dst, src_reg src0, src_reg src1)
+{
+   src_reg expanded;
+
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description.  Move each source into its own temporary and
+    * hand those temporaries to the math instruction.
+    */
+
+   expanded = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
+   src0 = expanded;
+
+   expanded = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
+   src1 = expanded;
+
+   emit(opcode, dst, src0, src1);
+}
+
+void
+vec4_visitor::emit_math2_gen4(enum opcode opcode,
+			      dst_reg dst, src_reg src0, src_reg src1)
+{
+   vec4_instruction *inst = emit(opcode, dst, src0, src1);
+   inst->base_mrf = 1; /* first MRF of the message */
+   inst->mlen = 2; /* message length: two registers */
+}
+
+/* Emit POW, the only two-operand math opcode accepted here. */
+void
+vec4_visitor::emit_math(enum opcode opcode,
+			dst_reg dst, src_reg src0, src_reg src1)
+{
+   assert(opcode == SHADER_OPCODE_POW);
+
+   if (intel->gen < 6)
+      emit_math2_gen4(opcode, dst, src0, src1);
+   else
+      emit_math2_gen6(opcode, dst, src0, src1);
+}
+
+/* Send every instruction of the list through this visitor, recording each
+ * one as base_ir before it is visited. */
+void
+vec4_visitor::visit_instructions(const exec_list *list)
+{
+   foreach_iter(exec_list_iterator, iter, *list) {
+      base_ir = (ir_instruction *)iter.get();
+      base_ir->accept(this);
+   }
+}
+
+
+/* Number of vec4 registers needed to hold a value of the given GLSL type. */
+static int
+type_size(const struct glsl_type *type)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      /* A matrix takes one register per column.  Anything else gets a
+       * whole vec4 regardless of vector size.  This is bad packing for
+       * things like floats, but otherwise arrays become a mess.
+       * Hopefully a later pass over the code can pack scalars down if
+       * appropriate.
+       */
+      if (!type->is_matrix())
+	 return 1;
+      return type->matrix_columns;
+
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0);
+      return type_size(type->fields.array) * type->length;
+
+   case GLSL_TYPE_STRUCT: {
+      int total = 0;
+      for (unsigned int field = 0; field < type->length; field++)
+	 total += type_size(type->fields.structure[field].type);
+      return total;
+   }
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+/* Reserve a new virtual GRF spanning 'size' registers and return its
+ * number.  The size table starts at 16 entries and doubles when full. */
+int
+vec4_visitor::virtual_grf_alloc(int size)
+{
+   if (virtual_grf_count >= virtual_grf_array_size) {
+      virtual_grf_array_size =
+	 virtual_grf_array_size ? virtual_grf_array_size * 2 : 16;
+      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
+				   virtual_grf_array_size);
+   }
+   virtual_grf_sizes[virtual_grf_count] = size;
+   return virtual_grf_count++;
+}
+
+/* Allocate a fresh virtual GRF big enough for 'type' and build a source
+ * reading it: no-op swizzle for aggregates, size-based swizzle otherwise.
+ */
+src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+   const bool aggregate = type->is_array() || type->is_record();
+
+   init();
+
+   this->file = GRF;
+   this->reg = v->virtual_grf_alloc(type_size(type));
+   this->type = brw_type_for_base_type(type);
+   this->swizzle = aggregate ? BRW_SWIZZLE_NOOP
+			     : swizzle_for_size(type->vector_elements);
+}
+
+/* Allocate a fresh virtual GRF big enough for 'type' and build a destination
+ * for it: aggregates write all channels, vectors only their own components.
+ */
+dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+   const bool aggregate = type->is_array() || type->is_record();
+
+   init();
+
+   this->file = GRF;
+   this->reg = v->virtual_grf_alloc(type_size(type));
+   this->type = brw_type_for_base_type(type);
+   this->writemask = aggregate ? WRITEMASK_XYZW
+			       : (1 << type->vector_elements) - 1;
+}
+
+dst_reg *
+vec4_visitor::variable_storage(ir_variable *var)
+{
+   return (dst_reg *)hash_table_find(this->variable_ht, var); /* keyed by var */
+}
+
+void
+vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
+{
+   ir_expression *expr = ir->as_expression();
+
+   if (expr) {
+      src_reg op[2];
+      vec4_instruction *inst;
+
+      assert(expr->get_num_operands() <= 2);
+      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+	 assert(expr->operands[i]->type->is_scalar());
+
+	 expr->operands[i]->accept(this);
+	 op[i] = this->result; /* the visit leaves the operand in 'result' */
+      }
+
+      switch (expr->operation) {
+      case ir_unop_logic_not:
+	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
+	 inst->conditional_mod = BRW_CONDITIONAL_Z; /* Z on (op[0] & 1) */
+	 break;
+
+      case ir_binop_logic_xor:
+	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_binop_logic_or:
+	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_binop_logic_and:
+	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_unop_f2b:
+	 if (intel->gen >= 6) {
+	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
+	 } else {
+	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
+	 }
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_unop_i2b:
+	 if (intel->gen >= 6) {
+	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+	 } else {
+	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
+	 }
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_binop_greater:
+      case ir_binop_gequal:
+      case ir_binop_less:
+      case ir_binop_lequal:
+      case ir_binop_equal:
+      case ir_binop_all_equal:
+      case ir_binop_nequal:
+      case ir_binop_any_nequal:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+	 inst->conditional_mod =
+	    brw_conditional_for_comparison(expr->operation);
+	 break;
+
+      default:
+	 assert(!"not reached");
+	 break;
+      }
+      return; /* expression case is complete */
+   }
+
+   ir->accept(this); /* not an expression: evaluate it, then test 'result' */
+
+   if (intel->gen >= 6) {
+      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
+			       this->result, src_reg(1));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+   } else {
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+   }
+}
+
+/**
+ * Emit a gen6 IF statement with the comparison folded into the IF
+ * instruction.
+ *
+ * For an expression condition the operands are evaluated and handed to the
+ * IF together with a conditional modifier; logic_or/logic_and are first
+ * reduced into a boolean temporary.  Any other condition is evaluated and
+ * compared against 0 with NZ.
+ */
+void
+vec4_visitor::emit_if_gen6(ir_if *ir)
+{
+   ir_expression *expr = ir->condition->as_expression();
+   vec4_instruction *inst;
+
+   if (!expr) {
+      ir->condition->accept(this);
+
+      inst = emit(BRW_OPCODE_IF, dst_null_d(), this->result, src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      return;
+   }
+
+   src_reg op[2];
+
+   assert(expr->get_num_operands() <= 2);
+   for (unsigned int n = 0; n < expr->get_num_operands(); n++) {
+      assert(expr->operands[n]->type->is_scalar());
+
+      expr->operands[n]->accept(this);
+      op[n] = this->result;
+   }
+
+   switch (expr->operation) {
+   case ir_unop_logic_not:
+      inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_Z;
+      break;
+
+   case ir_binop_logic_xor:
+      inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      break;
+
+   case ir_binop_logic_or: {
+      dst_reg combined = dst_reg(this, glsl_type::bool_type);
+      emit(BRW_OPCODE_OR, combined, op[0], op[1]);
+      inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(combined), src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      break;
+   }
+
+   case ir_binop_logic_and: {
+      dst_reg combined = dst_reg(this, glsl_type::bool_type);
+      emit(BRW_OPCODE_AND, combined, op[0], op[1]);
+      inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(combined), src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      break;
+   }
+
+   case ir_unop_f2b:
+      inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      break;
+
+   case ir_unop_i2b:
+      inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      break;
+
+   case ir_binop_greater:
+   case ir_binop_gequal:
+   case ir_binop_less:
+   case ir_binop_lequal:
+   case ir_binop_equal:
+   case ir_binop_all_equal:
+   case ir_binop_nequal:
+   case ir_binop_any_nequal:
+      inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+      inst->conditional_mod =
+         brw_conditional_for_comparison(expr->operation);
+      break;
+
+   default:
+      /* Unexpected operation: still emit an IF so the block stays paired. */
+      assert(!"not reached");
+      inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      break;
+   }
+}
+
+/**
+ * Assign register storage to a variable the first time it is visited and
+ * record it in variable_ht.
+ *
+ * - Inputs map to the ATTR file at the variable's location.
+ * - Outputs get a fresh virtual GRF and are also recorded, one entry per
+ *   type_size() slot, in output_reg[].
+ * - Temporaries and ordinary (auto) locals get a fresh virtual GRF.
+ * - Uniforms are not handled yet.
+ */
+void
+vec4_visitor::visit(ir_variable *ir)
+{
+   dst_reg *reg = NULL;
+
+   /* Storage was already assigned on an earlier visit. */
+   if (variable_storage(ir))
+      return;
+
+   switch (ir->mode) {
+   case ir_var_in:
+      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
+      reg->type = brw_type_for_base_type(ir->type);
+      hash_table_insert(this->variable_ht, reg, ir);
+      break;
+
+   case ir_var_out:
+      reg = new(mem_ctx) dst_reg(this, ir->type);
+      hash_table_insert(this->variable_ht, reg, ir);
+
+      for (int i = 0; i < type_size(ir->type); i++) {
+         output_reg[ir->location + i] = *reg;
+         output_reg[ir->location + i].reg_offset = i;
+      }
+      break;
+
+   /* Ordinary locals need storage too, otherwise every dereference of them
+    * fails in visit(ir_dereference_variable).
+    */
+   case ir_var_auto:
+   case ir_var_temporary:
+      reg = new(mem_ctx) dst_reg(this, ir->type);
+      hash_table_insert(this->variable_ht, reg, ir);
+
+      break;
+
+   case ir_var_uniform:
+      /* FINISHME: uniforms */
+      break;
+   }
+}
+
+/**
+ * Emit a DO ... WHILE loop for \p ir.
+ *
+ * The optional counter initialisation is emitted before DO, the optional
+ * bound check (a conditional break) right after DO, and the optional
+ * increment just before WHILE.  The helper IR nodes built for those pieces
+ * are visited and then deleted again.
+ */
+void
+vec4_visitor::visit(ir_loop *ir)
+{
+   ir_dereference_variable *counter_deref = NULL;
+
+   /* We don't want debugging output to print the whole body of the
+    * loop as the annotation.
+    */
+   this->base_ir = NULL;
+
+   if (ir->counter)
+      counter_deref = new(ir) ir_dereference_variable(ir->counter);
+
+   if (ir->from) {
+      assert(ir->counter != NULL);
+
+      /* counter = from; */
+      ir_assignment *init = new(ir) ir_assignment(counter_deref, ir->from, NULL);
+
+      init->accept(this);
+      delete init;
+   }
+
+   emit(BRW_OPCODE_DO);
+
+   if (ir->to) {
+      /* if (counter <cmp> to) break; */
+      ir_expression *bound_test =
+         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
+                               counter_deref, ir->to);
+      ir_if *exit_if = new(ir) ir_if(bound_test);
+      ir_loop_jump *exit_jump = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+
+      exit_if->then_instructions.push_tail(exit_jump);
+      exit_if->accept(this);
+
+      delete exit_if;
+      delete bound_test;
+      delete exit_jump;
+   }
+
+   visit_instructions(&ir->body_instructions);
+
+   if (ir->increment) {
+      /* counter = counter + increment; */
+      ir_expression *sum =
+         new(ir) ir_expression(ir_binop_add, counter_deref->type,
+                               counter_deref, ir->increment);
+      ir_assignment *step = new(ir) ir_assignment(counter_deref, sum, NULL);
+
+      step->accept(this);
+      delete step;
+      delete sum;
+   }
+
+   emit(BRW_OPCODE_WHILE);
+}
+
+/** Emit BREAK or CONTINUE according to the jump's mode. */
+void
+vec4_visitor::visit(ir_loop_jump *ir)
+{
+   if (ir->mode == ir_loop_jump::jump_break) {
+      emit(BRW_OPCODE_BREAK);
+   } else if (ir->mode == ir_loop_jump::jump_continue) {
+      emit(BRW_OPCODE_CONTINUE);
+   }
+}
+
+
+/** Signatures are never expected to be visited directly; asserts. */
+void
+vec4_visitor::visit(ir_function_signature *ir)
+{
+   (void)ir;
+   assert(0);
+}
+
+/**
+ * Visit the body of main(), looked up through a signature with an empty
+ * parameter list.  Every other function is skipped.
+ */
+void
+vec4_visitor::visit(ir_function *ir)
+{
+   /* Ignore function bodies other than main() -- we shouldn't see calls to
+    * them since they should all be inlined.
+    */
+   if (strcmp(ir->name, "main") != 0)
+      return;
+
+   exec_list no_params;
+   const ir_function_signature *main_sig = ir->matching_signature(&no_params);
+
+   assert(main_sig);
+
+   visit_instructions(&main_sig->body);
+}
+
+/**
+ * Try to emit \p ir as a saturating MOV.
+ *
+ * \return false, emitting nothing, when as_rvalue_to_saturate() finds no
+ *         operand to saturate; true after the operand has been evaluated and
+ *         moved with saturate set into a new temporary, which becomes
+ *         this->result.
+ */
+GLboolean
+vec4_visitor::try_emit_sat(ir_expression *ir)
+{
+   ir_rvalue *to_saturate = ir->as_rvalue_to_saturate();
+
+   if (to_saturate == NULL)
+      return false;
+
+   to_saturate->accept(this);
+   src_reg unsaturated = this->result;
+
+   this->result = src_reg(this, ir->type);
+
+   vec4_instruction *mov =
+      emit(BRW_OPCODE_MOV, dst_reg(this->result), unsaturated);
+   mov->saturate = true;
+
+   return true;
+}
+
+/**
+ * Emit a comparison of \p src0 and \p src1 into \p dst, then mask the
+ * result with 0x1.
+ *
+ * \param op  comparison operation, translated to a conditional modifier by
+ *            brw_conditional_for_comparison().
+ */
+void
+vec4_visitor::emit_bool_comparison(unsigned int op,
+                                   dst_reg dst, src_reg src0, src_reg src1)
+{
+   /* original gen4 does destination conversion before comparison. */
+   if (intel->gen < 5) {
+      dst.type = src0.type;
+   }
+
+   vec4_instruction *cmp = emit(BRW_OPCODE_CMP, dst, src0, src1);
+   cmp->conditional_mod = brw_conditional_for_comparison(op);
+
+   /* The masking AND always works on the destination as a signed dword. */
+   dst.type = BRW_REGISTER_TYPE_D;
+   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
+}
+
+/**
+ * Generate instructions for an expression node.
+ *
+ * Each operand is visited first and its register captured in op[].  A fresh
+ * temporary of the expression's type is allocated for the result and
+ * published in this->result; cases that can express the operation as a
+ * source modifier (neg, abs) replace this->result with the modified operand
+ * instead of emitting anything.
+ *
+ * Exits the process if an operand fails to produce a register.
+ */
+void
+vec4_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   src_reg op[Elements(ir->operands)];
+   src_reg result_src;
+   dst_reg result_dst;
+   vec4_instruction *inst;
+
+   if (try_emit_sat(ir))
+      return;
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      this->result.file = BAD_FILE;
+      ir->operands[operand]->accept(this);
+      if (this->result.file == BAD_FILE) {
+         printf("Failed to get tree for expression operand:\n");
+         ir->operands[operand]->print();
+         exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   this->result.file = BAD_FILE;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = src_reg(this, ir->type);
+   /* convenience for the emit functions below. */
+   result_dst = dst_reg(result_src);
+   /* If nothing special happens, this is the result. */
+   this->result = result_src;
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
+       * ones complement of the whole register, not just bit 0.
+       */
+      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
+      break;
+   case ir_unop_neg:
+      op[0].negate = !op[0].negate;
+      this->result = op[0];
+      break;
+   case ir_unop_abs:
+      op[0].abs = true;
+      op[0].negate = false;
+      this->result = op[0];
+      break;
+
+   case ir_unop_sign:
+      /* Start from 0, then overwrite with +1 / -1 under predication. */
+      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
+
+      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_G;
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_L;
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+      break;
+
+   case ir_unop_rcp:
+      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
+      break;
+
+   case ir_unop_exp2:
+      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
+      break;
+   case ir_unop_log2:
+      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
+      break;
+   case ir_unop_sin:
+   case ir_unop_sin_reduced:
+      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+   case ir_unop_cos_reduced:
+      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
+      break;
+
+   case ir_unop_dFdx:
+   case ir_unop_dFdy:
+      assert(!"derivatives not valid in vertex shader");
+      break;
+
+   case ir_unop_noise:
+      assert(!"not reached: should be handled by lower_noise");
+      break;
+
+   case ir_binop_add:
+      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      assert(!"not reached: should be handled by ir_sub_to_add_neg");
+      break;
+
+   case ir_binop_mul:
+      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_div:
+      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      break;
+   case ir_binop_mod:
+      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      break;
+
+   case ir_binop_less:
+   case ir_binop_greater:
+   case ir_binop_lequal:
+   case ir_binop_gequal:
+   case ir_binop_equal:
+   case ir_binop_nequal: {
+      dst_reg temp = result_dst;
+      /* original gen4 does implicit conversion before comparison. */
+      if (intel->gen < 5)
+         temp.type = op[0].type;
+
+      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
+      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
+      break;
+   }
+
+   case ir_binop_all_equal:
+      /* "==" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+         inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+      } else {
+         dst_reg temp = result_dst;
+         /* original gen4 does implicit conversion before comparison. */
+         if (intel->gen < 5)
+            temp.type = op[0].type;
+
+         /* Equality test: Z, matching the vector path above (NZ would
+          * compute "not equal").
+          */
+         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+         inst->conditional_mod = BRW_CONDITIONAL_Z;
+         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+      }
+      break;
+   case ir_binop_any_nequal:
+      /* "!=" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+         inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      } else {
+         dst_reg temp = result_dst;
+         /* original gen4 does implicit conversion before comparison. */
+         if (intel->gen < 5)
+            temp.type = op[0].type;
+
+         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+         inst->conditional_mod = BRW_CONDITIONAL_NZ;
+         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+      }
+      break;
+
+   case ir_unop_any:
+      /* The predicated MOV below consumes this comparison, so it needs a
+       * conditional modifier: a channel is "set" when it is non-zero.
+       */
+      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      break;
+
+   case ir_binop_logic_xor:
+      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_or:
+      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_and:
+      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
+      break;
+
+   case ir_unop_sqrt:
+      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
+      break;
+   case ir_unop_rsq:
+      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
+      break;
+   case ir_unop_i2f:
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+   case ir_unop_u2f:
+   case ir_unop_b2f:
+   case ir_unop_b2i:
+   case ir_unop_f2i:
+      emit(BRW_OPCODE_MOV, result_dst, op[0]);
+      break;
+   case ir_unop_f2b:
+   case ir_unop_i2b: {
+      dst_reg temp = result_dst;
+      /* original gen4 does implicit conversion before comparison. */
+      if (intel->gen < 5)
+         temp.type = op[0].type;
+
+      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
+      break;
+   }
+
+   case ir_unop_trunc:
+      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
+      break;
+   case ir_unop_ceil:
+      /* ceil(x) == -floor(-x): negate the input, round down, and negate
+       * the result when it is read.
+       */
+      op[0].negate = !op[0].negate;
+      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+      this->result.negate = true;
+      break;
+   case ir_unop_floor:
+      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+      break;
+   case ir_unop_fract:
+      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
+      break;
+   case ir_unop_round_even:
+      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
+      break;
+
+   case ir_binop_min:
+      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_L;
+
+      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+   case ir_binop_max:
+      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_G;
+
+      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+
+   case ir_binop_pow:
+      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
+      break;
+
+   case ir_unop_bit_not:
+      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
+      break;
+   case ir_binop_bit_and:
+      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_bit_xor:
+      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_bit_or:
+      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+
+   case ir_quadop_vector:
+      assert(!"not reached: should be handled by lower_quadop_vector");
+      break;
+   }
+}
+
+
+/**
+ * Apply an rvalue swizzle by composing it with the swizzle already present
+ * on the operand's source register.
+ *
+ * The channels selected by ir->mask fill the first vector_elements slots;
+ * the remaining slots repeat the last selected channel.  The composed
+ * register is left in this->result.
+ */
+void
+vec4_visitor::visit(ir_swizzle *ir)
+{
+   src_reg src;
+   int i = 0;
+   int swizzle[4];
+
+   /* Note that this is only swizzles in expressions, not those on the left
+    * hand side of an assignment, which do write masking.  See ir_assignment
+    * for that.
+    */
+
+   ir->val->accept(this);
+   src = this->result;
+   assert(src.file != BAD_FILE);
+
+   /* Every selected channel must be translated, not just the first one;
+    * otherwise the replication loop below reads uninitialized entries.
+    */
+   for (i = 0; i < ir->type->vector_elements; i++) {
+      switch (i) {
+      case 0:
+         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
+         break;
+      case 1:
+         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
+         break;
+      case 2:
+         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
+         break;
+      case 3:
+         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
+         break;
+      }
+   }
+   for (; i < 4; i++) {
+      /* Replicate the last channel out. */
+      swizzle[i] = swizzle[ir->type->vector_elements - 1];
+   }
+
+   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+   this->result = src;
+}
+
+/**
+ * Resolve a variable dereference to the register recorded for the variable.
+ *
+ * When no storage is known, fail() is called and the null register is used
+ * as the result.
+ */
+void
+vec4_visitor::visit(ir_dereference_variable *ir)
+{
+   dst_reg *storage = variable_storage(ir->var);
+
+   if (storage) {
+      this->result = src_reg(*storage);
+      return;
+   }
+
+   fail("Failed to find variable storage for %s\n", ir->var->name);
+   this->result = src_reg(brw_null_reg());
+}
+
+/**
+ * Resolve an array element dereference.
+ *
+ * A constant index is folded into the register offset (index times the
+ * element's type_size()).  Variable indexing is not implemented: the code
+ * for it is compiled out, so the base register is used unchanged.
+ */
+void
+vec4_visitor::visit(ir_dereference_array *ir)
+{
+   int element_size = type_size(ir->type);
+   ir_constant *constant_index =
+      ir->array_index->constant_expression_value();
+
+   ir->array->accept(this);
+   src_reg src = this->result;
+
+   if (constant_index) {
+      src.reg_offset += constant_index->value.i[0] * element_size;
+   } else {
+#if 0 /* Variable array index */
+      /* Variable index array dereference.  It eats the "vec4" of the
+       * base of the array and an index that offsets the Mesa register
+       * index.
+       */
+      ir->array_index->accept(this);
+
+      src_reg index_reg;
+
+      if (element_size == 1) {
+	 index_reg = this->result;
+      } else {
+	 index_reg = src_reg(this, glsl_type::float_type);
+
+	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
+	      this->result, src_reg_for_float(element_size));
+      }
+
+      src.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+#endif
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   const bool is_small = ir->type->is_scalar() || ir->type->is_vector();
+
+   src.swizzle = is_small ? swizzle_for_size(ir->type->vector_elements)
+                          : BRW_SWIZZLE_NOOP;
+
+   this->result = src;
+}
+
+/**
+ * Resolve a struct field dereference by advancing the record's register
+ * offset past the type_size() of every field that precedes the named one.
+ */
+void
+vec4_visitor::visit(ir_dereference_record *ir)
+{
+   const glsl_type *struct_type = ir->record->type;
+   int offset = 0;
+   unsigned int field = 0;
+
+   ir->record->accept(this);
+
+   /* Sum the sizes of the fields in front of the one being accessed. */
+   while (field < struct_type->length &&
+          strcmp(struct_type->fields.structure[field].name, ir->field) != 0) {
+      offset += type_size(struct_type->fields.structure[field].type);
+      field++;
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector()) {
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   } else {
+      this->result.swizzle = BRW_SWIZZLE_NOOP;
+   }
+
+   this->result.reg_offset += offset;
+}
+
+/**
+ * Compute the destination register for the left-hand side of an assignment.
+ *
+ * We want to be careful in assignment setup to hit the actual storage
+ * instead of potentially using a temporary like we might with the
+ * ir_dereference handler.
+ *
+ * \return the visitor's result for \p ir, converted to a dst_reg.
+ */
+static dst_reg
+get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
+{
+   /* The LHS must be a dereference.  If the LHS is a variable indexed array
+    * access of a vector, it must be separated into a series conditional moves
+    * before reaching this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+   if (ir_dereference_array *array_deref = ir->as_dereference_array()) {
+      assert(!array_deref->array->type->is_vector());
+   }
+
+   /* Use the rvalue deref handler for the most part.  We'll ignore
+    * swizzles in it and write swizzles using writemask, though.
+    */
+   ir->accept(v);
+
+   dst_reg lhs = dst_reg(v->result);
+   return lhs;
+}
+
+/**
+ * Copy an aggregate (anything that is neither scalar nor vector) from the
+ * assignment's right-hand side to its left-hand side, one register per
+ * type_size() slot.
+ *
+ * For a conditional assignment the condition is evaluated once up front and
+ * every MOV is predicated on it.
+ */
+void
+vec4_visitor::emit_block_move(ir_assignment *ir)
+{
+   ir->rhs->accept(this);
+   src_reg src = this->result;
+
+   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
+   /* FINISHME: This should really set to the correct maximal writemask for each
+    * FINISHME: component written (in the loops below).
+    */
+   dst.writemask = WRITEMASK_XYZW;
+
+   /* The predicated MOVs below need the condition to have been evaluated,
+    * exactly as visit(ir_assignment) does for scalars and vectors.
+    */
+   if (ir->condition) {
+      emit_bool_to_cond_code(ir->condition);
+   }
+
+   for (int i = 0; i < type_size(ir->lhs->type); i++) {
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
+      if (ir->condition)
+         inst->predicate = BRW_PREDICATE_NORMAL;
+
+      dst.reg_offset++;
+      src.reg_offset++;
+   }
+}
+
+/**
+ * Emit an assignment.
+ *
+ * Aggregates are handed to emit_block_move().  Scalars and vectors are
+ * written with a single writemasked MOV whose source swizzle is rearranged
+ * so each RHS channel lands in the slot it is written to.  A conditional
+ * assignment evaluates the condition first and predicates the MOV on it.
+ */
+void
+vec4_visitor::visit(ir_assignment *ir)
+{
+   if (!ir->lhs->type->is_scalar() &&
+       !ir->lhs->type->is_vector()) {
+      emit_block_move(ir);
+      return;
+   }
+
+   /* Now we're down to just a scalar/vector with writemasks. */
+   ir->rhs->accept(this);
+   src_reg src = this->result;
+
+   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
+   int swizzles[4];
+   int first_enabled_chan = 0;
+   int src_chan = 0;
+
+   /* Both scalars and vectors reach this point (see the guard above). */
+   assert(ir->lhs->type->is_vector() ||
+          ir->lhs->type->is_scalar());
+   dst.writemask = ir->write_mask;
+
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i)) {
+         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
+         break;
+      }
+   }
+
+   /* Swizzle a small RHS vector into the channels being written.
+    *
+    * glsl ir treats write_mask as dictating how many channels are
+    * present on the RHS while in our instructions we need to make
+    * those channels appear in the slots of the vec4 they're written to.
+    */
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i))
+         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
+      else
+         swizzles[i] = first_enabled_chan;
+   }
+   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+                              swizzles[2], swizzles[3]);
+
+   if (ir->condition) {
+      emit_bool_to_cond_code(ir->condition);
+   }
+
+   for (int i = 0; i < type_size(ir->lhs->type); i++) {
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
+
+      if (ir->condition)
+         inst->predicate = BRW_PREDICATE_NORMAL;
+
+      dst.reg_offset++;
+      src.reg_offset++;
+   }
+}
+
+
+/**
+ * Materialise a constant into a freshly allocated temporary and leave that
+ * temporary in this->result.
+ *
+ * - Structs and arrays: each member/element is visited recursively and
+ *   copied register by register into the aggregate temporary.
+ * - Matrices: every component is loaded with a single-channel MOV, one
+ *   register per column.
+ * - Scalars and vectors: every component is loaded with a single-channel
+ *   MOV into one shared register.
+ */
+void
+vec4_visitor::visit(ir_constant *ir)
+{
+   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+      src_reg temp_base = src_reg(this, ir->type);
+      dst_reg temp = dst_reg(temp_base);
+
+      foreach_iter(exec_list_iterator, iter, ir->components) {
+         ir_constant *field_value = (ir_constant *)iter.get();
+         int size = type_size(field_value->type);
+
+         assert(size > 0);
+
+         field_value->accept(this);
+         src_reg src = this->result;
+
+         for (int i = 0; i < size; i++) {
+            emit(BRW_OPCODE_MOV, temp, src);
+
+            src.reg_offset++;
+            temp.reg_offset++;
+         }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_array()) {
+      src_reg temp_base = src_reg(this, ir->type);
+      dst_reg temp = dst_reg(temp_base);
+      int size = type_size(ir->type->fields.array);
+
+      assert(size > 0);
+
+      for (unsigned int i = 0; i < ir->type->length; i++) {
+         ir->array_elements[i]->accept(this);
+         src_reg src = this->result;
+         for (int j = 0; j < size; j++) {
+            emit(BRW_OPCODE_MOV, temp, src);
+
+            src.reg_offset++;
+            temp.reg_offset++;
+         }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_matrix()) {
+      this->result = src_reg(this, ir->type);
+      dst_reg dst = dst_reg(this->result);
+
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+
+      for (int i = 0; i < ir->type->matrix_columns; i++) {
+         for (int j = 0; j < ir->type->vector_elements; j++) {
+            dst.writemask = 1 << j;
+            emit(BRW_OPCODE_MOV, dst,
+                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
+         }
+         dst.reg_offset++;
+      }
+      return;
+   }
+
+   /* Allocate the result once, outside the loop: every channel has to land
+    * in the same register, otherwise only the last channel of a vector
+    * constant would be present in the register handed back to the caller.
+    */
+   this->result = src_reg(this, ir->type);
+   dst_reg dst = dst_reg(this->result);
+
+   for (int i = 0; i < ir->type->vector_elements; i++) {
+      dst.writemask = 1 << i;
+
+      switch (ir->type->base_type) {
+      case GLSL_TYPE_FLOAT:
+         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
+         break;
+      case GLSL_TYPE_INT:
+         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
+         break;
+      case GLSL_TYPE_UINT:
+         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
+         break;
+      case GLSL_TYPE_BOOL:
+         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
+         break;
+      default:
+         assert(!"Non-float/uint/int/bool constant");
+         break;
+      }
+   }
+}
+
+/** Calls are not expected to reach this visitor; asserts if one does. */
+void
+vec4_visitor::visit(ir_call *ir)
+{
+   (void)ir;
+   assert(!"not reached");
+}
+
+/** Texture operations are not handled by this visitor; asserts. */
+void
+vec4_visitor::visit(ir_texture *ir)
+{
+   (void)ir;
+   assert(!"not reached");
+}
+
+/** Return statements are not handled by this visitor; asserts. */
+void
+vec4_visitor::visit(ir_return *ir)
+{
+   (void)ir;
+   assert(!"not reached");
+}
+
+/** Discards are not handled by this visitor; asserts. */
+void
+vec4_visitor::visit(ir_discard *ir)
+{
+   (void)ir;
+   assert(!"not reached");
+}
+
+/**
+ * Emit IF / ELSE / ENDIF around the branches of \p ir.
+ *
+ * The condition is evaluated, but the IF emitted here does not consume it
+ * yet (see the FINISHME below).  base_ir is pointed back at the condition
+ * before each control-flow instruction is emitted.
+ */
+void
+vec4_visitor::visit(ir_if *ir)
+{
+   const bool has_else = !ir->else_instructions.is_empty();
+
+   this->base_ir = ir->condition;
+   ir->condition->accept(this);
+   assert(this->result.file != BAD_FILE);
+
+   /* FINISHME: condcode */
+   emit(BRW_OPCODE_IF);
+
+   visit_instructions(&ir->then_instructions);
+
+   if (has_else) {
+      this->base_ir = ir->condition;
+      emit(BRW_OPCODE_ELSE);
+
+      visit_instructions(&ir->else_instructions);
+   }
+
+   this->base_ir = ir->condition;
+   emit(BRW_OPCODE_ENDIF);
+}
+
+/**
+ * Emit the VUE header for pre-gen6 hardware, starting at message register
+ * \p header_mrf.
+ *
+ * Computes NDC coordinates from the position output, builds the first
+ * header register (point size and user clip flags when any of those are
+ * needed, zero otherwise), and then writes the NDC and position registers
+ * in the layout described in the comments below.
+ *
+ * \return the index of the first message register after the header.
+ */
+int
+vec4_visitor::emit_vue_header_gen4(int header_mrf)
+{
+   /* Get the position */
+   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
+
+   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
+   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
+
+   current_annotation = "NDC";
+   dst_reg ndc_w = ndc;
+   ndc_w.writemask = WRITEMASK_W;
+   src_reg pos_w = pos;
+   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
+   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
+
+   dst_reg ndc_xyz = ndc;
+   ndc_xyz.writemask = WRITEMASK_XYZ;
+
+   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
+
+   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
+       c->key.nr_userclip || brw->has_negative_rhw_bug) {
+      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
+      GLuint i;
+
+      emit(BRW_OPCODE_MOV, header1, 0u);
+
+      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+         assert(!"finishme: psiz");
+         src_reg psiz;
+
+         header1.writemask = WRITEMASK_W;
+         emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
+         emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
+      }
+
+      for (i = 0; i < c->key.nr_userclip; i++) {
+         vec4_instruction *inst;
+
+         /* Test the position against user plane i... */
+         inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
+                     pos, src_reg(c->userplane[i]));
+         inst->conditional_mod = BRW_CONDITIONAL_L;
+
+         /* ...and set clip flag i only when that test passed.  The
+          * predicate belongs on the OR, not on the DP4 above.
+          */
+         inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
+         inst->predicate = BRW_PREDICATE_NORMAL;
+      }
+
+      /* i965 clipping workaround:
+       * 1) Test for -ve rhw
+       * 2) If set,
+       *      set ndc = (0,0,0,0)
+       *      set ucp[6] = 1
+       *
+       * Later, clipping will detect ucp[6] and ensure the primitive is
+       * clipped against all fixed planes.
+       */
+      if (brw->has_negative_rhw_bug) {
+#if 0
+	 /* FINISHME */
+	 brw_CMP(p,
+		 vec8(brw_null_reg()),
+		 BRW_CONDITIONAL_L,
+		 brw_swizzle1(ndc, 3),
+		 brw_imm_f(0));
+
+	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+	 brw_MOV(p, ndc, brw_imm_f(0));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+#endif
+      }
+
+      header1.writemask = WRITEMASK_XYZW;
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
+   } else {
+      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
+                                  BRW_REGISTER_TYPE_UD), 0u);
+   }
+
+   if (intel->gen == 5) {
+      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the ndc position (set above)
+       * dword 8-11 (m3) of the vertex header is the 4D space position
+       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+       * m6 is a pad so that the vertex element data is aligned
+       * m7 is the first vertex data we fill, which is the vertex position.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+
+      /* user clip distance. */
+      header_mrf += 2;
+
+      /* Pad so that vertex element data (starts with position) is aligned. */
+      header_mrf++;
+   } else {
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 (m1) is indices, point width, clip flags.
+       * dword 4-7 (m2) is ndc position (set above)
+       *
+       * dword 8-11 (m3) is the first vertex data, which we always have be the
+       * vertex position.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+   }
+
+   return header_mrf;
+}
+
+/**
+ * Emit the VUE header for gen6, starting at message register \p header_mrf.
+ *
+ * Writes the flags register (zero, plus point size in W when PSIZ is
+ * written), the position, and - when user clip planes are enabled - two
+ * registers of clip distances.
+ *
+ * \return the index of the first message register after the header.
+ */
+int
+vec4_visitor::emit_vue_header_gen6(int header_mrf)
+{
+   struct brw_reg reg;
+
+   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+    * dword 0-3 (m2) of the header is indices, point width, clip flags.
+    * dword 4-7 (m3) is the 4D space position
+    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
+    * enabled.
+    *
+    * m4 or 6 is the first vertex element data we fill, which is
+    * the vertex position.
+    */
+
+   current_annotation = "indices, point width, clip flags";
+   reg = brw_message_reg(header_mrf++);
+   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
+   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
+           src_reg(output_reg[VERT_RESULT_PSIZ]));
+   }
+
+   current_annotation = "gl_Position";
+   emit(BRW_OPCODE_MOV,
+        brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
+
+   current_annotation = "user clip distances";
+   if (c->key.nr_userclip) {
+      for (int i = 0; i < c->key.nr_userclip; i++) {
+         struct brw_reg m;
+         if (i < 4)
+            m = brw_message_reg(header_mrf);
+         else
+            m = brw_message_reg(header_mrf + 1);
+
+         /* Planes 0-3 go to channels x..w of the first register and planes
+          * 4-7 to x..w of the second, so the channel is i & 3; a writemask
+          * only has four bits.
+          *
+          * NOTE(review): this DP4 is given a single source (the plane);
+          * the gen4 path passes the position as well - confirm.
+          */
+         emit(BRW_OPCODE_DP4,
+              dst_reg(brw_writemask(m, 1 << (i & 3))),
+              src_reg(c->userplane[i]));
+      }
+      header_mrf += 2;
+   }
+
+   current_annotation = NULL;
+
+   return header_mrf;
+}
+
+static int
+align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (intel->gen >= 6) {
+      /* URB data written (does not include the message header reg) must
+       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
+       * section 5.4.3.2.2: URB_INTERLEAVED.
+       *
+       * URB entries are allocated on a multiple of 1024 bits, so an
+       * extra 128 bits written here to make the end align to 256 is
+       * no problem.
+       */
+      if ((mlen % 2) != 1)
+	 mlen++;
+   }
+
+   return mlen;
+}
+
+/**
+ * Generates the VUE payload plus the 1 or 2 URB write instructions to
+ * complete the VS thread.
+ *
+ * The VUE layout is documented in Volume 2a.
+ */
+void
+vec4_visitor::emit_urb_writes()
+{
+   int base_mrf = 1;
+   int mrf = base_mrf;
+   int urb_entry_size;
+
+   /* FINISHME: edgeflag */
+
+   /* First mrf is the g0-based message header containing URB handles and such,
+    * which is implied in VS_OPCODE_URB_WRITE.
+    */
+   mrf++;
+
+   if (intel->gen >= 6) {
+      mrf = emit_vue_header_gen6(mrf);
+   } else {
+      mrf = emit_vue_header_gen4(mrf);
+   }
+
+   int attr;
+   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
+      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+	 continue;
+
+      /* This is loaded into the VUE header, and thus doesn't occupy
+       * an attribute slot.
+       */
+      if (attr == VERT_RESULT_PSIZ)
+	 continue;
+
+      emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+
+      /* If this is MRF 15, we can't fit anything more into this URB
+       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
+       * even-numbered amount of URB write data, which will meet
+       * gen6's requirements for length alignment.
+       */
+      if (mrf == 15)
+	 break;
+   }
+
+   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+   inst->base_mrf = base_mrf;
+   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+   inst->eot = true;
+
+   urb_entry_size = mrf - base_mrf;
+
+   for (; attr < VERT_RESULT_MAX; attr++) {
+      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+	 continue;
+      fail("Second URB write not supported.\n");
+      break;
+   }
+
+   if (intel->gen == 6)
+      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
+   else
+      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
+}
+
+vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
+			   struct gl_shader_program *prog,
+			   struct brw_shader *shader)
+{
+   this->c = c;
+   this->p = &c->func;
+   this->brw = p->brw;
+   this->intel = &brw->intel;
+   this->ctx = &intel->ctx;
+   this->prog = prog;
+   this->shader = shader;
+
+   this->mem_ctx = ralloc_context(NULL);
+   this->failed = false;
+
+   this->base_ir = NULL;
+   this->current_annotation = NULL;
+
+   this->c = c;
+   this->prog_data = &c->prog_data;
+
+   this->variable_ht = hash_table_ctor(0,
+				       hash_table_pointer_hash,
+				       hash_table_pointer_compare);
+
+   this->virtual_grf_sizes = NULL;
+   this->virtual_grf_count = 0;
+   this->virtual_grf_array_size = 0;
+}
+
+vec4_visitor::~vec4_visitor()
+{
+   hash_table_dtor(this->variable_ht);
+}
+
+
+void
+vec4_visitor::fail(const char *format, ...)
+{
+   va_list va;
+   char *msg;
+
+   if (failed)
+      return;
+
+   failed = true;
+
+   va_start(va, format);
+   msg = ralloc_vasprintf(mem_ctx, format, va);
+   va_end(va);
+   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
+
+   this->fail_msg = msg;
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      fprintf(stderr, "%s",  msg);
+   }
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index a9ad5311fe3..bd0677db151 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -30,6 +30,7 @@
   */
            
 
+#include "main/compiler.h"
 #include "brw_context.h"
 #include "brw_vs.h"
 #include "brw_util.h"
@@ -50,6 +51,7 @@ static void do_vs_prog( struct brw_context *brw,
    void *mem_ctx;
    int aux_size;
    int i;
+   static int new_vs = -1;
 
    memset(&c, 0, sizeof(c));
    memcpy(&c.key, key, sizeof(*key));
@@ -85,7 +87,15 @@ static void do_vs_prog( struct brw_context *brw,
 
    /* Emit GEN4 code.
     */
-   brw_vs_emit(&c);
+   if (new_vs == -1)
+      new_vs = getenv("INTEL_NEW_VS") != NULL;
+
+   if (new_vs) {
+      if (!brw_vs_emit(&c))
+	 brw_old_vs_emit(&c);
+   } else {
+      brw_old_vs_emit(&c);
+   }
 
    /* get the program
     */
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 432994a8534..9f9fed33970 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -92,6 +92,7 @@ struct brw_vs_compile {
    GLboolean needs_stack;
 };
 
-void brw_vs_emit( struct brw_vs_compile *c );
+bool brw_vs_emit(struct brw_vs_compile *c);
+void brw_old_vs_emit(struct brw_vs_compile *c);
 
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index dbabb44e45c..a06a2bbec52 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1903,7 +1903,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c)
 
 /* Emit the vertex program instructions here.
  */
-void brw_vs_emit(struct brw_vs_compile *c )
+void brw_old_vs_emit(struct brw_vs_compile *c )
 {
 #define MAX_IF_DEPTH 32
 #define MAX_LOOP_DEPTH 32
-- 
cgit v1.2.3


From a070d5f363e99b0f846d555e9ca3a74ec807fdc0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 May 2011 12:50:16 -0700
Subject: i965/vs: Start adding support for uniforms

There's no clever packing here, no pull constants, and no array support.
---
 src/mesa/drivers/dri/i965/brw_context.h          |  22 +++-
 src/mesa/drivers/dri/i965/brw_curbe.c            |  27 ++--
 src/mesa/drivers/dri/i965/brw_vec4.h             |   5 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp      |  73 ++++++++---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 150 ++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |   2 +-
 src/mesa/drivers/dri/i965/gen6_vs_state.c        |  21 +++-
 src/mesa/drivers/dri/i965/gen6_wm_state.c        |   2 +-
 src/mesa/drivers/dri/i965/gen7_wm_state.c        |   2 +-
 9 files changed, 256 insertions(+), 48 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7b6b64c1a5c..4a1abd6252e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -248,6 +248,7 @@ enum param_conversion {
    PARAM_CONVERT_F2I,
    PARAM_CONVERT_F2U,
    PARAM_CONVERT_F2B,
+   PARAM_CONVERT_ZERO,
 };
 
 /* Data about a particular attempt to compile a program.  Note that
@@ -317,6 +318,13 @@ struct brw_vs_prog_data {
    /* Used for calculating urb partitions:
     */
    GLuint urb_entry_size;
+
+   const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+   enum param_conversion param_convert[MAX_UNIFORMS * 4];
+   const float *pull_param[MAX_UNIFORMS * 4];
+   enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
+
+   bool uses_new_param_layout;
 };
 
 
@@ -898,7 +906,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
 }
 
 static inline
-float convert_param(enum param_conversion conversion, float param)
+float convert_param(enum param_conversion conversion, const float *param)
 {
    union {
       float f;
@@ -908,21 +916,23 @@ float convert_param(enum param_conversion conversion, float param)
 
    switch (conversion) {
    case PARAM_NO_CONVERT:
-      return param;
+      return *param;
    case PARAM_CONVERT_F2I:
-      fi.i = param;
+      fi.i = *param;
       return fi.f;
    case PARAM_CONVERT_F2U:
-      fi.u = param;
+      fi.u = *param;
       return fi.f;
    case PARAM_CONVERT_F2B:
-      if (param != 0.0)
+      if (*param != 0.0)
 	 fi.i = 1;
       else
 	 fi.i = 0;
       return fi.f;
+   case PARAM_CONVERT_ZERO:
+      return 0.0;
    default:
-      return param;
+      return *param;
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index ae11c487a2c..960be10006e 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
       /* copy float constants */
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
 	 buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
-					 *brw->wm.prog_data->param[i]);
+					 brw->wm.prog_data->param[i]);
       }
    }
 
@@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw)
       GLuint offset = brw->curbe.vs_start * 16;
       GLuint nr = brw->vs.prog_data->nr_params / 4;
 
-      /* Load the subset of push constants that will get used when
-       * we also have a pull constant buffer.
-       */
-      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
-	 if (brw->vs.constant_map[i] != -1) {
-	    assert(brw->vs.constant_map[i] <= nr);
-	    memcpy(buf + offset + brw->vs.constant_map[i] * 4,
-		   vp->program.Base.Parameters->ParameterValues[i],
-		   4 * sizeof(float));
+      if (brw->vs.prog_data->uses_new_param_layout) {
+	 for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+	    buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i],
+					    brw->vs.prog_data->param[i]);
+	 }
+      } else {
+	 /* Load the subset of push constants that will get used when
+	  * we also have a pull constant buffer.
+	  */
+	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+	    if (brw->vs.constant_map[i] != -1) {
+	       assert(brw->vs.constant_map[i] <= nr);
+	       memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+		      vp->program.Base.Parameters->ParameterValues[i],
+		      4 * sizeof(float));
+	    }
 	 }
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 10168fc1cb0..01058243f04 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -356,6 +356,8 @@ public:
     * for the ir->location's used.
     */
    dst_reg output_reg[VERT_RESULT_MAX];
+   int uniform_size[MAX_UNIFORMS];
+   int uniforms;
 
    struct hash_table *variable_ht;
 
@@ -363,7 +365,10 @@ public:
    void fail(const char *msg, ...);
 
    int virtual_grf_alloc(int size);
+   int setup_uniform_values(int loc, const glsl_type *type);
+   void setup_builtin_uniform_values(ir_variable *ir);
    int setup_attributes(int payload_reg);
+   int setup_uniforms(int payload_reg);
    void setup_payload();
    void reg_allocate_trivial();
    void reg_allocate();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index bdc7a79d83d..1f2853e1118 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -67,20 +67,12 @@ vec4_visitor::setup_attributes(int payload_reg)
 
    prog_data->urb_read_length = (nr_attributes + 1) / 2;
 
-   return nr_attributes;
+   return payload_reg + nr_attributes;
 }
 
-void
-vec4_visitor::setup_payload(void)
+int
+vec4_visitor::setup_uniforms(int reg)
 {
-   int reg = 0;
-
-   /* r0 is always reserved, as it contains the payload with the URB
-    * handles that are passed on to the URB write at the end of the
-    * thread.
-    */
-   reg++;
-
    /* User clip planes from curbe:
     */
    if (c->key.nr_userclip) {
@@ -99,14 +91,49 @@ vec4_visitor::setup_payload(void)
       }
    }
 
-   /* FINISHME: push constants */
+   /* The pre-gen6 VS requires that some push constants get loaded no
+    * matter what, or the GPU would hang.
+    */
+   if (this->uniforms == 0) {
+      this->uniform_size[this->uniforms] = 1;
+
+      for (unsigned int i = 0; i < 4; i++) {
+	 unsigned int slot = this->uniforms * 4 + i;
+
+	 c->prog_data.param[slot] = NULL;
+	 c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
+      }
+
+      this->uniforms++;
+   } else {
+      reg += ALIGN(uniforms, 2) / 2;
+   }
+
+   /* for now, we are not doing any elimination of unused slots, nor
+    * are we packing our uniforms.
+    */
+   c->prog_data.nr_params = this->uniforms * 4;
+
    c->prog_data.curb_read_length = reg - 1;
-   c->prog_data.nr_params = 0;
-   /* XXX 0 causes a bug elsewhere... */
-   if (intel->gen < 6 && c->prog_data.nr_params == 0)
-      c->prog_data.nr_params = 4;
+   c->prog_data.uses_new_param_layout = true;
+
+   return reg;
+}
+
+void
+vec4_visitor::setup_payload(void)
+{
+   int reg = 0;
+
+   /* The payload always contains important data in g0, which contains
+    * the URB handles that are passed on to the URB write at the end
+    * of the thread.  So, we always start push constants at g1.
+    */
+   reg++;
 
-   reg += setup_attributes(reg);
+   reg = setup_uniforms(reg);
+
+   reg = setup_attributes(reg);
 
    this->first_non_payload_grf = reg;
 }
@@ -174,6 +201,18 @@ vec4_instruction::get_src(int i)
       }
       break;
 
+   case UNIFORM:
+      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
+				    ((src[i].reg + src[i].reg_offset) % 2) * 4),
+		       0, 4, 1);
+      brw_reg = retype(brw_reg, src[i].type);
+      brw_reg.dw1.bits.swizzle = src[i].swizzle;
+      if (src[i].abs)
+	 brw_reg = brw_abs(brw_reg);
+      if (src[i].negate)
+	 brw_reg = negate(brw_reg);
+      break;
+
    case HW_REG:
       brw_reg = src[i].fixed_hw_reg;
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index bba1d810f19..91abd40faad 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -22,7 +22,10 @@
  */
 
 #include "brw_vec4.h"
+extern "C" {
 #include "main/macros.h"
+#include "program/prog_parameter.h"
+}
 
 namespace brw {
 
@@ -306,6 +309,130 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
    this->type = brw_type_for_base_type(type);
 }
 
+/* Our support for uniforms is piggy-backed on the vertex program's
+ * parameter list (this->vp->Base.Parameters), because that's where the
+ * values actually get stored, rather than in some global
+ * gl_shader_program uniform store.
+ */
+int
+vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
+{
+   unsigned int offset = 0;
+   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
+
+   if (type->is_matrix()) {
+      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+							type->vector_elements,
+							1);
+
+      for (unsigned int i = 0; i < type->matrix_columns; i++) {
+	 offset += setup_uniform_values(loc + offset, column);
+      }
+
+      return offset;
+   }
+
+   switch (type->base_type) {
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_BOOL:
+      for (unsigned int i = 0; i < type->vector_elements; i++) {
+	 int slot = this->uniforms * 4 + i;
+	 switch (type->base_type) {
+	 case GLSL_TYPE_FLOAT:
+	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+	    break;
+	 case GLSL_TYPE_UINT:
+	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
+	    break;
+	 case GLSL_TYPE_INT:
+	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
+	    break;
+	 case GLSL_TYPE_BOOL:
+	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
+	    break;
+	 default:
+	    assert(!"not reached");
+	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+	    break;
+	 }
+	 c->prog_data.param[slot] = &values[i];
+      }
+
+      for (unsigned int i = type->vector_elements; i < 4; i++) {
+	 c->prog_data.param_convert[this->uniforms * 4 + i] =
+	    PARAM_CONVERT_ZERO;
+	 c->prog_data.param[this->uniforms * 4 + i] = NULL;
+      }
+
+      this->uniform_size[this->uniforms] = type->vector_elements;
+      this->uniforms++;
+
+      return 1;
+
+   case GLSL_TYPE_STRUCT:
+      for (unsigned int i = 0; i < type->length; i++) {
+	 offset += setup_uniform_values(loc + offset,
+					type->fields.structure[i].type);
+      }
+      return offset;
+
+   case GLSL_TYPE_ARRAY:
+      for (unsigned int i = 0; i < type->length; i++) {
+	 offset += setup_uniform_values(loc + offset, type->fields.array);
+      }
+      return offset;
+
+   case GLSL_TYPE_SAMPLER:
+      /* The sampler takes up a slot, but we don't use any values from it. */
+      return 1;
+
+   default:
+      assert(!"not reached");
+      return 0;
+   }
+}
+
+/* Our support for builtin uniforms is even scarier than non-builtin.
+ * It sits on top of the PROG_STATE_VAR parameters that are
+ * automatically updated from GL context state.
+ */
+void
+vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
+{
+   const ir_state_slot *const slots = ir->state_slots;
+   assert(ir->state_slots != NULL);
+
+   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+      /* This state reference has already been set up by ir_to_mesa,
+       * but we'll get the same index back here.  We can reference
+       * ParameterValues directly, since unlike brw_fs.cpp, we never
+       * add new state references during compile.
+       */
+      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
+					    (gl_state_index *)slots[i].tokens);
+      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
+
+      this->uniform_size[this->uniforms] = 0;
+      /* Add each of the unique swizzled channels of the element.
+       * This will end up matching the size of the glsl_type of this field.
+       */
+      int last_swiz = -1;
+      for (unsigned int j = 0; j < 4; j++) {
+	 int swiz = GET_SWZ(slots[i].swizzle, j);
+	 if (swiz == last_swiz)
+	    break;
+	 last_swiz = swiz;
+
+	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
+	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
+	 this->uniform_size[this->uniforms]++;
+      }
+      this->uniforms++;
+   }
+}
+
 dst_reg *
 vec4_visitor::variable_storage(ir_variable *var)
 {
@@ -496,13 +623,10 @@ vec4_visitor::visit(ir_variable *ir)
    switch (ir->mode) {
    case ir_var_in:
       reg = new(mem_ctx) dst_reg(ATTR, ir->location);
-      reg->type = brw_type_for_base_type(ir->type);
-      hash_table_insert(this->variable_ht, reg, ir);
       break;
 
    case ir_var_out:
       reg = new(mem_ctx) dst_reg(this, ir->type);
-      hash_table_insert(this->variable_ht, reg, ir);
 
       for (int i = 0; i < type_size(ir->type); i++) {
 	 output_reg[ir->location + i] = *reg;
@@ -512,14 +636,21 @@ vec4_visitor::visit(ir_variable *ir)
 
    case ir_var_temporary:
       reg = new(mem_ctx) dst_reg(this, ir->type);
-      hash_table_insert(this->variable_ht, reg, ir);
-
       break;
 
    case ir_var_uniform:
-      /* FINISHME: uniforms */
+      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
+
+      if (!strncmp(ir->name, "gl_", 3)) {
+	 setup_builtin_uniform_values(ir);
+      } else {
+	 setup_uniform_values(ir->location, ir->type);
+      }
       break;
    }
+
+   reg->type = brw_type_for_base_type(ir->type);
+   hash_table_insert(this->variable_ht, reg, ir);
 }
 
 void
@@ -1606,6 +1737,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->current_annotation = NULL;
 
    this->c = c;
+   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
    this->prog_data = &c->prog_data;
 
    this->variable_ht = hash_table_ctor(0,
@@ -1615,6 +1747,12 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->virtual_grf_sizes = NULL;
    this->virtual_grf_count = 0;
    this->virtual_grf_array_size = 0;
+
+   this->uniforms = 0;
+
+   this->variable_ht = hash_table_ctor(0,
+				       hash_table_pointer_hash,
+				       hash_table_pointer_compare);
 }
 
 vec4_visitor::~vec4_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index fb4fb146f8d..ad909789d82 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw)
    constants = brw->wm.const_bo->virtual;
    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
       constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i],
-				   *brw->wm.prog_data->pull_param[i]);
+				   brw->wm.prog_data->pull_param[i]);
    }
    drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
 
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index e70454416bf..affa72c7324 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw)
 	 params_uploaded++;
       }
 
-      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
-	 if (brw->vs.constant_map[i] != -1) {
-	    memcpy(param + brw->vs.constant_map[i] * 4,
-		   vp->program.Base.Parameters->ParameterValues[i],
-		   4 * sizeof(float));
-	    params_uploaded++;
+      if (brw->vs.prog_data->uses_new_param_layout) {
+	 for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+	    *param = convert_param(brw->vs.prog_data->param_convert[i],
+				   brw->vs.prog_data->param[i]);
+	    param++;
+	 }
+	 params_uploaded += brw->vs.prog_data->nr_params / 4;
+      } else {
+	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+	    if (brw->vs.constant_map[i] != -1) {
+	       memcpy(param + brw->vs.constant_map[i] * 4,
+		      vp->program.Base.Parameters->ParameterValues[i],
+		      4 * sizeof(float));
+	       params_uploaded++;
+	    }
 	 }
       }
 
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 3d525248f25..07e9995f53b 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -61,7 +61,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
 
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
 	 constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
-				      *brw->wm.prog_data->param[i]);
+				      brw->wm.prog_data->param[i]);
       }
 
       if (0) {
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index a102ca772b3..1d80e96778e 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw)
 
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
 	 constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
-				      *brw->wm.prog_data->param[i]);
+				      brw->wm.prog_data->param[i]);
       }
 
       if (0) {
-- 
cgit v1.2.3


From 83d5850518388202c5589d3181b84fb54c213fb1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:18:00 -0700
Subject: i965/vs: Fix constant vector construction.

Fixes some issues noticed in glsl-vs-all-01.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 91abd40faad..8ee4884098c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1391,10 +1391,10 @@ vec4_visitor::visit(ir_constant *ir)
       return;
    }
 
-   for (int i = 0; i < ir->type->vector_elements; i++) {
-      this->result = src_reg(this, ir->type);
-      dst_reg dst = dst_reg(this->result);
+   this->result = src_reg(this, ir->type);
+   dst_reg dst = dst_reg(this->result);
 
+   for (int i = 0; i < ir->type->vector_elements; i++) {
       dst.writemask = 1 << i;
 
       switch (ir->type->base_type) {
-- 
cgit v1.2.3


From 4a4857246c79c42d918a84d7e28e9afff3a9ef6d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:23:42 -0700
Subject: i965/vs: Port the fix for clip plane writemasks from brw_vs_emit.c.

---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8ee4884098c..439969ab7ea 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1618,7 +1618,7 @@ vec4_visitor::emit_vue_header_gen6(int header_mrf)
 	    m = brw_message_reg(header_mrf + 1);
 
 	 emit(BRW_OPCODE_DP4,
-	      dst_reg(brw_writemask(m, 1 << (i & 7))),
+	      dst_reg(brw_writemask(m, 1 << (i & 3))),
 	      src_reg(c->userplane[i]));
       }
       header_mrf += 2;
-- 
cgit v1.2.3


From c0f334a3ed3c6645abd1812e39cd52f1dfa32fa1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:29:48 -0700
Subject: i965/vs: Don't emit an extra copy of the vertex position.

Fixes glsl-vs-abs-neg, glsl-vs-all-01, and probably many other tests.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 439969ab7ea..c4a3c8a8667 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1550,7 +1550,7 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf)
        * dword 8-11 (m3) of the vertex header is the 4D space position
        * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
        * m6 is a pad so that the vertex element data is aligned
-       * m7 is the first vertex data we fill, which is the vertex position.
+       * m7 is the first vertex data we fill.
        */
       current_annotation = "NDC";
       emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
@@ -1561,15 +1561,14 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf)
       /* user clip distance. */
       header_mrf += 2;
 
-      /* Pad so that vertex element data (starts with position) is aligned. */
+      /* Pad so that vertex element data is aligned. */
       header_mrf++;
    } else {
       /* There are 8 dwords in VUE header pre-Ironlake:
        * dword 0-3 (m1) is indices, point width, clip flags.
        * dword 4-7 (m2) is ndc position (set above)
        *
-       * dword 8-11 (m3) is the first vertex data, which we always have be the
-       * vertex position.
+       * dword 8-11 (m3) is the first vertex data.
        */
       current_annotation = "NDC";
       emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
@@ -1592,8 +1591,7 @@ vec4_visitor::emit_vue_header_gen6(int header_mrf)
     * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
     * enabled.
     *
-    * m4 or 6 is the first vertex element data we fill, which is
-    * the vertex position.
+    * m4 or 6 is the first vertex element data we fill.
     */
 
    current_annotation = "indices, point width, clip flags";
@@ -1681,6 +1679,10 @@ vec4_visitor::emit_urb_writes()
       if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
 	 continue;
 
+      /* This is set up in the VUE header. */
+      if (attr == VERT_RESULT_HPOS)
+	 continue;
+
       /* This is loaded into the VUE header, and thus doesn't occupy
        * an attribute slot.
        */
-- 
cgit v1.2.3


From 82aa9299fbfe92d2526fa9f7ffd2a1ebc7827ee9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:31:30 -0700
Subject: i965/vs: Allow scalar values in assignments, too.

Fixes glsl-vs-all-02 and many other tests.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c4a3c8a8667..e3779ab0444 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1285,7 +1285,8 @@ vec4_visitor::visit(ir_assignment *ir)
    int first_enabled_chan = 0;
    int src_chan = 0;
 
-   assert(ir->lhs->type->is_vector());
+   assert(ir->lhs->type->is_vector() ||
+	  ir->lhs->type->is_scalar());
    dst.writemask = ir->write_mask;
 
    for (int i = 0; i < 4; i++) {
-- 
cgit v1.2.3


From 78fac1892a3a7a90eb7baf78903d70649028d27a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:35:24 -0700
Subject: i965/vs: Allocate storage for "auto" variables just like temps.

Fixes segfault in glsl-vs-cross-2.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e3779ab0444..3e62c9ebba8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -634,6 +634,7 @@ vec4_visitor::visit(ir_variable *ir)
       }
       break;
 
+   case ir_var_auto:
    case ir_var_temporary:
       reg = new(mem_ctx) dst_reg(this, ir->type);
       break;
@@ -647,6 +648,9 @@ vec4_visitor::visit(ir_variable *ir)
 	 setup_uniform_values(ir->location, ir->type);
       }
       break;
+
+   default:
+      assert(!"not reached");
    }
 
    reg->type = brw_type_for_base_type(ir->type);
-- 
cgit v1.2.3


From bb468fc1ede9b0a5231ebfaa51df444502d33654 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:37:18 -0700
Subject: i965/vs: Fix ir_swizzle handling.

I decided to refactor it a bit in adapting ir_to_mesa.cpp code, and
mangled it.  Fixes glsl-vs-cross-2.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3e62c9ebba8..4f2a2011068 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1109,7 +1109,7 @@ vec4_visitor::visit(ir_swizzle *ir)
    src = this->result;
    assert(src.file != BAD_FILE);
 
-   if (i < ir->type->vector_elements) {
+   for (i = 0; i < ir->type->vector_elements; i++) {
       switch (i) {
       case 0:
 	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
-- 
cgit v1.2.3


From aa753c5a14637ede804e8043762693122174bf8c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:05:42 -0700
Subject: i965/vs: Disable loops for now until rendering is generally correct.

---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 4f2a2011068..c3b55db4ac1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -662,6 +662,8 @@ vec4_visitor::visit(ir_loop *ir)
 {
    ir_dereference_variable *counter = NULL;
 
+   fail("not yet\n");
+
    /* We don't want debugging output to print the whole body of the
     * loop as the annotation.
     */
-- 
cgit v1.2.3


From 164ccd27787e0df4ae6f85a7178aff0720d56ac9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:12:16 -0700
Subject: i965/vs: Fix support for "IF" instructions by copying
 brw_fs_visitor.cpp.

Fixes glsl-vs-if-greater.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c3b55db4ac1..014f7e62284 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1451,12 +1451,18 @@ vec4_visitor::visit(ir_discard *ir)
 void
 vec4_visitor::visit(ir_if *ir)
 {
+   /* Don't point the annotation at the if statement, because then it plus
+    * the then and else blocks get printed.
+    */
    this->base_ir = ir->condition;
-   ir->condition->accept(this);
-   assert(this->result.file != BAD_FILE);
 
-   /* FINISHME: condcode */
-   emit(BRW_OPCODE_IF);
+   if (intel->gen == 6) {
+      emit_if_gen6(ir);
+   } else {
+      emit_bool_to_cond_code(ir->condition);
+      vec4_instruction *inst = emit(BRW_OPCODE_IF);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+   }
 
    visit_instructions(&ir->then_instructions);
 
-- 
cgit v1.2.3


From eca762d831e099b549dafa0be896eac82b3fceb9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:18:31 -0700
Subject: i965/vs: Fix support for zero uniforms in use.

We were looking for attributes in the wrong place (reg was not advanced
past the dummy uniform), and pointlessly doing this work on gen6 as well.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 1f2853e1118..be089369bcf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -94,7 +94,7 @@ vec4_visitor::setup_uniforms(int reg)
    /* The pre-gen6 VS requires that some push constants get loaded no
     * matter what, or the GPU would hang.
     */
-   if (this->uniforms == 0) {
+   if (intel->gen < 6 && this->uniforms == 0) {
       this->uniform_size[this->uniforms] = 1;
 
       for (unsigned int i = 0; i < 4; i++) {
@@ -105,6 +105,7 @@ vec4_visitor::setup_uniforms(int reg)
       }
 
       this->uniforms++;
+      reg++;
    } else {
       reg += ALIGN(uniforms, 2) / 2;
    }
-- 
cgit v1.2.3


From e5363c7fd2ed6318e86ba4a62adc0c2377e51eef Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:29:41 -0700
Subject: i965/vs: Use an appropriate swizzle on src regs from variables.

Fixes glsl-vs-if-bool.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 014f7e62284..734e2514536 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1140,6 +1140,7 @@ vec4_visitor::visit(ir_swizzle *ir)
 void
 vec4_visitor::visit(ir_dereference_variable *ir)
 {
+   const struct glsl_type *type = ir->type;
    dst_reg *reg = variable_storage(ir->var);
 
    if (!reg) {
@@ -1149,6 +1150,9 @@ vec4_visitor::visit(ir_dereference_variable *ir)
    }
 
    this->result = src_reg(*reg);
+
+   if (type->is_scalar() || type->is_vector() || type->is_matrix())
+      this->result.swizzle = swizzle_for_size(type->vector_elements);
 }
 
 void
-- 
cgit v1.2.3


From 814a9bef30beda427e8fbf6f3b8abb6a45f0e2e4 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:31:53 -0700
Subject: i965/vs: Drop the assertion about dst.reg_offset == 0.

Adding the offset is the right thing to do here, and fixes
glsl-vs-mat-add-1.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index be089369bcf..a41c58c7d52 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -146,7 +146,6 @@ vec4_instruction::get_dst(void)
 
    switch (dst.file) {
    case GRF:
-      assert(dst.reg_offset == 0);
       brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
       brw_reg = retype(brw_reg, dst.type);
       brw_reg.dw1.bits.writemask = dst.writemask;
-- 
cgit v1.2.3


From 8e947c2546c25c0dfa93b538e54113af1bf582df Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:38:44 -0700
Subject: i965/vs: Fix the types of array/struct dereferences.

Fixes glsl-vs-arrays-3.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 734e2514536..b6f3cbc265f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1198,6 +1198,7 @@ vec4_visitor::visit(ir_dereference_array *ir)
       src.swizzle = swizzle_for_size(ir->type->vector_elements);
    else
       src.swizzle = BRW_SWIZZLE_NOOP;
+   src.type = brw_type_for_base_type(ir->type);
 
    this->result = src;
 }
@@ -1222,6 +1223,7 @@ vec4_visitor::visit(ir_dereference_record *ir)
       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
    else
       this->result.swizzle = BRW_SWIZZLE_NOOP;
+   this->result.type = brw_type_for_base_type(ir->type);
 
    this->result.reg_offset += offset;
 }
-- 
cgit v1.2.3


From c3752b399ab376aa53392afb8f2d4b526054f0a8 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:40:46 -0700
Subject: i965/vs: Add support for dot product opcodes.

Fixes glsl-vs-dot-vec2.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index a41c58c7d52..71caf907b38 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -447,6 +447,18 @@ vec4_visitor::generate_code()
 	 brw_SEL(p, dst, src[0], src[1]);
 	 break;
 
+      case BRW_OPCODE_DP4:
+	 brw_DP4(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_DP3:
+	 brw_DP3(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_DP2:
+	 brw_DP2(p, dst, src[0], src[1]);
+	 break;
+
       case BRW_OPCODE_IF:
 	 if (inst->src[0].file != BAD_FILE) {
 	    /* The instruction has an embedded compare (only allowed on gen6) */
-- 
cgit v1.2.3


From 2b7632aeaa5f8b4ab3da7d33a3c71c71023a072a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:03:31 -0700
Subject: i965/vs: Add support for if(any_nequal()) and if(all_equal()) on
 gen6.

Fixes vs-temp-array-mat2-col-rd.shader_test.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |  3 ++-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 23 ++++++++++++++++++++---
 2 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 71caf907b38..bc3110b0458 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -465,7 +465,8 @@ vec4_visitor::generate_code()
 	    assert(intel->gen == 6);
 	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
 	 } else {
-	    brw_IF(p, BRW_EXECUTE_8);
+	    struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
+	    brw_inst->header.predicate_control = inst->predicate;
 	 }
 	 if_depth_in_loop[loop_stack_depth]++;
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index b6f3cbc265f..4237373c13d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -543,7 +543,9 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
 
       assert(expr->get_num_operands() <= 2);
       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
-	 assert(expr->operands[i]->type->is_scalar());
+	 assert(expr->operands[i]->type->is_scalar() ||
+		expr->operation == ir_binop_any_nequal ||
+		expr->operation == ir_binop_all_equal);
 
 	 expr->operands[i]->accept(this);
 	 op[i] = this->result;
@@ -589,13 +591,28 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
       case ir_binop_less:
       case ir_binop_lequal:
       case ir_binop_equal:
-      case ir_binop_all_equal:
       case ir_binop_nequal:
-      case ir_binop_any_nequal:
 	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
 	 inst->conditional_mod =
 	    brw_conditional_for_comparison(expr->operation);
 	 return;
+
+      case ir_binop_all_equal:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+	 inst = emit(BRW_OPCODE_IF);
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+	 return;
+
+      case ir_binop_any_nequal:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+	 inst = emit(BRW_OPCODE_IF);
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+	 return;
+
       default:
 	 assert(!"not reached");
 	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
-- 
cgit v1.2.3


From cda28bca0d789c328d19bf90afd35a5ff74cfb77 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:16:21 -0700
Subject: i965/vs: Apply the gen6 math workaround for math1 instructions.

Fixes glsl-vs-masked-cos.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 4237373c13d..7e0535b5c02 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -122,14 +122,12 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
 {
    /* The gen6 math instruction ignores the source modifiers --
     * swizzle, abs, negate, and at least some parts of the register
-    * region description.  Move the source to the corresponding slots
-    * of the destination generally work.
+    * region description.
     */
-   src_reg expanded = src_reg(this, glsl_type::float_type);
-   emit(BRW_OPCODE_MOV, dst, src);
-   src = expanded;
+   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
 
-   emit(opcode, dst, src);
+   emit(opcode, dst, temp_src);
 }
 
 void
-- 
cgit v1.2.3


From 930afd1774bdcd013bccbd7b5717ae0bb8e3dea3 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:26:48 -0700
Subject: i965/vs: Don't forget to set up assignment condition code for
 arrays/structs.

Fixes vs-uniform-array-mat2-index-col-rd.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 7e0535b5c02..27620c47085 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1276,6 +1276,10 @@ vec4_visitor::emit_block_move(ir_assignment *ir)
 
    dst_reg dst = get_assignment_lhs(ir->lhs, this);
 
+   if (ir->condition) {
+      emit_bool_to_cond_code(ir->condition);
+   }
+
    /* FINISHME: This should really set to the correct maximal writemask for each
     * FINISHME: component written (in the loops below).
     */
-- 
cgit v1.2.3


From 979072613139870f12e329e4b483c7f688b40560 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:46:03 -0700
Subject: i965/vs: Handle assignment of structures/arrays/matrices better.

This gets the right types on the instructions, as well as emitting
minimal swizzles/writemasks.
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |  3 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 69 ++++++++++++++++++--------
 2 files changed, 51 insertions(+), 21 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 01058243f04..1619c2e1ef6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -390,7 +390,8 @@ public:
    void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
    void emit_if_gen6(ir_if *ir);
 
-   void emit_block_move(ir_assignment *ir);
+   void emit_block_move(dst_reg *dst, src_reg *src,
+			const struct glsl_type *type, bool predicated);
 
    /**
     * Emit the correct dot-product instruction for the type of arguments
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 27620c47085..4f7763d61bb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1269,38 +1269,69 @@ get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
 }
 
 void
-vec4_visitor::emit_block_move(ir_assignment *ir)
+vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
+			      const struct glsl_type *type, bool predicated)
 {
-   ir->rhs->accept(this);
-   src_reg src = this->result;
-
-   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+   if (type->base_type == GLSL_TYPE_STRUCT) {
+      for (unsigned int i = 0; i < type->length; i++) {
+	 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
+      }
+      return;
+   }
 
-   if (ir->condition) {
-      emit_bool_to_cond_code(ir->condition);
+   if (type->is_array()) {
+      for (unsigned int i = 0; i < type->length; i++) {
+	 emit_block_move(dst, src, type->fields.array, predicated);
+      }
+      return;
    }
 
-   /* FINISHME: This should really set to the correct maximal writemask for each
-    * FINISHME: component written (in the loops below).
-    */
-   dst.writemask = WRITEMASK_XYZW;
+   if (type->is_matrix()) {
+      const struct glsl_type *vec_type;
 
-   for (int i = 0; i < type_size(ir->lhs->type); i++) {
-      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
-      if (ir->condition)
-	 inst->predicate = BRW_PREDICATE_NORMAL;
+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+					 type->vector_elements, 1);
 
-      dst.reg_offset++;
-      src.reg_offset++;
+      for (int i = 0; i < type->matrix_columns; i++) {
+	 emit_block_move(dst, src, vec_type, predicated);
+      }
+      return;
    }
+
+   assert(type->is_scalar() || type->is_vector());
+
+   dst->type = brw_type_for_base_type(type);
+   src->type = dst->type;
+
+   dst->writemask = (1 << type->vector_elements) - 1;
+
+   /* Do we need to worry about swizzling a swizzle? */
+   assert(src->swizzle = BRW_SWIZZLE_NOOP);
+   src->swizzle = swizzle_for_size(type->vector_elements);
+
+   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
+   if (predicated)
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+   dst->reg_offset++;
+   src->reg_offset++;
 }
 
 void
 vec4_visitor::visit(ir_assignment *ir)
 {
+   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
    if (!ir->lhs->type->is_scalar() &&
        !ir->lhs->type->is_vector()) {
-      emit_block_move(ir);
+      ir->rhs->accept(this);
+      src_reg src = this->result;
+
+      if (ir->condition) {
+	 emit_bool_to_cond_code(ir->condition);
+      }
+
+      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
       return;
    }
 
@@ -1310,8 +1341,6 @@ vec4_visitor::visit(ir_assignment *ir)
    ir->rhs->accept(this);
    src_reg src = this->result;
 
-   dst_reg dst = get_assignment_lhs(ir->lhs, this);
-
    int swizzles[4];
    int first_enabled_chan = 0;
    int src_chan = 0;
-- 
cgit v1.2.3


From aba9801996f2f524a765df378c234a7645b3a5d1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:54:25 -0700
Subject: i965/vs: Fix types of varying outputs.

For structs/arrays/matrices, they were ending up as uint because we
forgot to set them.  All varyings in GLSL 1.20 are of base type float,
so just force the matter here (which gets inherited at
emit_urb_writes() time).

Fixes vs-varying-array-mat2-col-rd.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 4f7763d61bb..5e2b3e5a5fe 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -646,6 +646,7 @@ vec4_visitor::visit(ir_variable *ir)
       for (int i = 0; i < type_size(ir->type); i++) {
 	 output_reg[ir->location + i] = *reg;
 	 output_reg[ir->location + i].reg_offset = i;
+	 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
       }
       break;
 
-- 
cgit v1.2.3


From 31ef2e3ec2f5837eea0899b4bda5ea15e335a6a2 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 21:22:36 -0700
Subject: i965/vs: Avoid generating extra moves when setting up large
 ir_constants.

We were also screwing up the types in the process, and just not
emitting moves was easier.
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |  2 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 77 +++++++++-----------------
 2 files changed, 28 insertions(+), 51 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 1619c2e1ef6..3e457fc61aa 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -393,6 +393,8 @@ public:
    void emit_block_move(dst_reg *dst, src_reg *src,
 			const struct glsl_type *type, bool predicated);
 
+   void emit_constant_values(dst_reg *dst, ir_constant *value);
+
    /**
     * Emit the correct dot-product instruction for the type of arguments
     */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 5e2b3e5a5fe..3562779413f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1387,96 +1387,71 @@ vec4_visitor::visit(ir_assignment *ir)
    }
 }
 
-
 void
-vec4_visitor::visit(ir_constant *ir)
+vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
 {
    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
-      src_reg temp_base = src_reg(this, ir->type);
-      dst_reg temp = dst_reg(temp_base);
-
-      foreach_iter(exec_list_iterator, iter, ir->components) {
-	 ir_constant *field_value = (ir_constant *)iter.get();
-	 int size = type_size(field_value->type);
-
-	 assert(size > 0);
-
-	 field_value->accept(this);
-	 src_reg src = this->result;
-
-	 for (int i = 0; i < (unsigned int)size; i++) {
-	    emit(BRW_OPCODE_MOV, temp, src);
+      foreach_list(node, &ir->components) {
+	 ir_constant *field_value = (ir_constant *)node;
 
-	    src.reg_offset++;
-	    temp.reg_offset++;
-	 }
+	 emit_constant_values(dst, field_value);
       }
-      this->result = temp_base;
       return;
    }
 
    if (ir->type->is_array()) {
-      src_reg temp_base = src_reg(this, ir->type);
-      dst_reg temp = dst_reg(temp_base);
-      int size = type_size(ir->type->fields.array);
-
-      assert(size > 0);
-
       for (unsigned int i = 0; i < ir->type->length; i++) {
-	 ir->array_elements[i]->accept(this);
-	 src_reg src = this->result;
-	 for (int j = 0; j < size; j++) {
-	    emit(BRW_OPCODE_MOV, temp, src);
-
-	    src.reg_offset++;
-	    temp.reg_offset++;
-	 }
+	 emit_constant_values(dst, ir->array_elements[i]);
       }
-      this->result = temp_base;
       return;
    }
 
    if (ir->type->is_matrix()) {
-      this->result = src_reg(this, ir->type);
-      dst_reg dst = dst_reg(this->result);
-
-      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
-
       for (int i = 0; i < ir->type->matrix_columns; i++) {
 	 for (int j = 0; j < ir->type->vector_elements; j++) {
-	    dst.writemask = 1 << j;
-	    emit(BRW_OPCODE_MOV, dst,
+	    dst->writemask = 1 << j;
+	    dst->type = BRW_REGISTER_TYPE_F;
+
+	    emit(BRW_OPCODE_MOV, *dst,
 		 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
 	 }
-	 dst.reg_offset++;
+	 dst->reg_offset++;
       }
       return;
    }
 
-   this->result = src_reg(this, ir->type);
-   dst_reg dst = dst_reg(this->result);
-
    for (int i = 0; i < ir->type->vector_elements; i++) {
-      dst.writemask = 1 << i;
+      dst->writemask = 1 << i;
+      dst->type = brw_type_for_base_type(ir->type);
 
       switch (ir->type->base_type) {
       case GLSL_TYPE_FLOAT:
-	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
 	 break;
       case GLSL_TYPE_INT:
-	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
 	 break;
       case GLSL_TYPE_UINT:
-	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
 	 break;
       case GLSL_TYPE_BOOL:
-	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
 	 break;
       default:
 	 assert(!"Non-float/uint/int/bool constant");
 	 break;
       }
    }
+   dst->reg_offset++;
+}
+
+void
+vec4_visitor::visit(ir_constant *ir)
+{
+   dst_reg dst = dst_reg(this, ir->type);
+   this->result = src_reg(dst);
+
+   emit_constant_values(&dst, ir);
 }
 
 void
-- 
cgit v1.2.3


From 160a5a3ff0fc826a2978c6bea6de21b445f612e9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 21:53:00 -0700
Subject: i965/vs: Add support for VUEs larger than a single URB write.

Fixes glsl-max-varyings.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 42 +++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 8 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3562779413f..f90025c8e7e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1703,6 +1703,7 @@ vec4_visitor::emit_urb_writes()
    int base_mrf = 1;
    int mrf = base_mrf;
    int urb_entry_size;
+   uint64_t outputs_remaining = c->prog_data.outputs_written;
 
    /* FINISHME: edgeflag */
 
@@ -1717,11 +1718,14 @@ vec4_visitor::emit_urb_writes()
       mrf = emit_vue_header_gen4(mrf);
    }
 
+   /* Set up the VUE data for the first URB write */
    int attr;
    for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
       if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
 	 continue;
 
+      outputs_remaining &= ~BITFIELD64_BIT(attr);
+
       /* This is set up in the VUE header. */
       if (attr == VERT_RESULT_HPOS)
 	 continue;
@@ -1734,27 +1738,49 @@ vec4_visitor::emit_urb_writes()
 
       emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
 
-      /* If this is MRF 15, we can't fit anything more into this URB
+      /* If this was MRF 15, we can't fit anything more into this URB
        * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
        * even-numbered amount of URB write data, which will meet
        * gen6's requirements for length alignment.
        */
-      if (mrf == 15)
+      if (mrf == 16) {
+	 attr++;
 	 break;
+      }
    }
 
    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
    inst->base_mrf = base_mrf;
    inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
-   inst->eot = true;
+   inst->eot = !outputs_remaining;
 
    urb_entry_size = mrf - base_mrf;
 
-   for (; attr < VERT_RESULT_MAX; attr++) {
-      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
-	 continue;
-      fail("Second URB write not supported.\n");
-      break;
+   /* Optional second URB write */
+   if (outputs_remaining) {
+      mrf = base_mrf + 1;
+
+      for (; attr < VERT_RESULT_MAX; attr++) {
+	 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+	    continue;
+
+	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+
+	 assert(mrf != 16);
+      }
+
+      inst = emit(VS_OPCODE_URB_WRITE);
+      inst->base_mrf = base_mrf;
+      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+      inst->eot = true;
+      /* URB destination offset.  The previous write filled MRFs 2-15
+       * (15 MRFs minus the one header MRF), so 14 regs.  URB offset is in
+       * URB row increments, and each of our MRFs is half of one of
+       * those, since we're doing interleaved writes.
+       */
+      inst->offset = 14 / 2;
+
+      urb_entry_size += mrf - base_mrf;
    }
 
    if (intel->gen == 6)
-- 
cgit v1.2.3


From e355b179b2bd42a585464f17759764083fa3ef26 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 10:43:49 -0700
Subject: i965: Remove dead brw->wm.max_threads field.

---
 src/mesa/drivers/dri/i965/brw_context.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4a1abd6252e..38b13098bc0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -735,7 +735,6 @@ struct brw_context
       GLuint render_surf;
       GLuint nr_surfaces;      
 
-      GLuint max_threads;
       drm_intel_bo *scratch_bo;
 
       GLuint sampler_count;
-- 
cgit v1.2.3


From 2b224d66a01f3ce867fb05558b25749705bbfe7a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 10:44:15 -0700
Subject: i965: Set up allocation of a VS scratch space if required.

---
 src/mesa/drivers/dri/i965/brw_context.h |  6 ++++++
 src/mesa/drivers/dri/i965/brw_program.c | 28 ++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vs.c      |  9 +++++++++
 src/mesa/drivers/dri/i965/brw_vs.h      |  1 +
 src/mesa/drivers/dri/i965/brw_wm.c      | 25 +++----------------------
 5 files changed, 47 insertions(+), 22 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 38b13098bc0..add8c568795 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -312,6 +312,7 @@ struct brw_vs_prog_data {
    GLuint total_grf;
    GLbitfield64 outputs_written;
    GLuint nr_params;       /**< number of float params/constants */
+   GLuint total_scratch;
 
    GLuint inputs_read;
 
@@ -671,6 +672,7 @@ struct brw_context
       struct brw_vs_prog_data *prog_data;
       int8_t *constant_map; /* variable array following prog_data */
 
+      drm_intel_bo *scratch_bo;
       drm_intel_bo *const_bo;
       /** Offset in the program cache to the VS program */
       uint32_t prog_offset;
@@ -858,6 +860,10 @@ void brw_validate_textures( struct brw_context *brw );
  */
 void brwInitFragProgFuncs( struct dd_function_table *functions );
 
+int brw_get_scratch_size(int size);
+void brw_get_scratch_bo(struct intel_context *intel,
+			drm_intel_bo **scratch_bo, int size);
+
 
 /* brw_urb.c
  */
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 6674f1640c8..09b5be4c96e 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -226,6 +226,34 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,
    return GL_TRUE;
 }
 
+/* Per-thread scratch space is a power-of-two multiple of 1KB. */
+int
+brw_get_scratch_size(int size)
+{
+   int i;
+
+   for (i = 1024; i < size; i *= 2)
+      ;
+
+   return i;
+}
+
+void
+brw_get_scratch_bo(struct intel_context *intel,
+		   drm_intel_bo **scratch_bo, int size)
+{
+   drm_intel_bo *old_bo = *scratch_bo;
+
+   if (old_bo && old_bo->size < size) {
+      drm_intel_bo_unreference(old_bo);
+      old_bo = NULL;
+   }
+
+   if (!old_bo) {
+      *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096);
+   }
+}
+
 void brwInitFragProgFuncs( struct dd_function_table *functions )
 {
    assert(functions->ProgramStringNotify == _tnl_program_string); 
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index bd0677db151..d389f602fba 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -45,6 +45,7 @@ static void do_vs_prog( struct brw_context *brw,
 			struct brw_vs_prog_key *key )
 {
    struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
    GLuint program_size;
    const GLuint *program;
    struct brw_vs_compile c;
@@ -97,6 +98,14 @@ static void do_vs_prog( struct brw_context *brw,
       brw_old_vs_emit(&c);
    }
 
+   /* Scratch space is used for register spilling */
+   if (c.last_scratch) {
+      c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
+
+      brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
+			 c.prog_data.total_scratch * brw->vs_max_threads);
+   }
+
    /* get the program
     */
    program = brw_get_program(&c.func, &program_size);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 9f9fed33970..83a37f5b800 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -66,6 +66,7 @@ struct brw_vs_compile {
    GLuint first_output;
    GLuint nr_outputs;
    GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+   GLuint last_scratch;
 
    GLuint first_tmp;
    GLuint last_tmp;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index d13ac6124c8..a4524fc7889 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -244,29 +244,10 @@ bool do_wm_prog(struct brw_context *brw,
 
    /* Scratch space is used for register spilling */
    if (c->last_scratch) {
-      uint32_t total_scratch;
+      c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
 
-      /* Per-thread scratch space is power-of-two sized. */
-      for (c->prog_data.total_scratch = 1024;
-	   c->prog_data.total_scratch <= c->last_scratch;
-	   c->prog_data.total_scratch *= 2) {
-	 /* empty */
-      }
-      total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
-
-      if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
-	 drm_intel_bo_unreference(brw->wm.scratch_bo);
-	 brw->wm.scratch_bo = NULL;
-      }
-      if (brw->wm.scratch_bo == NULL) {
-	 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
-						 "wm scratch",
-						 total_scratch,
-						 4096);
-      }
-   }
-   else {
-      c->prog_data.total_scratch = 0;
+      brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
+			 c->prog_data.total_scratch * brw->wm_max_threads);
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_WM))
-- 
cgit v1.2.3


From 314c2574ff6e562a6cfc5fb84980f092e495a948 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 10:47:54 -0700
Subject: i965: Add remaining scratch space setup emit to unit states.

---
 src/mesa/drivers/dri/i965/brw_vs_state.c  | 10 ++++++++++
 src/mesa/drivers/dri/i965/gen6_vs_state.c | 10 +++++++++-
 src/mesa/drivers/dri/i965/gen7_vs_state.c | 10 +++++++++-
 src/mesa/drivers/dri/i965/gen7_wm_state.c |  8 +++++++-
 4 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index fc4373ab311..29b3e47ab0c 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -77,6 +77,16 @@ brw_prepare_vs_unit(struct brw_context *brw)
    else
       vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
 
+   if (brw->vs.prog_data->total_scratch != 0) {
+      vs->thread2.scratch_space_base_pointer =
+	 brw->vs.scratch_bo->offset >> 10; /* reloc */
+      vs->thread2.per_thread_scratch_space =
+	 ffs(brw->vs.prog_data->total_scratch) - 11;
+   } else {
+      vs->thread2.scratch_space_base_pointer = 0;
+      vs->thread2.per_thread_scratch_space = 0;
+   }
+
    vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
    vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
    vs->thread3.dispatch_grf_start_reg = 1;
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index affa72c7324..b94121e8437 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -160,7 +160,15 @@ upload_vs_state(struct brw_context *brw)
    OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
 	     GEN6_VS_FLOATING_POINT_MODE_ALT |
 	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
-   OUT_BATCH(0); /* scratch space base offset */
+
+   if (brw->vs.prog_data->total_scratch) {
+      OUT_RELOC(brw->vs.scratch_bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->vs.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
+
    OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
 	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 0fad3d2fb68..f3cd5d15bf0 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -71,7 +71,15 @@ upload_vs_state(struct brw_context *brw)
    OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
 	     GEN6_VS_FLOATING_POINT_MODE_ALT |
 	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
-   OUT_BATCH(0); /* scratch space base offset */
+
+   if (brw->vs.prog_data->total_scratch) {
+      OUT_RELOC(brw->vs.scratch_bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->vs.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
+
    OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
 	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 1d80e96778e..55a603e887a 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -228,7 +228,13 @@ upload_ps_state(struct brw_context *brw)
    OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
    OUT_BATCH(brw->wm.prog_offset);
    OUT_BATCH(dw2);
-   OUT_BATCH(0); /* scratch space base offset */
+   if (brw->wm.prog_data->total_scratch) {
+      OUT_RELOC(brw->wm.scratch_bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->wm.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
    OUT_BATCH(dw4);
    OUT_BATCH(dw5);
    OUT_BATCH(0); /* kernel 1 pointer */
-- 
cgit v1.2.3


From 1ff4f11dd94711a498cde0330101c58636ef2741 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 10:59:39 -0700
Subject: i965/vs: Track the variable index of array accesses.

This isn't used currently, as we lower all array accesses.
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |  4 ++++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 16 ++++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 3e457fc61aa..bb40c71e4c9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -169,6 +169,8 @@ public:
    GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
    bool negate;
    bool abs;
+
+   src_reg *reladdr;
 };
 
 class dst_reg : public reg
@@ -219,6 +221,8 @@ public:
    explicit dst_reg(src_reg reg);
 
    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+
+   src_reg *reladdr;
 };
 
 class vec4_instruction : public exec_node {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f90025c8e7e..8bd048ff459 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -37,6 +37,7 @@ src_reg::src_reg(dst_reg reg)
    this->reg = reg.reg;
    this->reg_offset = reg.reg_offset;
    this->type = reg.type;
+   this->reladdr = reg.reladdr;
 
    int swizzles[4];
    int next_chan = 0;
@@ -66,6 +67,7 @@ dst_reg::dst_reg(src_reg reg)
    this->reg_offset = reg.reg_offset;
    this->type = reg.type;
    this->writemask = WRITEMASK_XYZW;
+   this->reladdr = reg.reladdr;
 }
 
 vec4_instruction *
@@ -1186,7 +1188,6 @@ vec4_visitor::visit(ir_dereference_array *ir)
    if (constant_index) {
       src.reg_offset += constant_index->value.i[0] * element_size;
    } else {
-#if 0 /* Variable array index */
       /* Variable index array dereference.  It eats the "vec4" of the
        * base of the array and an index that offsets the Mesa register
        * index.
@@ -1198,15 +1199,22 @@ vec4_visitor::visit(ir_dereference_array *ir)
       if (element_size == 1) {
 	 index_reg = this->result;
       } else {
-	 index_reg = src_reg(this, glsl_type::float_type);
+	 index_reg = src_reg(this, glsl_type::int_type);
 
 	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
-	      this->result, src_reg_for_float(element_size));
+	      this->result, src_reg(element_size));
+      }
+
+      if (src.reladdr) {
+	 src_reg temp = src_reg(this, glsl_type::int_type);
+
+	 emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);
+
+	 index_reg = temp;
       }
 
       src.reladdr = ralloc(mem_ctx, src_reg);
       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
-#endif
    }
 
    /* If the type is smaller than a vec4, replicate the last channel out. */
-- 
cgit v1.2.3


From 758c3c2b4588f235def48b2f28c0479a70f7c194 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 15:21:25 -0700
Subject: i965/vs: Reserve MRF 14/15 for array loads/register unspilling.

---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8bd048ff459..e01318af1ab 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1708,10 +1708,18 @@ align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
 void
 vec4_visitor::emit_urb_writes()
 {
+   /* MRF 0 is reserved for the debugger, so start with message header
+    * in MRF 1.
+    */
    int base_mrf = 1;
    int mrf = base_mrf;
    int urb_entry_size;
    uint64_t outputs_remaining = c->prog_data.outputs_written;
+   /* In the process of generating our URB write message contents, we
+    * may need to unspill a register or load from an array.  Those
+    * reads would use MRFs 14-15.
+    */
+   int max_usable_mrf = 13;
 
    /* FINISHME: edgeflag */
 
@@ -1751,7 +1759,7 @@ vec4_visitor::emit_urb_writes()
        * even-numbered amount of URB write data, which will meet
        * gen6's requirements for length alignment.
        */
-      if (mrf == 16) {
+      if (mrf > max_usable_mrf) {
 	 attr++;
 	 break;
       }
@@ -1772,21 +1780,21 @@ vec4_visitor::emit_urb_writes()
 	 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
 	    continue;
 
-	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+	 assert(mrf < max_usable_mrf);
 
-	 assert(mrf != 16);
+	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
       }
 
       inst = emit(VS_OPCODE_URB_WRITE);
       inst->base_mrf = base_mrf;
       inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
       inst->eot = true;
-      /* URB destination offset.  In the previous write, we got MRFs 2-
-       * 15 MRFs minus the one header MRF, so 14 regs.  URB offset is in
+      /* URB destination offset.  In the previous write, we got MRFs
+       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
        * URB row increments, and each of our MRFs is half of one of
        * those, since we're doing interleaved writes.
        */
-      inst->offset = 14 / 2;
+      inst->offset = (max_usable_mrf - base_mrf) / 2;
 
       urb_entry_size += mrf - base_mrf;
    }
-- 
cgit v1.2.3


From d0e4d71070cd7fa197ed98612782484ec1f27123 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 12:15:26 -0700
Subject: i965/vs: Move virtual GRFs with array accesses to them to scratch
 space.

---
 src/mesa/drivers/dri/i965/brw_defines.h        |   2 +
 src/mesa/drivers/dri/i965/brw_vec4.h           |  12 ++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |  10 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 163 +++++++++++++++++++++++++
 4 files changed, 186 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index e3823c65d1a..b740d87c933 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -640,6 +640,8 @@ enum opcode {
    FS_OPCODE_PULL_CONSTANT_LOAD,
 
    VS_OPCODE_URB_WRITE,
+   VS_OPCODE_SCRATCH_READ,
+   VS_OPCODE_SCRATCH_WRITE,
 };
 
 #define BRW_PREDICATE_NONE             0
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index bb40c71e4c9..2f171b72049 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -376,6 +376,7 @@ public:
    void setup_payload();
    void reg_allocate_trivial();
    void reg_allocate();
+   void move_grf_array_access_to_scratch();
 
    vec4_instruction *emit(enum opcode opcode);
 
@@ -424,6 +425,17 @@ public:
    int emit_vue_header_gen4(int header_mrf);
    void emit_urb_writes(void);
 
+   src_reg get_scratch_offset(vec4_instruction *inst,
+			      src_reg *reladdr, int reg_offset);
+   void emit_scratch_read(vec4_instruction *inst,
+			  dst_reg dst,
+			  src_reg orig_src,
+			  int base_offset);
+   void emit_scratch_write(vec4_instruction *inst,
+			   src_reg temp,
+			   dst_reg orig_dst,
+			   int base_offset);
+
    GLboolean try_emit_sat(ir_expression *ir);
 
    bool process_move_condition(ir_rvalue *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index bc3110b0458..57eb467567e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -321,7 +321,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
 bool
 vec4_visitor::run()
 {
-   /* Generate FS IR for main().  (the visitor only descends into
+   /* Generate VS IR for main().  (the visitor only descends into
     * functions called "main").
     */
    foreach_iter(exec_list_iterator, iter, *shader->ir) {
@@ -332,6 +332,14 @@ vec4_visitor::run()
 
    emit_urb_writes();
 
+   /* Before any optimization, push array accesses out to scratch
+    * space where we need them to be.  This pass may allocate new
+    * virtual GRFs, so we want to do it early.  It also makes sure
+    * that we have reladdr computations available for CSE, since we'll
+    * often do repeated subexpressions for those.
+    */
+   move_grf_array_access_to_scratch();
+
    if (failed)
       return false;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e01318af1ab..049af6c3992 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1805,6 +1805,169 @@ vec4_visitor::emit_urb_writes()
       c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
 }
 
+src_reg
+vec4_visitor::get_scratch_offset(vec4_instruction *inst,
+				 src_reg *reladdr, int reg_offset)
+{
+   /* Because we store the values to scratch interleaved like our
+    * vertex data, we need to scale the vec4 index by 2.
+    */
+   int message_header_scale = 2;
+
+   /* Pre-gen6, the message header uses byte offsets instead of vec4
+    * (16-byte) offset units.
+    */
+   if (intel->gen < 6)
+      message_header_scale *= 16;
+
+   if (reladdr) {
+      src_reg index = src_reg(this, glsl_type::int_type);
+
+      vec4_instruction *add = emit(BRW_OPCODE_ADD,
+				   dst_reg(index),
+				   *reladdr,
+				   src_reg(reg_offset));
+      /* Move our new instruction from the tail to its correct place. */
+      add->remove();
+      inst->insert_before(add);
+
+      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
+				   index, src_reg(message_header_scale));
+      mul->remove();
+      inst->insert_before(mul);
+
+      return index;
+   } else {
+      return src_reg(reg_offset * message_header_scale);
+   }
+}
+
+/**
+ * Emits an instruction before @inst to load the value named by @orig_src
+ * from scratch space at @base_offset to @temp.
+ */
+void
+vec4_visitor::emit_scratch_read(vec4_instruction *inst,
+				dst_reg temp, src_reg orig_src,
+				int base_offset)
+{
+   int reg_offset = base_offset + orig_src.reg_offset;
+   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
+
+   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
+					      temp, index);
+
+   scratch_read_inst->base_mrf = 14;
+   scratch_read_inst->mlen = 2; /* header + block offsets, as the generator sends */
+   /* Move our instruction from the tail to its correct place. */
+   scratch_read_inst->remove();
+   inst->insert_before(scratch_read_inst);
+}
+
+/**
+ * Emits an instruction after @inst to store the value to be written
+ * to @orig_dst to scratch space at @base_offset, from @temp.
+ */
+void
+vec4_visitor::emit_scratch_write(vec4_instruction *inst,
+				 src_reg temp, dst_reg orig_dst,
+				 int base_offset)
+{
+   int reg_offset = base_offset + orig_dst.reg_offset;
+   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
+
+   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
+				       orig_dst.writemask));
+   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
+					       dst, temp, index);
+   scratch_write_inst->base_mrf = 13;
+   scratch_write_inst->mlen = 3; /* header + block offsets + data, as the generator sends */
+   scratch_write_inst->predicate = inst->predicate;
+   /* Move our instruction from the tail to its correct place. */
+   scratch_write_inst->remove();
+   inst->insert_after(scratch_write_inst);
+}
+
+/**
+ * We can't generally support array access in GRF space, because a
+ * single instruction's destination can only span 2 contiguous
+ * registers.  So, we send all GRF arrays that get variable index
+ * access to scratch space.
+ */
+void
+vec4_visitor::move_grf_array_access_to_scratch()
+{
+   int scratch_loc[this->virtual_grf_count];
+
+   for (int i = 0; i < this->virtual_grf_count; i++) {
+      scratch_loc[i] = -1;
+   }
+
+   /* First, calculate the set of virtual GRFs that need to be punted
+    * to scratch due to having any array access on them, and where in
+    * scratch.
+    */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      if (inst->dst.file == GRF && inst->dst.reladdr &&
+	  scratch_loc[inst->dst.reg] == -1) {
+	 scratch_loc[inst->dst.reg] = c->last_scratch;
+	 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
+      }
+
+      for (int i = 0 ; i < 3; i++) {
+	 src_reg *src = &inst->src[i];
+
+	 if (src->file == GRF && src->reladdr &&
+	     scratch_loc[src->reg] == -1) {
+	    scratch_loc[src->reg] = c->last_scratch;
+	    c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
+	 }
+      }
+   }
+
+   /* Now, for anything that will be accessed through scratch, rewrite
+    * it to load/store.  Note that this is a _safe list walk, because
+    * we may generate a new scratch_write instruction after the one
+    * we're processing.
+    */
+   foreach_list_safe(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      /* Set up the annotation tracking for new generated instructions. */
+      base_ir = inst->ir;
+      current_annotation = inst->annotation;
+
+      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
+	 src_reg temp = src_reg(this, glsl_type::vec4_type);
+
+	 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
+
+	 inst->dst.file = temp.file;
+	 inst->dst.reg = temp.reg;
+	 inst->dst.reg_offset = temp.reg_offset;
+	 inst->dst.reladdr = NULL;
+      }
+
+      for (int i = 0 ; i < 3; i++) {
+	 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
+	    continue;
+
+	 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+
+	 emit_scratch_read(inst, temp, inst->src[i],
+			   scratch_loc[inst->src[i].reg]);
+
+	 inst->src[i].file = temp.file;
+	 inst->src[i].reg = temp.reg;
+	 inst->src[i].reg_offset = temp.reg_offset;
+	 inst->src[i].reladdr = NULL;
+      }
+   }
+}
+
+
 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
 			   struct gl_shader_program *prog,
 			   struct brw_shader *shader)
-- 
cgit v1.2.3


From 0f22f98ccd69bb5e8df3c78203bce9bc630965c1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 13:16:06 -0700
Subject: i965: Make some EU emit code for DP read/write messages non-static.

We keep building these strange interfaces for DP read/write where
there's a helper function with some partially-specific,
partially-general controls, which is used in exactly one place in code
generation.  Making these public will let us set up those instructions
in the one place they're to be generated.
---
 src/mesa/drivers/dri/i965/brw_eu.h      | 27 ++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 44 ++++++++++++++++-----------------
 2 files changed, 49 insertions(+), 22 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 38dd99b693d..af50305fc2b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -801,6 +801,12 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p,
 		      void *mem_ctx);
 const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
 
+struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
+void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+		  struct brw_reg dest);
+void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+		  struct brw_reg reg);
+
 
 /* Helpers for regular instructions:
  */
@@ -855,6 +861,27 @@ ROUND(RNDE)
 
 /* Helpers for SEND instruction:
  */
+void brw_set_dp_read_message(struct brw_compile *p,
+			     struct brw_instruction *insn,
+			     GLuint binding_table_index,
+			     GLuint msg_control,
+			     GLuint msg_type,
+			     GLuint target_cache,
+			     GLuint msg_length,
+			     GLuint response_length);
+
+void brw_set_dp_write_message(struct brw_compile *p,
+			      struct brw_instruction *insn,
+			      GLuint binding_table_index,
+			      GLuint msg_control,
+			      GLuint msg_type,
+			      GLuint msg_length,
+			      GLboolean header_present,
+			      GLuint pixel_scoreboard_clear,
+			      GLuint response_length,
+			      GLuint end_of_thread,
+			      GLuint send_commit_msg);
+
 void brw_urb_WRITE(struct brw_compile *p,
 		   struct brw_reg dest,
 		   GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index e7370f36064..b08906426e4 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -89,9 +89,9 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
 }
 
 
-static void brw_set_dest(struct brw_compile *p,
-			 struct brw_instruction *insn,
-			 struct brw_reg dest)
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+	     struct brw_reg dest)
 {
    if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
        dest.file != BRW_MESSAGE_REGISTER_FILE)
@@ -221,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
    /* 10. Check destination issues. */
 }
 
-static void brw_set_src0(struct brw_compile *p,
-			 struct brw_instruction *insn,
-			 struct brw_reg reg)
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+	     struct brw_reg reg)
 {
    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
@@ -504,17 +504,18 @@ static void brw_set_urb_message( struct brw_compile *p,
     }
 }
 
-static void brw_set_dp_write_message( struct brw_compile *p,
-				      struct brw_instruction *insn,
-				      GLuint binding_table_index,
-				      GLuint msg_control,
-				      GLuint msg_type,
-				      GLuint msg_length,
-				      GLboolean header_present,
-				      GLuint pixel_scoreboard_clear,
-				      GLuint response_length,
-				      GLuint end_of_thread,
-				      GLuint send_commit_msg)
+void
+brw_set_dp_write_message(struct brw_compile *p,
+			 struct brw_instruction *insn,
+			 GLuint binding_table_index,
+			 GLuint msg_control,
+			 GLuint msg_type,
+			 GLuint msg_length,
+			 GLboolean header_present,
+			 GLuint pixel_scoreboard_clear,
+			 GLuint response_length,
+			 GLuint end_of_thread,
+			 GLuint send_commit_msg)
 {
    struct brw_context *brw = p->brw;
    struct intel_context *intel = &brw->intel;
@@ -570,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p,
    }
 }
 
-static void
+void
 brw_set_dp_read_message(struct brw_compile *p,
 			struct brw_instruction *insn,
 			GLuint binding_table_index,
@@ -709,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p,
 }
 
 
-
-static struct brw_instruction *next_insn( struct brw_compile *p, 
-					  GLuint opcode )
+#define next_insn brw_next_insn
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, GLuint opcode)
 {
    struct brw_instruction *insn;
 
@@ -732,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
    return insn;
 }
 
-
 static struct brw_instruction *brw_alu1( struct brw_compile *p,
 					 GLuint opcode,
 					 struct brw_reg dest,
-- 
cgit v1.2.3


From 584ff407482fd3baf5ce081dbbf9653eb76c40f1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 13:36:11 -0700
Subject: i965/vs: Add support for scratch read/write codegen.

---
 src/mesa/drivers/dri/i965/brw_vec4.h        |   9 ++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 144 +++++++++++++++++++++++++++-
 2 files changed, 151 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 2f171b72049..b5f442e6d21 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -451,6 +451,15 @@ public:
 			    struct brw_reg dst,
 			    struct brw_reg src);
    void generate_urb_write(vec4_instruction *inst);
+   void generate_oword_dual_block_offsets(struct brw_reg m1,
+					  struct brw_reg index);
+   void generate_scratch_write(vec4_instruction *inst,
+			       struct brw_reg dst,
+			       struct brw_reg src,
+			       struct brw_reg index);
+   void generate_scratch_read(vec4_instruction *inst,
+			      struct brw_reg dst,
+			      struct brw_reg index);
 };
 
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 57eb467567e..21830f99fc2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -1,5 +1,4 @@
-/*
- * Copyright © 2011 Intel Corporation
+/* Copyright © 2011 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -278,6 +277,139 @@ vec4_visitor::generate_urb_write(vec4_instruction *inst)
 		 BRW_URB_SWIZZLE_INTERLEAVE);
 }
 
+void
+vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1,
+						struct brw_reg index)
+{
+   int second_vertex_offset;
+
+   if (intel->gen >= 6)
+      second_vertex_offset = 1;
+   else
+      second_vertex_offset = 16;
+
+   m1 = retype(m1, BRW_REGISTER_TYPE_D);
+
+   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
+    * M1.4 are used, and the rest are ignored.
+    */
+   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
+   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
+   struct brw_reg index_0 = suboffset(vec1(index), 0);
+   struct brw_reg index_4 = suboffset(vec1(index), 4);
+
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+
+   brw_MOV(p, m1_0, index_0);
+
+   brw_set_predicate_inverse(p, true);
+   if (index.file == BRW_IMMEDIATE_VALUE) {
+      index_4.dw1.ud++;
+      brw_MOV(p, m1_4, index_4);
+   } else {
+      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
+   }
+
+   brw_pop_insn_state(p);
+}
+
+void
+vec4_visitor::generate_scratch_read(vec4_instruction *inst,
+				    struct brw_reg dst,
+				    struct brw_reg index)
+{
+   if (intel->gen >= 6) {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p,
+	      retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
+	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+      brw_pop_insn_state(p);
+   }
+
+   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+				     index);
+
+   uint32_t msg_type;
+
+   if (intel->gen >= 6)
+      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   else if (intel->gen == 5 || intel->is_g4x)
+      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   else
+      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+
+   /* Each of the 8 channel enables is considered for whether each
+    * dword is written.
+    */
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
+   brw_set_dp_read_message(p, send,
+			   255, /* binding table index: stateless access */
+			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+			   msg_type,
+			   BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+			   2, /* mlen */
+			   1 /* rlen */);
+}
+
+void
+vec4_visitor::generate_scratch_write(vec4_instruction *inst,
+				     struct brw_reg dst,
+				     struct brw_reg src,
+				     struct brw_reg index)
+{
+   /* If the instruction is predicated, we'll predicate the send, not
+    * the header setup.
+    */
+   brw_set_predicate_control(p, false);
+
+   if (intel->gen >= 6) {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p,
+	      retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
+	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+      brw_pop_insn_state(p);
+   }
+
+   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+				     index);
+
+   brw_MOV(p,
+	   retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
+	   retype(src, BRW_REGISTER_TYPE_D));
+
+   uint32_t msg_type;
+
+   if (intel->gen >= 6)
+      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+   else
+      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+
+   brw_set_predicate_control(p, inst->predicate);
+
+   /* Each of the 8 channel enables is considered for whether each
+    * dword is written.
+    */
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
+   brw_set_dp_write_message(p, send,
+			    255, /* binding table index: stateless access */
+			    BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+			    msg_type,
+			    3, /* mlen */
+			    true, /* header present */
+			    false, /* pixel scoreboard */
+			    0, /* rlen */
+			    false, /* eot */
+			    false /* commit */);
+}
+
 void
 vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
 				      struct brw_reg dst,
@@ -308,6 +440,14 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
       generate_urb_write(inst);
       break;
 
+   case VS_OPCODE_SCRATCH_READ:
+      generate_scratch_read(inst, dst, src[0]);
+      break;
+
+   case VS_OPCODE_SCRATCH_WRITE:
+      generate_scratch_write(inst, dst, src[0], src[1]);
+      break;
+
    default:
       if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
 	 fail("unsupported opcode in `%s' in VS\n",
-- 
cgit v1.2.3


From 54fa706d6f06955221cb6b452b5b170bfaaceef4 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 13:38:50 -0700
Subject: i965/vs: Enable variable array indexing in the VS.

---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 2eeeec25cac..2dc32c95610 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -111,12 +111,14 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       brw_do_cubemap_normalize(shader->ir);
       lower_noise(shader->ir);
       lower_quadop_vector(shader->ir, false);
+
+      bool input = true;
+      bool output = stage == MESA_SHADER_FRAGMENT;
+      bool temp = stage == MESA_SHADER_FRAGMENT;
+      bool uniform = true;
+
       lower_variable_index_to_cond_assign(shader->ir,
-					  GL_TRUE, /* input */
-					  GL_TRUE, /* output */
-					  GL_TRUE, /* temp */
-					  GL_TRUE /* uniform */
-					  );
+					  input, output, temp, uniform);
 
       do {
 	 progress = false;
-- 
cgit v1.2.3


From e94bdbe04a4f0adb73ab92153987f0c9f48814f7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 17:09:12 -0700
Subject: i965: Add gen6 disassembly for DP render cache messages.

---
 src/mesa/drivers/dri/i965/brw_disasm.c | 49 +++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index af41c848308..927b0b4acc9 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -309,6 +309,35 @@ char *target_function[16] = {
     [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
 };
 
+char *target_function_gen6[16] = {
+    [BRW_MESSAGE_TARGET_NULL] = "null",
+    [BRW_MESSAGE_TARGET_MATH] = "math",
+    [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+    [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+    [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler",
+    [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render",
+    [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const",
+    [BRW_MESSAGE_TARGET_URB] = "urb",
+    [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *dp_rc_msg_type_gen6[16] = {
+    [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+    [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+    [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+    [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
 char *math_function[16] = {
     [BRW_MATH_FUNCTION_INV] = "inv",
     [BRW_MATH_FUNCTION_LOG] = "log",
@@ -927,8 +956,14 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
 	newline (file);
 	pad (file, 16);
 	space = 0;
-	err |= control (file, "target function", target_function,
-			target, &space);
+
+	if (gen >= 6) {
+	   err |= control (file, "target function", target_function_gen6,
+			   target, &space);
+	} else {
+	   err |= control (file, "target function", target_function,
+			   target, &space);
+	}
 
 	switch (target) {
 	case BRW_MESSAGE_TARGET_MATH:
@@ -985,9 +1020,16 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
 			inst->bits3.dp_read.msg_type);
 	    }
 	    break;
+
 	case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
 	    if (gen >= 6) {
-		format (file, " (%d, %d, %d, %d, %d, %d)",
+		format (file, " (");
+
+		err |= control (file, "DP rc message type",
+				dp_rc_msg_type_gen6,
+				inst->bits3.gen6_dp.msg_type, &space);
+
+		format (file, ", %d, %d, %d, %d, %d, %d)",
 			inst->bits3.gen6_dp.binding_table_index,
 			inst->bits3.gen6_dp.msg_control,
 			inst->bits3.gen6_dp.msg_type,
@@ -1003,6 +1045,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
 			inst->bits3.dp_write.send_commit_msg);
 	    }
 	    break;
+
 	case BRW_MESSAGE_TARGET_URB:
 	    if (gen >= 5) {
 		format (file, " %d", inst->bits3.urb_gen5.offset);
-- 
cgit v1.2.3


From 7b91eefe7cbe771397684b5970f7c04313baa2f0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 8 Aug 2011 15:56:11 -0700
Subject: i965/vs: Slightly improve the trivial reg allocator to skip unused
 regs.

This fixes most of the regressions in the vs array test set from the
varying array indexing work, since the giant array that was originally
allocated in virtual GRF space never gets used and is only ever
read/stored from scratch space.
---
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp     | 26 ++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index e7f6b28a536..1bfd84d76e8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -41,15 +41,37 @@ vec4_visitor::reg_allocate_trivial()
 {
    int last_grf = 0;
    int hw_reg_mapping[this->virtual_grf_count];
+   bool virtual_grf_used[this->virtual_grf_count];
    int i;
    int next;
 
+   /* Calculate which virtual GRFs are actually in use after whatever
+    * optimization passes have occurred.
+    */
+   for (int i = 0; i < this->virtual_grf_count; i++) {
+      virtual_grf_used[i] = false;
+   }
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      if (inst->dst.file == GRF)
+	 virtual_grf_used[inst->dst.reg] = true;
+
+      for (int i = 0; i < 3; i++) {
+	 if (inst->src[i].file == GRF)
+	    virtual_grf_used[inst->src[i].reg] = true;
+      }
+   }
+
    /* Note that compressed instructions require alignment to 2 registers. */
    hw_reg_mapping[0] = this->first_non_payload_grf;
    next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
    for (i = 1; i < this->virtual_grf_count; i++) {
-      hw_reg_mapping[i] = next;
-      next += this->virtual_grf_sizes[i];
+      if (virtual_grf_used[i]) {
+	 hw_reg_mapping[i] = next;
+	 next += this->virtual_grf_sizes[i];
+      }
    }
    prog_data->total_grf = next;
 
-- 
cgit v1.2.3


From 6408b0295f5c8be6fea891a025d79752484721b6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 10:57:09 -0700
Subject: i965/vs: Fix implementation of ir_unop_any.

We were inheriting whatever previous predicate existed.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 049af6c3992..fde1d67759a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -998,7 +998,9 @@ vec4_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_any:
-      emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
       emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
 
       inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
-- 
cgit v1.2.3


From 250770b74d33bb8625c780a74a89477af033d13a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 11:00:28 -0700
Subject: i965/vs: Respect the gen6 limitation that math opcodes can't be
 align16.

Fixes vs-acos-vec3 and friends.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |  9 +++++++++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 26 ++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 21830f99fc2..effc82a8004 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -250,6 +250,14 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 				  struct brw_reg dst,
 				  struct brw_reg src)
 {
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   /* Source swizzles are ignored. */
+   assert(!src.abs);
+   assert(!src.negate);
+   assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
    brw_math(p,
 	    dst,
 	    brw_math_function(inst->opcode),
@@ -258,6 +266,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 	    src,
 	    BRW_MATH_DATA_SCALAR,
 	    BRW_MATH_PRECISION_FULL);
+   brw_set_access_mode(p, BRW_ALIGN_16);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index fde1d67759a..f4756a9a1a8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -129,7 +129,18 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
    src_reg temp_src = src_reg(this, glsl_type::vec4_type);
    emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
 
-   emit(opcode, dst, temp_src);
+   if (dst.writemask != WRITEMASK_XYZW) {
+      /* The gen6 math instruction must be align1, so we can't do
+       * writemasks.
+       */
+      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
+
+      emit(opcode, temp_dst, temp_src);
+
+      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
+   } else {
+      emit(opcode, dst, temp_src);
+   }
 }
 
 void
@@ -184,7 +195,18 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode,
    emit(BRW_OPCODE_MOV, dst, src1);
    src1 = expanded;
 
-   emit(opcode, dst, src0, src1);
+   if (dst.writemask != WRITEMASK_XYZW) {
+      /* The gen6 math instruction must be align1, so we can't do
+       * writemasks.
+       */
+      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
+
+      emit(opcode, temp_dst, src0, src1);
+
+      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
+   } else {
+      emit(opcode, dst, src0, src1);
+   }
 }
 
 void
-- 
cgit v1.2.3


From abf843a797876b5e3c5c91dbec25b6553d2cc281 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 12:30:41 -0700
Subject: i965/vs: Add support for ir_binop_pow.

Fixes vs-pow-float-float.
---
 src/mesa/drivers/dri/i965/brw_vec4.h           | 10 ++++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    | 63 ++++++++++++++++++++++++--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  4 +-
 3 files changed, 70 insertions(+), 7 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index b5f442e6d21..082021513d2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -444,12 +444,22 @@ public:
    void generate_vs_instruction(vec4_instruction *inst,
 				struct brw_reg dst,
 				struct brw_reg *src);
+
    void generate_math1_gen4(vec4_instruction *inst,
 			    struct brw_reg dst,
 			    struct brw_reg src);
    void generate_math1_gen6(vec4_instruction *inst,
 			    struct brw_reg dst,
 			    struct brw_reg src);
+   void generate_math2_gen4(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src0,
+			    struct brw_reg src1);
+   void generate_math2_gen6(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src0,
+			    struct brw_reg src1);
+
    void generate_urb_write(vec4_instruction *inst);
    void generate_oword_dual_block_offsets(struct brw_reg m1,
 					  struct brw_reg index);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index effc82a8004..df9521cd04e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -245,6 +245,15 @@ vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
 	    BRW_MATH_PRECISION_FULL);
 }
 
+static void
+check_gen6_math_src_arg(struct brw_reg src)
+{
+   /* Source swizzles are ignored: require no abs/negate and an XYZW swizzle. */
+   assert(!src.abs);
+   assert(!src.negate);
+   assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+}
+
 void
 vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 				  struct brw_reg dst,
@@ -252,10 +261,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 {
    /* Can't do writemask because math can't be align16. */
    assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
-   /* Source swizzles are ignored. */
-   assert(!src.abs);
-   assert(!src.negate);
-   assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW);
+   check_gen6_math_src_arg(src);
 
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_math(p,
@@ -269,6 +275,49 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
    brw_set_access_mode(p, BRW_ALIGN_16);
 }
 
+void
+vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src0,
+				  struct brw_reg src1)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   /* Source swizzles are ignored. */
+   check_gen6_math_src_arg(src0);
+   check_gen6_math_src_arg(src1);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math2(p,
+	     dst,
+	     brw_math_function(inst->opcode),
+	     src0, src1);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
+vec4_visitor::generate_math2_gen4(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src0,
+				  struct brw_reg src1)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+
+   brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math(p,
+	    dst,
+	    brw_math_function(inst->opcode),
+	    BRW_MATH_SATURATE_NONE,
+	    inst->base_mrf,
+	    src0,
+	    BRW_MATH_DATA_VECTOR,
+	    BRW_MATH_PRECISION_FULL);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
 void
 vec4_visitor::generate_urb_write(vec4_instruction *inst)
 {
@@ -442,7 +491,11 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
       break;
 
    case SHADER_OPCODE_POW:
-      assert(!"finishme");
+      if (intel->gen >= 6) {
+	 generate_math2_gen6(inst, dst, src[0], src[1]);
+      } else {
+	 generate_math2_gen4(inst, dst, src[0], src[1]);
+      }
       break;
 
    case VS_OPCODE_URB_WRITE:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f4756a9a1a8..f9447d7c391 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -188,11 +188,11 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode,
     */
 
    expanded = src_reg(this, glsl_type::vec4_type);
-   emit(BRW_OPCODE_MOV, dst, src0);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
    src0 = expanded;
 
    expanded = src_reg(this, glsl_type::vec4_type);
-   emit(BRW_OPCODE_MOV, dst, src1);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
    src1 = expanded;
 
    if (dst.writemask != WRITEMASK_XYZW) {
-- 
cgit v1.2.3


From 0b359e3ea015576d0e75bf5ec19aceef337311a3 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 14:35:38 -0700
Subject: i965/vs: Add support for loops.

This is copied from brw_fs.cpp, instead of doing the temporary IR
generation that ir_to_mesa does.  Fixes glsl-vs-loop and friends.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 53 ++++++++++----------------
 1 file changed, 21 insertions(+), 32 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f9447d7c391..e11ec40cc7b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -700,58 +700,47 @@ vec4_visitor::visit(ir_variable *ir)
 void
 vec4_visitor::visit(ir_loop *ir)
 {
-   ir_dereference_variable *counter = NULL;
-
-   fail("not yet\n");
+   dst_reg counter;
 
    /* We don't want debugging output to print the whole body of the
     * loop as the annotation.
     */
    this->base_ir = NULL;
 
-   if (ir->counter != NULL)
-      counter = new(ir) ir_dereference_variable(ir->counter);
-
-   if (ir->from != NULL) {
-      assert(ir->counter != NULL);
+   if (ir->counter != NULL) {
+      this->base_ir = ir->counter;
+      ir->counter->accept(this);
+      counter = *(variable_storage(ir->counter));
 
-      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
+      if (ir->from != NULL) {
+	 this->base_ir = ir->from;
+	 ir->from->accept(this);
 
-      a->accept(this);
-      delete a;
+	 emit(BRW_OPCODE_MOV, counter, this->result);
+      }
    }
 
    emit(BRW_OPCODE_DO);
 
    if (ir->to) {
-      ir_expression *e =
-	 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
-			       counter, ir->to);
-      ir_if *if_stmt =  new(ir) ir_if(e);
-
-      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
-
-      if_stmt->then_instructions.push_tail(brk);
+      this->base_ir = ir->to;
+      ir->to->accept(this);
 
-      if_stmt->accept(this);
+      vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
+				    src_reg(counter), this->result);
+      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
 
-      delete if_stmt;
-      delete e;
-      delete brk;
+      inst = emit(BRW_OPCODE_BREAK);
+      inst->predicate = BRW_PREDICATE_NORMAL;
    }
 
    visit_instructions(&ir->body_instructions);
 
-   if (ir->increment) {
-      ir_expression *e =
-	 new(ir) ir_expression(ir_binop_add, counter->type,
-			       counter, ir->increment);
-
-      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
 
-      a->accept(this);
-      delete a;
-      delete e;
+   if (ir->increment) {
+      this->base_ir = ir->increment;
+      ir->increment->accept(this);
+      emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
    }
 
    emit(BRW_OPCODE_WHILE);
-- 
cgit v1.2.3


From fea7d34b3545878ce00914f388e1eeebf55f7748 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 14:49:29 -0700
Subject: i965/vs: Fix builtin uniform setup.

I want to intelligently pack them at some point, but for now we have
the params set up in groups of 4.  Fixes glsl-vs-normalscale.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e11ec40cc7b..93252f73285 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -443,13 +443,12 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
       int last_swiz = -1;
       for (unsigned int j = 0; j < 4; j++) {
 	 int swiz = GET_SWZ(slots[i].swizzle, j);
-	 if (swiz == last_swiz)
-	    break;
 	 last_swiz = swiz;
 
 	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
 	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
-	 this->uniform_size[this->uniforms]++;
+	 if (swiz <= last_swiz)
+	    this->uniform_size[this->uniforms]++;
       }
       this->uniforms++;
    }
-- 
cgit v1.2.3


From a55fbbc1a2b579aed1e80036367b521ef6928f66 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 15:08:47 -0700
Subject: i965/vs: Fix access of attribute arrays.

By leaving out the column index, we were reading an unallocated
attribute on glsl-mat-attribute.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index df9521cd04e..517a3e3c75b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -52,8 +52,9 @@ vec4_visitor::setup_attributes(int payload_reg)
 	 if (inst->src[i].file != ATTR)
 	    continue;
 
+	 int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
 	 inst->src[i].file = HW_REG;
-	 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
+	 inst->src[i].fixed_hw_reg = brw_vec8_grf(grf, 0);
 	 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
       }
    }
-- 
cgit v1.2.3


From aed5e353e95f47773864c6e61c506b9ddad0e2e9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 15:19:26 -0700
Subject: i965/vs: Clamp vertex color outputs when required by
 ARB_color_buffer_float.

Fixes glsl-vs-vertex-color.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 93252f73285..2a1f003b5ce 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1764,7 +1764,16 @@ vec4_visitor::emit_urb_writes()
       if (attr == VERT_RESULT_PSIZ)
 	 continue;
 
-      emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
+				    src_reg(output_reg[attr]));
+
+      if ((attr == VERT_RESULT_COL0 ||
+	   attr == VERT_RESULT_COL1 ||
+	   attr == VERT_RESULT_BFC0 ||
+	   attr == VERT_RESULT_BFC1) &&
+	  c->key.clamp_vertex_color) {
+	 inst->saturate = true;
+      }
 
       /* If this was MRF 15, we can't fit anything more into this URB
        * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
-- 
cgit v1.2.3


From 072d64121e13ad6bcb9b703090de1ee4a59f7096 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 10 Aug 2011 11:38:42 -0700
Subject: i965/vs: Add support for GL_FIXED attributes.

Fixes arb_es2_compatibility-fixed-type
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 517a3e3c75b..350d544aba3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -42,6 +42,18 @@ vec4_visitor::setup_attributes(int payload_reg)
       if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
 	 attribute_map[i] = payload_reg + nr_attributes;
 	 nr_attributes++;
+
+	 /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED
+	  * attributes come in as floating point conversions of the
+	  * integer values.
+	  */
+	 if (c->key.gl_fixed_input_size[i] != 0) {
+	    struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0);
+
+	    brw_MUL(p,
+		    brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1),
+		    reg, brw_imm_f(1.0 / 65536.0));
+	 }
       }
    }
 
-- 
cgit v1.2.3


From 193a9a209d5121e2c20f1d20c61587b1e3d0603d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 10 Aug 2011 14:13:23 -0700
Subject: i965/vs: Add support for if(any(bvec)) on gen6.

---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 2a1f003b5ce..d1888579597 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -564,10 +564,6 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
 
       assert(expr->get_num_operands() <= 2);
       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
-	 assert(expr->operands[i]->type->is_scalar() ||
-		expr->operation == ir_binop_any_nequal ||
-		expr->operation == ir_binop_all_equal);
-
 	 expr->operands[i]->accept(this);
 	 op[i] = this->result;
       }
@@ -634,6 +630,14 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
 	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
 	 return;
 
+      case ir_unop_any:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+	 inst = emit(BRW_OPCODE_IF);
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+	 return;
+
       default:
 	 assert(!"not reached");
 	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
-- 
cgit v1.2.3


From e8980c61b2932cd4c8791fcc5afdb54fa033c224 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 11 Aug 2011 09:17:18 -0700
Subject: i965/vs: Fix the trivial register allocator's failure path.

---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp         | 3 +++
 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 5 ++---
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 350d544aba3..27160fb40d4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -561,6 +561,9 @@ vec4_visitor::run()
    setup_payload();
    reg_allocate();
 
+   if (failed)
+      return false;
+
    brw_set_access_mode(p, BRW_ALIGN_16);
 
    generate_code();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 1bfd84d76e8..d5fd21d99a4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -39,7 +39,6 @@ assign(int *reg_hw_locations, reg *reg)
 void
 vec4_visitor::reg_allocate_trivial()
 {
-   int last_grf = 0;
    int hw_reg_mapping[this->virtual_grf_count];
    bool virtual_grf_used[this->virtual_grf_count];
    int i;
@@ -84,9 +83,9 @@ vec4_visitor::reg_allocate_trivial()
       assign(hw_reg_mapping, &inst->src[2]);
    }
 
-   if (last_grf >= BRW_MAX_GRF) {
+   if (prog_data->total_grf > BRW_MAX_GRF) {
       fail("Ran out of regs on trivial allocator (%d/%d)\n",
-	   last_grf, BRW_MAX_GRF);
+	   prog_data->total_grf, BRW_MAX_GRF);
    }
 }
 
-- 
cgit v1.2.3


From d376fa8e84b044ead47586d1b56a10742bcbdac7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 18:40:14 -0700
Subject: i965: Fix assertion failure on a loop consisting of while (true) {
 break }.

On enabling the precompile step in the VS, we tripped over this
assertion failure in glsl-link-bug-30552.
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index b08906426e4..f5cc09dd49b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2311,7 +2311,7 @@ brw_find_loop_end(struct brw_compile *p, int start)
       if (insn->header.opcode == BRW_OPCODE_WHILE) {
 	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
 				   : insn->bits3.break_cont.jip;
-	 if (ip + jip / br < start)
+	 if (ip + jip / br <= start)
 	    return ip;
       }
    }
-- 
cgit v1.2.3


From 7fbe7fe13359d3f349664410ec73d7bd48824ed6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 11 Aug 2011 09:52:08 -0700
Subject: i965/vs: Run the shader backend at link time and return compile
 failures.

Link failure is something that shouldn't happen, but we sometimes want
it during development.  The precompile also allows analysis of shader
codegen with shader-db.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp           |  2 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp       |  4 ++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    | 12 ++----
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_vs.c             | 51 ++++++++++++++++++++++----
 src/mesa/drivers/dri/i965/brw_vs.h             |  3 +-
 6 files changed, 54 insertions(+), 20 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 693ef0ce31a..b19c6e72fa6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1781,7 +1781,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
    fs_visitor v(c, prog, shader);
    if (!v.run()) {
       prog->LinkStatus = GL_FALSE;
-      prog->InfoLog = ralloc_strdup(prog, v.fail_msg);
+      ralloc_strcat(&prog->InfoLog, v.fail_msg);
 
       return false;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 2dc32c95610..3ff6bbaed47 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -24,6 +24,7 @@
 extern "C" {
 #include "main/macros.h"
 #include "brw_context.h"
+#include "brw_vs.h"
 }
 #include "brw_fs.h"
 #include "../glsl/ir_optimization.h"
@@ -67,6 +68,9 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
    if (!brw_fs_precompile(ctx, prog))
       return false;
 
+   if (!brw_vs_precompile(ctx, prog))
+      return false;
+
    return true;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 27160fb40d4..9ef6ab6de90 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -809,14 +809,8 @@ vec4_visitor::generate_code()
 extern "C" {
 
 bool
-brw_vs_emit(struct brw_vs_compile *c)
+brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c)
 {
-   struct brw_compile *p = &c->func;
-   struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
-   struct gl_context *ctx = &intel->ctx;
-   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;
-
    if (!prog)
       return false;
 
@@ -833,8 +827,8 @@ brw_vs_emit(struct brw_vs_compile *c)
 
    vec4_visitor v(c, prog, shader);
    if (!v.run()) {
-      /* FINISHME: Cleanly fail, test at link time, etc. */
-      assert(!"not reached");
+      prog->LinkStatus = GL_FALSE;
+      ralloc_strcat(&prog->InfoLog, v.fail_msg);
       return false;
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index d1888579597..b1792a8ee16 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2012,7 +2012,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->current_annotation = NULL;
 
    this->c = c;
-   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
+   this->vp = prog->VertexProgram;
    this->prog_data = &c->prog_data;
 
    this->variable_ht = hash_table_ctor(0,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index d389f602fba..3373e707d98 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -40,9 +40,11 @@
 
 #include "../glsl/ralloc.h"
 
-static void do_vs_prog( struct brw_context *brw, 
-			struct brw_vertex_program *vp,
-			struct brw_vs_prog_key *key )
+static bool
+do_vs_prog(struct brw_context *brw,
+	   struct gl_shader_program *prog,
+	   struct brw_vertex_program *vp,
+	   struct brw_vs_prog_key *key)
 {
    struct gl_context *ctx = &brw->intel.ctx;
    struct intel_context *intel = &brw->intel;
@@ -91,9 +93,11 @@ static void do_vs_prog( struct brw_context *brw,
    if (new_vs == -1)
       new_vs = getenv("INTEL_NEW_VS") != NULL;
 
-   if (new_vs) {
-      if (!brw_vs_emit(&c))
-	 brw_old_vs_emit(&c);
+   if (new_vs && prog) {
+      if (!brw_vs_emit(prog, &c)) {
+	 ralloc_free(mem_ctx);
+	 return false;
+      }
    } else {
       brw_old_vs_emit(&c);
    }
@@ -130,6 +134,8 @@ static void do_vs_prog( struct brw_context *brw,
 		    &c.prog_data, aux_size,
 		    &brw->vs.prog_offset, &brw->vs.prog_data);
    ralloc_free(mem_ctx);
+
+   return true;
 }
 
 
@@ -174,13 +180,15 @@ static void brw_upload_vs_prog(struct brw_context *brw)
    if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
 			 &key, sizeof(key),
 			 &brw->vs.prog_offset, &brw->vs.prog_data)) {
-      do_vs_prog(brw, vp, &key);
+      bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
+				vp, &key);
+
+      assert(success);
    }
    brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
 			   sizeof(*brw->vs.prog_data));
 }
 
-
 /* See brw_vs.c:
  */
 const struct brw_tracked_state brw_vs_prog = {
@@ -193,3 +201,30 @@ const struct brw_tracked_state brw_vs_prog = {
    },
    .prepare = brw_upload_vs_prog
 };
+
+bool
+brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_vs_prog_key key;
+   struct gl_vertex_program *vp = prog->VertexProgram;
+   struct brw_vertex_program *bvp = brw_vertex_program(vp);
+   uint32_t old_prog_offset = brw->vs.prog_offset;
+   struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
+   bool success;
+
+   if (!vp)
+      return true;
+
+   memset(&key, 0, sizeof(key));
+
+   key.program_string_id = bvp->id;
+   key.clamp_vertex_color = true;
+
+   success = do_vs_prog(brw, prog, bvp, &key);
+
+   brw->vs.prog_offset = old_prog_offset;
+   brw->vs.prog_data = old_prog_data;
+
+   return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 83a37f5b800..beccb381ee2 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -93,7 +93,8 @@ struct brw_vs_compile {
    GLboolean needs_stack;
 };
 
-bool brw_vs_emit(struct brw_vs_compile *c);
+bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c);
 void brw_old_vs_emit(struct brw_vs_compile *c);
+bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
 
 #endif
-- 
cgit v1.2.3


From 7642c1de6b65b7dfd9e39904291cc9737cd54b56 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 11 Aug 2011 08:09:10 -0700
Subject: i965/vs: Avoid generating a MOV for most ir_assignment handling.

Removes an average of 11.5% of instructions in 54% of vertex shaders
in shader-db.
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |  6 +++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 67 ++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 082021513d2..620b05570a6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -388,6 +388,12 @@ public:
    vec4_instruction *emit(enum opcode opcode, dst_reg dst,
 			  src_reg src0, src_reg src1, src_reg src2);
 
+   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
+			       dst_reg dst,
+			       src_reg src,
+			       vec4_instruction *pre_rhs_inst,
+			       vec4_instruction *last_rhs_inst);
+
    /** Walks an exec_list of ir_instruction and sends it through this visitor. */
    void visit_instructions(const exec_list *list);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index b1792a8ee16..ae733810757 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1342,6 +1342,63 @@ vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
    src->reg_offset++;
 }
 
+
+/* If the RHS processing resulted in an instruction generating a
+ * temporary value, and it would be easy to rewrite the instruction to
+ * generate its result right into the LHS instead, do so.  This ends
+ * up reliably removing instructions where it can be tricky to do so
+ * later without real UD chain information.
+ */
+bool
+vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
+				     dst_reg dst,
+				     src_reg src,
+				     vec4_instruction *pre_rhs_inst,
+				     vec4_instruction *last_rhs_inst)
+{
+   /* This could be supported, but it would take more smarts. */
+   if (ir->condition)
+      return false;
+
+   if (pre_rhs_inst == last_rhs_inst)
+      return false; /* No instructions generated to work with. */
+
+   /* Make sure the last instruction generated our source reg. */
+   if (src.file != GRF ||
+       src.file != last_rhs_inst->dst.file ||
+       src.reg != last_rhs_inst->dst.reg ||
+       src.reg_offset != last_rhs_inst->dst.reg_offset ||
+       src.reladdr ||
+       src.abs ||
+       src.negate ||
+       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
+      return false;
+
+   /* Check that the last instruction fully initialized the channels
+    * we want to use, in the order we want to use them.  We could
+    * potentially reswizzle the operands of many instructions so that
+    * we could handle out of order channels, but don't yet.
+    */
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i)) {
+	 if (!(last_rhs_inst->dst.writemask & (1 << i)))
+	    return false;
+
+	 if (BRW_GET_SWZ(src.swizzle, i) != i)
+	    return false;
+      }
+   }
+
+   /* Success!  Rewrite the instruction. */
+   last_rhs_inst->dst.file = dst.file;
+   last_rhs_inst->dst.reg = dst.reg;
+   last_rhs_inst->dst.reg_offset = dst.reg_offset;
+   last_rhs_inst->dst.reladdr = dst.reladdr;
+   last_rhs_inst->dst.writemask &= dst.writemask;
+
+   return true;
+}
+
 void
 vec4_visitor::visit(ir_assignment *ir)
 {
@@ -1363,7 +1420,13 @@ vec4_visitor::visit(ir_assignment *ir)
    /* Now we're down to just a scalar/vector with writemasks. */
    int i;
 
+   vec4_instruction *pre_rhs_inst, *last_rhs_inst;
+   pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+
    ir->rhs->accept(this);
+
+   last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+
    src_reg src = this->result;
 
    int swizzles[4];
@@ -1396,6 +1459,10 @@ vec4_visitor::visit(ir_assignment *ir)
    src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
 			      swizzles[2], swizzles[3]);
 
+   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
+      return;
+   }
+
    if (ir->condition) {
       emit_bool_to_cond_code(ir->condition);
    }
-- 
cgit v1.2.3


From 54e66a0a6327b55f15a7c641ec68da505ff19a35 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 11 Aug 2011 16:27:41 -0700
Subject: i965/vs: Fix abs/negate handling on attributes.

Fixes glsl-vs-neg-attribute and glsl-vs-abs-attribute.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 9ef6ab6de90..6b0ae42e0e0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -65,9 +65,16 @@ vec4_visitor::setup_attributes(int payload_reg)
 	    continue;
 
 	 int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
+
+	 struct brw_reg reg = brw_vec8_grf(grf, 0);
+	 reg.dw1.bits.swizzle = inst->src[i].swizzle;
+	 if (inst->src[i].abs)
+	    reg = brw_abs(reg);
+	 if (inst->src[i].negate)
+	    reg = negate(reg);
+
 	 inst->src[i].file = HW_REG;
-	 inst->src[i].fixed_hw_reg = brw_vec8_grf(grf, 0);
-	 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
+	 inst->src[i].fixed_hw_reg = reg;
       }
    }
 
-- 
cgit v1.2.3


From 905f3d03090c7b86e410959c5640054f5f6894ef Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 12 Aug 2011 05:15:50 -0700
Subject: i965/vs: Remove remaining use of foreach_iter.

---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    | 10 +++-------
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  4 ++--
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 6b0ae42e0e0..fca31b6dec9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -57,8 +57,8 @@ vec4_visitor::setup_attributes(int payload_reg)
       }
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      vec4_instruction *inst = (vec4_instruction *)iter.get();
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
 
       for (int i = 0; i < 3; i++) {
 	 if (inst->src[i].file != ATTR)
@@ -546,11 +546,7 @@ vec4_visitor::run()
    /* Generate VS IR for main().  (the visitor only descends into
     * functions called "main").
     */
-   foreach_iter(exec_list_iterator, iter, *shader->ir) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
-      base_ir = ir;
-      ir->accept(this);
-   }
+   visit_instructions(shader->ir);
 
    emit_urb_writes();
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ae733810757..fc75cc35172 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -234,8 +234,8 @@ vec4_visitor::emit_math(enum opcode opcode,
 void
 vec4_visitor::visit_instructions(const exec_list *list)
 {
-   foreach_iter(exec_list_iterator, iter, *list) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list(node, list) {
+      ir_instruction *ir = (ir_instruction *)node;
 
       base_ir = ir;
       ir->accept(this);
-- 
cgit v1.2.3


From d0c595ac8032aa9aed402a513870b8dc92e42903 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 12 Aug 2011 05:28:53 -0700
Subject: i965/gen6: Force WHILE exec size to 8.

We can't just look at the instruction that happens to appear at the
start of the loop, because it might be some other exec size and cause
us to only loop on the first N channels.  We always want 8 in our
current code (since 16 doesn't work so we don't do 16-wide fragment in
that case).

Fixes loop-03.vert, which was triggering the assertions.
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index f5cc09dd49b..27e81306e9c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1341,8 +1341,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
       brw_set_src1(p, insn, brw_imm_ud(0));
       insn->bits3.break_cont.jip = br * (do_insn - insn);
 
-      insn->header.execution_size = do_insn->header.execution_size;
-      assert(insn->header.execution_size == BRW_EXECUTE_8);
+      insn->header.execution_size = BRW_EXECUTE_8;
    } else if (intel->gen == 6) {
       insn = next_insn(p, BRW_OPCODE_WHILE);
 
@@ -1351,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
 
-      insn->header.execution_size = do_insn->header.execution_size;
-      assert(insn->header.execution_size == BRW_EXECUTE_8);
+      insn->header.execution_size = BRW_EXECUTE_8;
    } else {
       if (p->single_program_flow) {
 	 insn = next_insn(p, BRW_OPCODE_ADD);
-- 
cgit v1.2.3


From 8a649277cb57cc13fb38f8e8daf07e8a2b96223c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 12 Aug 2011 05:32:25 -0700
Subject: i965/vs: Don't assertion fail on vertex texturing.

The linker will reject the program, but we need to survive until then.
Fixes abort in glsl1-2D Texture lookup with explicit lod (Vertex
shader)
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index fc75cc35172..d03fbff27fc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1554,7 +1554,12 @@ vec4_visitor::visit(ir_call *ir)
 void
 vec4_visitor::visit(ir_texture *ir)
 {
-   assert(!"not reached");
+   /* FINISHME: Implement vertex texturing.
+    *
+    * With 0 vertex samplers available, the linker will reject
+    * programs that do vertex texturing, but after our visitor has
+    * run.
+    */
 }
 
 void
-- 
cgit v1.2.3


From feff7c62ce446f4e3bb755a2f40dcbd0e70155e4 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 20:13:53 -0700
Subject: i965/vs: Fix condition code for scalar expression all_equals.

Fixes vs-op-eq-bool-bool.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index d03fbff27fc..3ae89dfbc45 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -985,7 +985,7 @@ vec4_visitor::visit(ir_expression *ir)
 	    temp.type = op[0].type;
 
 	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
 	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
       }
       break;
-- 
cgit v1.2.3


From e9a86ae3370948acb1276e80fbbc421d7025db36 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 20:43:42 -0700
Subject: i965/vs: Fix memory leak of ralloc context for the visitor.

---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3ae89dfbc45..185a01e05f9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2104,6 +2104,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
 
 vec4_visitor::~vec4_visitor()
 {
+   ralloc_free(this->mem_ctx);
    hash_table_dtor(this->variable_ht);
 }
 
-- 
cgit v1.2.3


From 7bf70c29adf175f51d0347d0187aecc0e9bbbcb8 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 20:59:24 -0700
Subject: i965/vs: Add support for conversion of FIXED_HW_REG src_reg to/from
 dst_reg.

This was quietly occurring in some emit code I produced, and failed.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 185a01e05f9..621cb53ff84 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -38,6 +38,7 @@ src_reg::src_reg(dst_reg reg)
    this->reg_offset = reg.reg_offset;
    this->type = reg.type;
    this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
 
    int swizzles[4];
    int next_chan = 0;
@@ -68,6 +69,7 @@ dst_reg::dst_reg(src_reg reg)
    this->type = reg.type;
    this->writemask = WRITEMASK_XYZW;
    this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
 }
 
 vec4_instruction *
-- 
cgit v1.2.3


From 0ddf0f1c3451eef8a7c7f46afca623dc4f7c5af6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 21:02:10 -0700
Subject: i965/vs: Fix multiplies to actually do 32-bit multiplies.

Fixes vs-op-mult-int-int and friends.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |  5 +++++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 +++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index fca31b6dec9..011af6f2d3e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -633,6 +633,11 @@ vec4_visitor::generate_code()
       case BRW_OPCODE_MUL:
 	 brw_MUL(p, dst, src[0], src[1]);
 	 break;
+      case BRW_OPCODE_MACH:
+	 brw_set_acc_write_control(p, 1);
+	 brw_MACH(p, dst, src[0], src[1]);
+	 brw_set_acc_write_control(p, 0);
+	 break;
 
       case BRW_OPCODE_FRC:
 	 brw_FRC(p, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 621cb53ff84..a60fc5f6ada 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -945,7 +945,23 @@ vec4_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_mul:
-      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      if (ir->type->is_integer()) {
+	 /* For integer multiplication, the MUL uses the low 16 bits
+	  * of one of the operands (src0 on gen6, src1 on gen7).  The
+	  * MACH accumulates in the contribution of the upper 16 bits
+	  * of that operand.
+	  *
+	  * FINISHME: Emit just the MUL if we know an operand is small
+	  * enough.
+	  */
+	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+	 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+	 emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
+	 emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
+      } else {
+	 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      }
       break;
    case ir_binop_div:
       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
-- 
cgit v1.2.3


From eb0ff1a1c0f1978d867c748bf2525f717a56bfce Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Mon, 15 Aug 2011 10:58:25 -0700
Subject: mesa: Remove use of fpu_control.h

Remove the inclusion of fpu_control.h from compiler.h.  Since Bionic lacks
fpu_control.h, this fixes the Android build.

Also remove the sole use of the fpu_control bits, which was in debug.c.
Those were brianp's debug bits, and he approved of their removal.

Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/compiler.h |  3 ---
 src/mesa/main/debug.c    | 11 -----------
 2 files changed, 14 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index ee7d0b2f880..8ed1c6fa61f 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -45,9 +45,6 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
-#if defined(__linux__) && defined(__i386__)
-#include <fpu_control.h>
-#endif
 #include <float.h>
 #include <stdarg.h>
 
diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c
index e7f6be99481..b1fc096f296 100644
--- a/src/mesa/main/debug.c
+++ b/src/mesa/main/debug.c
@@ -192,17 +192,6 @@ static void add_debug_flags( const char *debug )
    if (strstr(debug, "flush"))
       MESA_DEBUG_FLAGS |= DEBUG_ALWAYS_FLUSH;
 
-#if defined(_FPU_GETCW) && defined(_FPU_SETCW)
-   if (strstr(debug, "fpexceptions")) {
-      /* raise FP exceptions */
-      fpu_control_t mask;
-      _FPU_GETCW(mask);
-      mask &= ~(_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM
-                | _FPU_MASK_OM | _FPU_MASK_UM);
-      _FPU_SETCW(mask);
-   }
-#endif
-
 #else
    (void) debug;
 #endif
-- 
cgit v1.2.3


From bd064a49f119d126623c0e85702801e4cee62187 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Mon, 15 Aug 2011 13:26:21 -0700
Subject: mesa: Fix Android build by #ifdef'ing out locale support

Bionic does not support locales. This commit #ifdef's out the locale usage
in _mesa_strtof().

Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/imports.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 0a572ec225d..8f097195922 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -753,7 +753,8 @@ _mesa_strdup( const char *s )
 float
 _mesa_strtof( const char *s, char **end )
 {
-#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__)
+#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \
+    !defined(ANDROID)
    static locale_t loc = NULL;
    if (!loc) {
       loc = newlocale(LC_CTYPE_MASK, "C", NULL);
-- 
cgit v1.2.3


From 3c9f172fe801a8e954a40affc38942b628b81bda Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Mon, 15 Aug 2011 13:29:15 -0700
Subject: mesa: Add Android to list of platforms that define fpclassify()

This is a fix for the Android build.

Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/querymatrix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c
index 944ad435f7a..eaedf7cd238 100644
--- a/src/mesa/main/querymatrix.c
+++ b/src/mesa/main/querymatrix.c
@@ -73,7 +73,7 @@ fpclassify(double x)
 #elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \
      defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
      (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) || \
-     (defined(__sun) && defined(__GNUC__))
+     (defined(__sun) && defined(__GNUC__)) || defined(ANDROID)
 
 /* fpclassify is available. */
 
-- 
cgit v1.2.3


From 6ad08989d7c10892919ce1cb9c88c4cf8b73e1dc Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sat, 30 Jul 2011 10:48:10 -0700
Subject: ir_to_mesa: Implement ir_unop_logic_not using 1-x

Since our logic values are 0.0 (false) and 1.0 (true), 1.0 - x
accurately implements logical not.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1ef609fe15d..f03ea7a95e0 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1135,7 +1135,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0));
+      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+       * older GPUs implement SEQ using multiple instructions (i915 uses two
+       * SGE instructions and a MUL instruction).  Since our logic values are
+       * 0.0 and 1.0, 1-x also implements !x.
+       */
+      op[0].negate = ~op[0].negate;
+      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
       break;
    case ir_unop_neg:
       op[0].negate = ~op[0].negate;
-- 
cgit v1.2.3


From 41f8ffe5e07c4f389eb13d17ecf0ff776890e9bc Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sat, 30 Jul 2011 10:49:49 -0700
Subject: ir_to_mesa: Implement ir_binop_logic_or using an add w/saturate or
 add w/SLT

Logical-or is implemented using addition (followed by clamping to
[0,1]) on values of 0.0 and 1.0.  Replacing the logical-or operators
with addition gives a + b which has a result on the range [0, 2].

Previously a SNE instruction was used to clamp the resulting logic
value to [0,1].  In a fragment shader, using a saturate on the add has
the same effect.  Adding the saturate to the add is free, so (at
least) one instruction is saved.

In a vertex shader, using an SLT on the negation of the add result has
the same effect.  Many older shader architectures do not support the
SNE instruction.  It must be emulated using two SLT instructions and
an ADD.  On these architectures, the single SLT saves two
instructions.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index f03ea7a95e0..fcd14c89cd7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1267,11 +1267,28 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
-   case ir_binop_logic_or:
-      /* This could be a saturated add and skip the SNE. */
-      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+   case ir_binop_logic_or: {
+      /* After the addition, the value will be an integer on the
+       * range [0,2].  Zero stays zero, and positive values become 1.0.
+       */
+      ir_to_mesa_instruction *add =
+	 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	 /* The clamping to [0,1] can be done for free in the fragment
+	  * shader with a saturate.
+	  */
+	 add->saturate = true;
+      } else {
+	 /* Negating the result of the addition gives values on the range
+	  * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+	  * is achieved using SLT.
+	  */
+	 src_reg slt_src = result_src;
+	 slt_src.negate = ~slt_src.negate;
+	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+      }
       break;
+   }
 
    case ir_binop_logic_and:
       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
-- 
cgit v1.2.3


From 7f4c65256cc3f4d9f6a214424beabe688a5dd6a2 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sat, 30 Jul 2011 10:45:35 -0700
Subject: ir_to_mesa: Make ir_to_mesa_visitor::emit_dp return the instruction

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index fcd14c89cd7..60d498bd9e3 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -297,11 +297,11 @@ public:
    /**
     * Emit the correct dot-product instruction for the type of arguments
     */
-   void emit_dp(ir_instruction *ir,
-	        dst_reg dst,
-	        src_reg src0,
-	        src_reg src1,
-	        unsigned elements);
+   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
+				    dst_reg dst,
+				    src_reg src0,
+				    src_reg src1,
+				    unsigned elements);
 
    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
 		    dst_reg dst, src_reg src0);
@@ -408,7 +408,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
 
-void
+ir_to_mesa_instruction *
 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
 			    dst_reg dst, src_reg src0, src_reg src1,
 			    unsigned elements)
@@ -417,7 +417,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
    };
 
-   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 }
 
 /**
-- 
cgit v1.2.3


From 92ca560d68e8a6b532998707afcf4f60c0ce2806 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 3 Aug 2011 15:27:43 -0700
Subject: ir_to_mesa: Implement ir_unop_any using DP4 w/saturate or DP4 w/SLT

This is just like the ir_binop_logic_or case.  The operation
ir_unop_any is (a.x || a.y || a.z || a.w).  Logical-or is implemented
using addition (followed by clamping to [0,1]) on values of 0.0 and
1.0.  Replacing the logical-or operators with addition gives (a.x +
a.y + a.z + a.w).  This can be implemented using a dot-product with a
vector of all 1.0.

Previously a SNE instruction was used to clamp the resulting logic
value to [0,1].  In a fragment shader, using a saturate on the
dot-product has the same effect.  Adding the saturate to the
dot-product is free, so (at least) one instruction is saved.

In a vertex shader, using an SLT on the negation of the dot-product
result has the same effect.  Many older shader architectures do not
support the SNE instruction.  It must be emulated using two SLT
instructions and an ADD.  On these architectures, the single SLT saves
two instructions.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 60d498bd9e3..1bd9a2eee1b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1256,12 +1256,31 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       }
       break;
 
-   case ir_unop_any:
+   case ir_unop_any: {
       assert(ir->operands[0]->type->is_vector());
-      emit_dp(ir, result_dst, op[0], op[0],
-	      ir->operands[0]->type->vector_elements);
-      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+      /* After the dot-product, the value will be an integer on the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      ir_to_mesa_instruction *const dp =
+	 emit_dp(ir, result_dst, op[0], op[0],
+		 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	 /* The clamping to [0,1] can be done for free in the fragment
+	  * shader with a saturate.
+	  */
+	 dp->saturate = true;
+      } else {
+	 /* Negating the result of the dot-product gives values on the range
+	  * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	  * is achieved using SLT.
+	  */
+	 src_reg slt_src = result_src;
+	 slt_src.negate = ~slt_src.negate;
+	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+      }
       break;
+   }
 
    case ir_binop_logic_xor:
       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
-- 
cgit v1.2.3


From e7bf096e8b04931996c8c56548ce0b2c0af3a0dc Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 3 Aug 2011 15:35:01 -0700
Subject: ir_to_mesa: Implement ir_binop_any_nequal using DP4 w/saturate or DP4
 w/SLT

The operation ir_binop_any_nequal is (a.x != b.x) || (a.y != b.y) ||
(a.z != b.z) || (a.w != b.w), and that is the same as any(bvec4(a.x !=
b.x, a.y != b.y, a.z != b.z, a.w != b.w)).  Implement the any() part
the same way the regular ir_unop_any is implemented.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1bd9a2eee1b..1c674ea8756 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1249,8 +1249,26 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 	  ir->operands[1]->type->is_vector()) {
 	 src_reg temp = get_temp(glsl_type::vec4_type);
 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
-	 emit_dp(ir, result_dst, temp, temp, vector_elements);
-	 emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+	 /* After the dot-product, the value will be an integer on the
+	  * range [0,4].  Zero stays zero, and positive values become 1.0.
+	  */
+	 ir_to_mesa_instruction *const dp =
+	    emit_dp(ir, result_dst, temp, temp, vector_elements);
+	 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	    /* The clamping to [0,1] can be done for free in the fragment
+	     * shader with a saturate.
+	     */
+	    dp->saturate = true;
+	 } else {
+	    /* Negating the result of the dot-product gives values on the range
+	     * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	     * achieved using SLT.
+	     */
+	    src_reg slt_src = result_src;
+	    slt_src.negate = ~slt_src.negate;
+	    emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+	 }
       } else {
 	 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
       }
-- 
cgit v1.2.3


From ba01df11c4d09c65514a8522cb319e29034ab5a8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 3 Aug 2011 15:42:05 -0700
Subject: ir_to_mesa: Implement ir_binop_all_equal using DP4 w/SGE

The operation ir_binop_all_equal is !(a.x != b.x || a.y != b.y || a.z
!= b.z || a.w != b.w).  Logical-or is implemented using addition
(followed by clamping to [0,1]) on values of 0.0 and 1.0.  Replacing
the logical-or operators with addition gives !bool(int(a.x != b.x) +
int(a.y != b.y) + int(a.z != b.z) + int(a.w != b.w)).  This can be
implemented using a dot-product with a vector of all 1.0.  After the
dot-product, the value will be an integer on the range [0,4].

Previously a SEQ instruction was used to clamp the resulting logic
value to [0,1] and invert the result.  Using an SGE instruction on the
negation of the dot-product result has the same effect.  Many older
shader architectures do not support the SEQ instruction.  It must be
emulated using two SGE instructions and a MUL.  On these
architectures, the single SGE saves two instructions.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1c674ea8756..4c8b097de6b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1237,8 +1237,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 	  ir->operands[1]->type->is_vector()) {
 	 src_reg temp = get_temp(glsl_type::vec4_type);
 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
+
+	 /* After the dot-product, the value will be an integer on the
+	  * range [0,4].  Zero becomes 1.0, and positive values become zero.
+	  */
 	 emit_dp(ir, result_dst, temp, temp, vector_elements);
-	 emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0));
+
+	 /* Negating the result of the dot-product gives values on the range
+	  * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
+	  * achieved using SGE.
+	  */
+	 src_reg sge_src = result_src;
+	 sge_src.negate = ~sge_src.negate;
+	 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
       } else {
 	 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
       }
-- 
cgit v1.2.3


From ff2cfb8989cd79218dfe2cd8c3de20f1ca7418e6 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 2 Aug 2011 12:17:20 -0700
Subject: ir_to_mesa: Emit a MAD(b, -a, b) for !a && b

!a && b occurs frequently when nested if-statements have been
flattened.  It should also be possible to use a MAD for (a && b) || c,
though that would require a MAD_SAT.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 52 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 4c8b097de6b..b1211c1145c 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -314,6 +314,8 @@ public:
 
    GLboolean try_emit_mad(ir_expression *ir,
 			  int mul_operand);
+   bool try_emit_mad_for_and_not(ir_expression *ir,
+				 int mul_operand);
    GLboolean try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
@@ -892,6 +894,46 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    return true;
 }
 
+/**
+ * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As a result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand;
+   src_reg a, b;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false;
+
+   ir->operands[other_operand]->accept(this);
+   a = this->result;
+   expr->operands[0]->accept(this);
+   b = this->result;
+
+   b.negate = ~b.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
+
+   return true;
+}
+
 GLboolean
 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
 {
@@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       if (try_emit_mad(ir, 0))
 	 return;
    }
+
+   /* Quick peephole: Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+	 return;
+      if (try_emit_mad_for_and_not(ir, 0))
+	 return;
+   }
+
    if (try_emit_sat(ir))
       return;
 
-- 
cgit v1.2.3


From 54c48a95e6e0573886433f94ac83293876ffe03d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 10 Feb 2011 15:48:27 -0800
Subject: mesa: Add partial constant propagation pass for Mesa IR

This cleans up some code generated by the IR-to-Mesa pass for i915.
In particular, some shaders involving arrays of constant matrices
result in really bad code.

v2: Silence several warnings from merging the gl_constant_value work.
Fix DP[23] folding.  Add support for a bunch more opcodes that appear
in piglit runs on i915.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/SConscript                       |   1 +
 src/mesa/program/prog_opt_constant_fold.c | 451 ++++++++++++++++++++++++++++++
 src/mesa/program/prog_optimize.c          |   2 +
 src/mesa/program/prog_optimize.h          |   3 +
 src/mesa/sources.mak                      |   1 +
 5 files changed, 458 insertions(+)
 create mode 100644 src/mesa/program/prog_opt_constant_fold.c

(limited to 'src/mesa')

diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 05aa0e8010e..b0c3334fa48 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -293,6 +293,7 @@ program_sources = [
     'program/prog_instruction.c',
     'program/prog_noise.c',
     'program/prog_optimize.c',
+    'program/prog_opt_constant_fold.c',
     'program/prog_parameter.c',
     'program/prog_parameter_layout.c',
     'program/prog_print.c',
diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c
new file mode 100644
index 00000000000..e2418b55451
--- /dev/null
+++ b/src/mesa/program/prog_opt_constant_fold.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_instruction.h"
+#include "prog_optimize.h"
+#include "prog_parameter.h"
+#include <stdbool.h>
+
+static bool
+src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
+{
+   unsigned i;
+
+   for (i = 0; i < num_srcs; i++) {
+      if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
+	 return false;
+   }
+
+   return true;
+}
+
+static struct prog_src_register
+src_reg_for_float(struct gl_program *prog, float val)
+{
+   struct prog_src_register src;
+   unsigned swiz;
+
+   memset(&src, 0, sizeof(src));
+
+   src.File = PROGRAM_CONSTANT;
+   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+					  (gl_constant_value *) &val, 1, &swiz);
+   src.Swizzle = swiz;
+   return src;
+}
+
+static struct prog_src_register
+src_reg_for_vec4(struct gl_program *prog, const float *val)
+{
+   struct prog_src_register src;
+   unsigned swiz;
+
+   memset(&src, 0, sizeof(src));
+
+   src.File = PROGRAM_CONSTANT;
+   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+					  (gl_constant_value *) val, 4, &swiz);
+   src.Swizzle = swiz;
+   return src;
+}
+
+static bool
+src_regs_are_same(const struct prog_src_register *a,
+		  const struct prog_src_register *b)
+{
+   return (a->File == b->File)
+      && (a->Index == b->Index)
+      && (a->Swizzle == b->Swizzle)
+      && (a->Abs == b->Abs)
+      && (a->Negate == b->Negate)
+      && (a->RelAddr == 0)
+      && (b->RelAddr == 0);
+}
+
+static void
+get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
+{
+   const gl_constant_value *const value =
+      prog->Parameters->ParameterValues[r->Index];
+
+   data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
+   data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
+   data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
+   data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
+
+   if (r->Abs) {
+      data[0] = fabsf(data[0]);
+      data[1] = fabsf(data[1]);
+      data[2] = fabsf(data[2]);
+      data[3] = fabsf(data[3]);
+   }
+
+   if (r->Negate & 0x01) {
+      data[0] = -data[0];
+   }
+
+   if (r->Negate & 0x02) {
+      data[1] = -data[1];
+   }
+
+   if (r->Negate & 0x04) {
+      data[2] = -data[2];
+   }
+
+   if (r->Negate & 0x08) {
+      data[3] = -data[3];
+   }
+}
+
+/**
+ * Try to replace instructions that produce a constant result with simple moves
+ *
+ * The hope is that a following copy propagation pass will eliminate the
+ * unnecessary move instructions.
+ */
+GLboolean
+_mesa_constant_fold(struct gl_program *prog)
+{
+   bool progress = false;
+   unsigned i;
+
+   for (i = 0; i < prog->NumInstructions; i++) {
+      struct prog_instruction *const inst = &prog->Instructions[i];
+
+      switch (inst->Opcode) {
+      case OPCODE_ADD:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = a[0] + b[0];
+	    result[1] = a[1] + b[1];
+	    result[2] = a[2] + b[2];
+	    result[3] = a[3] + b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_CMP:
+	 /* FINISHME: We could also optimize CMP instructions where the first
+	  * FINISHME: source is a constant that is either all < 0.0 or all
+	  * FINISHME: >= 0.0.
+	  */
+	 if (src_regs_are_constant(inst, 3)) {
+	    float a[4];
+	    float b[4];
+	    float c[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+	    get_value(prog, &inst->SrcReg[2], c);
+
+            result[0] = a[0] < 0.0f ? b[0] : c[0];
+            result[1] = a[1] < 0.0f ? b[1] : c[1];
+            result[2] = a[2] < 0.0f ? b[2] : c[2];
+            result[3] = a[3] < 0.0f ? b[3] : c[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_DP2:
+      case OPCODE_DP3:
+      case OPCODE_DP4:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result;
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    /* It seems like a loop could be used here, but we cleverly put
+	     * DP2A between DP2 and DP3.  Subtracting DP2 (or similar) from
+	     * the opcode results in various failures of the loop control.
+	     */
+	    result = (a[0] * b[0]) + (a[1] * b[1]);
+
+	    if (inst->Opcode >= OPCODE_DP3)
+	       result += a[2] * b[2];
+
+	    if (inst->Opcode == OPCODE_DP4)
+	       result += a[3] * b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_MUL:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = a[0] * b[0];
+	    result[1] = a[1] * b[1];
+	    result[2] = a[2] * b[2];
+	    result[3] = a[3] * b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SEQ:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SGE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SGT:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SLE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SLT:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SNE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      default:
+	 break;
+      }
+   }
+
+   return progress;
+}
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 3340ce0498b..25d9684b137 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -1358,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
          any_change = GL_TRUE;
       if (_mesa_remove_dead_code_local(program))
          any_change = GL_TRUE;
+
+      any_change = _mesa_constant_fold(program) || any_change;
       _mesa_reallocate_registers(program);
    } while (any_change);
 }
diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 463f5fc51c4..9854fb7a491 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -44,4 +44,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions,
 extern void
 _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program);
 
+extern GLboolean
+_mesa_constant_fold(struct gl_program *prog);
+
 #endif
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index ed008f8813e..5e77e0f5919 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -251,6 +251,7 @@ PROGRAM_SOURCES = \
 	program/prog_instruction.c \
 	program/prog_noise.c \
 	program/prog_optimize.c \
+	program/prog_opt_constant_fold.c \
 	program/prog_parameter.c \
 	program/prog_parameter_layout.c \
 	program/prog_print.c \
-- 
cgit v1.2.3


From 7125f1e87df359be4aad1d801b633146eeac7292 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 3 Aug 2011 17:12:29 -0700
Subject: mesa: Bump instruction execution limit to 65536

Shader Model 3.0[1] requires that shaders be able to execute at least
65536 instructions.  Bump Mesa maxExec to that limit.  This allows
several vertex shaders in the OpenGL ES 2.0 conformance test suite to
run to completion.

1: http://en.wikipedia.org/wiki/High_Level_Shader_Language

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/prog_execute.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index dbfd1b91875..c70a1e344e5 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx,
                       struct gl_program_machine *machine)
 {
    const GLuint numInst = program->NumInstructions;
-   const GLuint maxExec = 10000;
+   const GLuint maxExec = 65536;
    GLuint pc, numExec = 0;
 
    machine->CurProgram = program;
-- 
cgit v1.2.3


From b629d5ba24f76ed6af35455a874d351fde1e5bbe Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Fri, 1 Jul 2011 13:49:18 +0300
Subject: xmlconfig: Make the error message more informative

---
 src/mesa/drivers/dri/common/xmlconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
index 77967ac2a43..12dd31bb162 100644
--- a/src/mesa/drivers/dri/common/xmlconfig.c
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -567,7 +567,7 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) {
     } else
 	defaultVal = attrVal[OA_DEFAULT];
     if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal))
-	XML_FATAL ("illegal default value: %s.", defaultVal);
+	XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal);
 
     if (attrVal[OA_VALID]) {
 	if (cache->info[opt].type == DRI_BOOL)
-- 
cgit v1.2.3


From 59e56957cce16e5d993974e4b7f339afc9cb949b Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Fri, 1 Jul 2011 13:01:00 +0300
Subject: xmlpool.h: fix a typo

---
 src/mesa/drivers/dri/common/xmlpool.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h
index 587517ea10a..ffea430024d 100644
--- a/src/mesa/drivers/dri/common/xmlpool.h
+++ b/src/mesa/drivers/dri/common/xmlpool.h
@@ -60,7 +60,7 @@
 #define DRI_CONF_OPT_BEGIN(name,type,def) \
 "<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n"
 
-/** \brief Begin an option definition with qouted default value */
+/** \brief Begin an option definition with quoted default value */
 #define DRI_CONF_OPT_BEGIN_Q(name,type,def) \
 "<option name=\""#name"\" type=\""#type"\" default="#def">\n"
 
-- 
cgit v1.2.3


From 718b894dbb585af52dd24defb2e8c130216e5485 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 16 Aug 2011 16:30:52 -0600
Subject: st/mesa: fix incorrect loop over instruction src regs

The array of src regs is of size 3, not 4.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index aef23e7d207..7b90c812595 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3443,7 +3443,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
          /* Continuing the block, clear any channels from the write array that
           * are read by this instruction.
           */
-         for (int i = 0; i < 4; i++) {
+         for (unsigned i = 0; i < Elements(inst->src); i++) {
             if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
                /* Any temporary might be read, so no dead code elimination 
                 * across this instruction.
-- 
cgit v1.2.3


From 3f78f719732b87e6707f94c187ad6e263c6c2ef0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 22:36:18 -0700
Subject: i965/fs: Fix 32-bit integer multiplication.

The MUL opcode does a 16bit * 32bit multiply, and we need to do the
MACH to get the top 16bit * 32bit added in.

Fixes fs-op-mult-int-*, fs-op-mult-ivec*

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp    |  5 +++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 18 +++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 7367ccaa7e0..482d250c333 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -630,6 +630,11 @@ fs_visitor::generate_code()
       case BRW_OPCODE_MUL:
 	 brw_MUL(p, dst, src[0], src[1]);
 	 break;
+      case BRW_OPCODE_MACH:
+	 brw_set_acc_write_control(p, 1);
+	 brw_MACH(p, dst, src[0], src[1]);
+	 brw_set_acc_write_control(p, 0);
+	 break;
 
       case BRW_OPCODE_FRC:
 	 brw_FRC(p, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8b4f5bbac15..2dc9132cec6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -287,7 +287,23 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_mul:
-      emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+      if (ir->type->is_integer()) {
+	 /* For integer multiplication, the MUL uses the low 16 bits
+	  * of one of the operands (src0 on gen6, src1 on gen7).  The
+	  * MACH accumulates in the contribution of the upper 16 bits
+	  * of that operand.
+	  *
+	  * FINISHME: Emit just the MUL if we know an operand is small
+	  * enough.
+	  */
+	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+	 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+	 emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]);
+	 emit(BRW_OPCODE_MOV, this->result, fs_reg(acc));
+      } else {
+	 emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+      }
       break;
    case ir_binop_div:
       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
-- 
cgit v1.2.3


From e9ae4cadf5008f25aeae1bf4fa0c9c3397ee8c18 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Tue, 16 Aug 2011 10:23:52 -0700
Subject: glapi: update .gitignore for generated ES dispatch headers

Commit 6eff33dc (glapi: generate ES dispatch headers from core mesa)
replaced the autogenerated files
src/mapi/es1api/main/{dispatch,remap_helper}.h with new autogenerated
files src/mesa/main/api_exec_es{1,2}_{dispatch,remap_helper}.h.  This
patch updates the .gitignore files to properly ignore the new
autogenerated files, and stop ignoring the old autogenerated files.

Reviewed-by: Chia-I Wu <olv@lunarg.com>
---
 src/mapi/es1api/.gitignore | 3 ---
 src/mesa/main/.gitignore   | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mapi/es1api/.gitignore b/src/mapi/es1api/.gitignore
index b21f1d14c6f..dfe465677c4 100644
--- a/src/mapi/es1api/.gitignore
+++ b/src/mapi/es1api/.gitignore
@@ -1,4 +1 @@
 glapi_mapi_tmp.h
-glapi-stamp
-glapi
-main
diff --git a/src/mesa/main/.gitignore b/src/mesa/main/.gitignore
index 2575f44df4a..d0744e3f0d7 100644
--- a/src/mesa/main/.gitignore
+++ b/src/mesa/main/.gitignore
@@ -4,3 +4,7 @@ get_es1.c
 get_es2.c
 git_sha1.h
 git_sha1.h.tmp
+api_exec_es1_dispatch.h
+api_exec_es1_remap_helper.h
+api_exec_es2_dispatch.h
+api_exec_es2_remap_helper.h
-- 
cgit v1.2.3


From ccecc08f79afc020c8c0acface04a8e53e3a7c32 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 11:51:48 -0600
Subject: mesa: fix incorrect error code in _mesa_FramebufferTexture1D/3DEXT()

The spec says GL_INVALID_OPERATION is generated when texture!=0 and
textarget is not a legal value.  We had this right for the 2D function.
---
 src/mesa/main/fbobject.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 82eb7fb718d..7646f9bdb03 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1985,7 +1985,7 @@ _mesa_FramebufferTexture1DEXT(GLenum target, GLenum attachment,
    GET_CURRENT_CONTEXT(ctx);
 
    if ((texture != 0) && (textarget != GL_TEXTURE_1D)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
+      _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glFramebufferTexture1DEXT(textarget)");
       return;
    }
@@ -2023,7 +2023,7 @@ _mesa_FramebufferTexture3DEXT(GLenum target, GLenum attachment,
    GET_CURRENT_CONTEXT(ctx);
 
    if ((texture != 0) && (textarget != GL_TEXTURE_3D)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
+      _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glFramebufferTexture3DEXT(textarget)");
       return;
    }
-- 
cgit v1.2.3


From f23c3ebeccc5c591b79c10cbdb693270ef27a2f5 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Fri, 12 Aug 2011 16:50:27 -0700
Subject: mesa: Declare _mesa_meta_begin()/end() as public

Declare _mesa_meta_begin()/end() in meta.h so that drivers can write
custom meta-ops (such as HiZ resolves for i965).

This necessitates moving the META_* macros into meta.h. To prevent
naming collisions, this commit renames each macro to be MESA_META_*.

Reviewed-by: Brian Paul <brianp@vmware.com>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/drivers/common/meta.c | 256 ++++++++++++++++++-----------------------
 src/mesa/drivers/common/meta.h |  33 ++++++
 2 files changed, 147 insertions(+), 142 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index f9b4755988b..e37b78aae78 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -73,64 +73,36 @@
 /** Return offset in bytes of the field within a vertex struct */
 #define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD))
 
-
-/**
- * Flags passed to _mesa_meta_begin().
- */
-/*@{*/
-#define META_ALL              ~0x0
-#define META_ALPHA_TEST        0x1
-#define META_BLEND             0x2  /**< includes logicop */
-#define META_COLOR_MASK        0x4
-#define META_DEPTH_TEST        0x8
-#define META_FOG              0x10
-#define META_PIXEL_STORE      0x20
-#define META_PIXEL_TRANSFER   0x40
-#define META_RASTERIZATION    0x80
-#define META_SCISSOR         0x100
-#define META_SHADER          0x200
-#define META_STENCIL_TEST    0x400
-#define META_TRANSFORM       0x800 /**< modelview/projection matrix state */
-#define META_TEXTURE        0x1000
-#define META_VERTEX         0x2000
-#define META_VIEWPORT       0x4000
-#define META_CLAMP_FRAGMENT_COLOR 0x8000
-#define META_CLAMP_VERTEX_COLOR 0x10000
-#define META_CONDITIONAL_RENDER 0x20000
-#define META_CLIP          0x40000
-/*@}*/
-
-
 /**
  * State which we may save/restore across meta ops.
  * XXX this may be incomplete...
  */
 struct save_state
 {
-   GLbitfield SavedState;  /**< bitmask of META_* flags */
+   GLbitfield SavedState;  /**< bitmask of MESA_META_* flags */
 
-   /** META_ALPHA_TEST */
+   /** MESA_META_ALPHA_TEST */
    GLboolean AlphaEnabled;
    GLenum AlphaFunc;
    GLclampf AlphaRef;
 
-   /** META_BLEND */
+   /** MESA_META_BLEND */
    GLbitfield BlendEnabled;
    GLboolean ColorLogicOpEnabled;
 
-   /** META_COLOR_MASK */
+   /** MESA_META_COLOR_MASK */
    GLubyte ColorMask[MAX_DRAW_BUFFERS][4];
 
-   /** META_DEPTH_TEST */
+   /** MESA_META_DEPTH_TEST */
    struct gl_depthbuffer_attrib Depth;
 
-   /** META_FOG */
+   /** MESA_META_FOG */
    GLboolean Fog;
 
-   /** META_PIXEL_STORE */
+   /** MESA_META_PIXEL_STORE */
    struct gl_pixelstore_attrib Pack, Unpack;
 
-   /** META_PIXEL_TRANSFER */
+   /** MESA_META_PIXEL_TRANSFER */
    GLfloat RedBias, RedScale;
    GLfloat GreenBias, GreenScale;
    GLfloat BlueBias, BlueScale;
@@ -138,17 +110,17 @@ struct save_state
    GLfloat DepthBias, DepthScale;
    GLboolean MapColorFlag;
 
-   /** META_RASTERIZATION */
+   /** MESA_META_RASTERIZATION */
    GLenum FrontPolygonMode, BackPolygonMode;
    GLboolean PolygonOffset;
    GLboolean PolygonSmooth;
    GLboolean PolygonStipple;
    GLboolean PolygonCull;
 
-   /** META_SCISSOR */
+   /** MESA_META_SCISSOR */
    struct gl_scissor_attrib Scissor;
 
-   /** META_SHADER */
+   /** MESA_META_SHADER */
    GLboolean VertexProgramEnabled;
    struct gl_vertex_program *VertexProgram;
    GLboolean FragmentProgramEnabled;
@@ -158,19 +130,19 @@ struct save_state
    struct gl_shader_program *FragmentShader;
    struct gl_shader_program *ActiveShader;
 
-   /** META_STENCIL_TEST */
+   /** MESA_META_STENCIL_TEST */
    struct gl_stencil_attrib Stencil;
 
-   /** META_TRANSFORM */
+   /** MESA_META_TRANSFORM */
    GLenum MatrixMode;
    GLfloat ModelviewMatrix[16];
    GLfloat ProjectionMatrix[16];
    GLfloat TextureMatrix[16];
 
-   /** META_CLIP */
+   /** MESA_META_CLIP */
    GLbitfield ClipPlanesEnabled;
 
-   /** META_TEXTURE */
+   /** MESA_META_TEXTURE */
    GLuint ActiveUnit;
    GLuint ClientActiveUnit;
    /** for unit[0] only */
@@ -180,21 +152,21 @@ struct save_state
    GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS];
    GLuint EnvMode;  /* unit[0] only */
 
-   /** META_VERTEX */
+   /** MESA_META_VERTEX */
    struct gl_array_object *ArrayObj;
    struct gl_buffer_object *ArrayBufferObj;
 
-   /** META_VIEWPORT */
+   /** MESA_META_VIEWPORT */
    GLint ViewportX, ViewportY, ViewportW, ViewportH;
    GLclampd DepthNear, DepthFar;
 
-   /** META_CLAMP_FRAGMENT_COLOR */
+   /** MESA_META_CLAMP_FRAGMENT_COLOR */
    GLenum ClampFragmentColor;
 
-   /** META_CLAMP_VERTEX_COLOR */
+   /** MESA_META_CLAMP_VERTEX_COLOR */
    GLenum ClampVertexColor;
 
-   /** META_CONDITIONAL_RENDER */
+   /** MESA_META_CONDITIONAL_RENDER */
    struct gl_query_object *CondRenderQuery;
    GLenum CondRenderMode;
 
@@ -342,10 +314,10 @@ _mesa_meta_free(struct gl_context *ctx)
  * Enter meta state.  This is like a light-weight version of glPushAttrib
  * but it also resets most GL state back to default values.
  *
- * \param state  bitmask of META_* flags indicating which attribute groups
+ * \param state  bitmask of MESA_META_* flags indicating which attribute groups
  *               to save and reset to their defaults
  */
-static void
+void
 _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
 {
    struct save_state *save;
@@ -357,7 +329,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
    memset(save, 0, sizeof(*save));
    save->SavedState = state;
 
-   if (state & META_ALPHA_TEST) {
+   if (state & MESA_META_ALPHA_TEST) {
       save->AlphaEnabled = ctx->Color.AlphaEnabled;
       save->AlphaFunc = ctx->Color.AlphaFunc;
       save->AlphaRef = ctx->Color.AlphaRef;
@@ -365,7 +337,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
          _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_FALSE);
    }
 
-   if (state & META_BLEND) {
+   if (state & MESA_META_BLEND) {
       save->BlendEnabled = ctx->Color.BlendEnabled;
       if (ctx->Color.BlendEnabled) {
          if (ctx->Extensions.EXT_draw_buffers2) {
@@ -383,7 +355,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
          _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, GL_FALSE);
    }
 
-   if (state & META_COLOR_MASK) {
+   if (state & MESA_META_COLOR_MASK) {
       memcpy(save->ColorMask, ctx->Color.ColorMask,
              sizeof(ctx->Color.ColorMask));
       if (!ctx->Color.ColorMask[0][0] ||
@@ -393,26 +365,26 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
          _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
    }
 
-   if (state & META_DEPTH_TEST) {
+   if (state & MESA_META_DEPTH_TEST) {
       save->Depth = ctx->Depth; /* struct copy */
       if (ctx->Depth.Test)
          _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE);
    }
 
-   if (state & META_FOG) {
+   if (state & MESA_META_FOG) {
       save->Fog = ctx->Fog.Enabled;
       if (ctx->Fog.Enabled)
          _mesa_set_enable(ctx, GL_FOG, GL_FALSE);
    }
 
-   if (state & META_PIXEL_STORE) {
+   if (state & MESA_META_PIXEL_STORE) {
       save->Pack = ctx->Pack;
       save->Unpack = ctx->Unpack;
       ctx->Pack = ctx->DefaultPacking;
       ctx->Unpack = ctx->DefaultPacking;
    }
 
-   if (state & META_PIXEL_TRANSFER) {
+   if (state & MESA_META_PIXEL_TRANSFER) {
       save->RedScale = ctx->Pixel.RedScale;
       save->RedBias = ctx->Pixel.RedBias;
       save->GreenScale = ctx->Pixel.GreenScale;
@@ -435,7 +407,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       ctx->NewState |=_NEW_PIXEL;
    }
 
-   if (state & META_RASTERIZATION) {
+   if (state & MESA_META_RASTERIZATION) {
       save->FrontPolygonMode = ctx->Polygon.FrontMode;
       save->BackPolygonMode = ctx->Polygon.BackMode;
       save->PolygonOffset = ctx->Polygon.OffsetFill;
@@ -449,12 +421,12 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE);
    }
 
-   if (state & META_SCISSOR) {
+   if (state & MESA_META_SCISSOR) {
       save->Scissor = ctx->Scissor; /* struct copy */
       _mesa_set_enable(ctx, GL_SCISSOR_TEST, GL_FALSE);
    }
 
-   if (state & META_SHADER) {
+   if (state & MESA_META_SHADER) {
       if (ctx->Extensions.ARB_vertex_program) {
          save->VertexProgramEnabled = ctx->VertexProgram.Enabled;
          _mesa_reference_vertprog(ctx, &save->VertexProgram,
@@ -483,14 +455,14 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       }
    }
 
-   if (state & META_STENCIL_TEST) {
+   if (state & MESA_META_STENCIL_TEST) {
       save->Stencil = ctx->Stencil; /* struct copy */
       if (ctx->Stencil.Enabled)
          _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE);
       /* NOTE: other stencil state not reset */
    }
 
-   if (state & META_TEXTURE) {
+   if (state & MESA_META_TEXTURE) {
       GLuint u, tgt;
 
       save->ActiveUnit = ctx->Texture.CurrentUnit;
@@ -529,7 +501,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
    }
 
-   if (state & META_TRANSFORM) {
+   if (state & MESA_META_TRANSFORM) {
       GLuint activeTexture = ctx->Texture.CurrentUnit;
       memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m,
              16 * sizeof(GLfloat));
@@ -552,7 +524,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
                   -1.0, 1.0);
    }
 
-   if (state & META_CLIP) {
+   if (state & MESA_META_CLIP) {
       save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled;
       if (ctx->Transform.ClipPlanesEnabled) {
          GLuint i;
@@ -562,7 +534,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       }
    }
 
-   if (state & META_VERTEX) {
+   if (state & MESA_META_VERTEX) {
       /* save vertex array object state */
       _mesa_reference_array_object(ctx, &save->ArrayObj,
                                    ctx->Array.ArrayObj);
@@ -571,7 +543,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       /* set some default state? */
    }
 
-   if (state & META_VIEWPORT) {
+   if (state & MESA_META_VIEWPORT) {
       /* save viewport state */
       save->ViewportX = ctx->Viewport.X;
       save->ViewportY = ctx->Viewport.Y;
@@ -592,7 +564,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_DepthRange(0.0, 1.0);
    }
 
-   if (state & META_CLAMP_FRAGMENT_COLOR) {
+   if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
       save->ClampFragmentColor = ctx->Color.ClampFragmentColor;
 
       /* Generally in here we want to do clamping according to whether
@@ -603,7 +575,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
 	 _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
    }
 
-   if (state & META_CLAMP_VERTEX_COLOR) {
+   if (state & MESA_META_CLAMP_VERTEX_COLOR) {
       save->ClampVertexColor = ctx->Light.ClampVertexColor;
 
       /* Generally in here we never want vertex color clamping --
@@ -612,7 +584,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, GL_FALSE);
    }
 
-   if (state & META_CONDITIONAL_RENDER) {
+   if (state & MESA_META_CONDITIONAL_RENDER) {
       save->CondRenderQuery = ctx->Query.CondRenderQuery;
       save->CondRenderMode = ctx->Query.CondRenderMode;
 
@@ -632,19 +604,19 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
 /**
  * Leave meta state.  This is like a light-weight version of glPopAttrib().
  */
-static void
+void
 _mesa_meta_end(struct gl_context *ctx)
 {
    struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth];
    const GLbitfield state = save->SavedState;
 
-   if (state & META_ALPHA_TEST) {
+   if (state & MESA_META_ALPHA_TEST) {
       if (ctx->Color.AlphaEnabled != save->AlphaEnabled)
          _mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled);
       _mesa_AlphaFunc(save->AlphaFunc, save->AlphaRef);
    }
 
-   if (state & META_BLEND) {
+   if (state & MESA_META_BLEND) {
       if (ctx->Color.BlendEnabled != save->BlendEnabled) {
          if (ctx->Extensions.EXT_draw_buffers2) {
             GLuint i;
@@ -660,7 +632,7 @@ _mesa_meta_end(struct gl_context *ctx)
          _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled);
    }
 
-   if (state & META_COLOR_MASK) {
+   if (state & MESA_META_COLOR_MASK) {
       GLuint i;
       for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
          if (!TEST_EQ_4V(ctx->Color.ColorMask[i], save->ColorMask[i])) {
@@ -679,23 +651,23 @@ _mesa_meta_end(struct gl_context *ctx)
       }
    }
 
-   if (state & META_DEPTH_TEST) {
+   if (state & MESA_META_DEPTH_TEST) {
       if (ctx->Depth.Test != save->Depth.Test)
          _mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test);
       _mesa_DepthFunc(save->Depth.Func);
       _mesa_DepthMask(save->Depth.Mask);
    }
 
-   if (state & META_FOG) {
+   if (state & MESA_META_FOG) {
       _mesa_set_enable(ctx, GL_FOG, save->Fog);
    }
 
-   if (state & META_PIXEL_STORE) {
+   if (state & MESA_META_PIXEL_STORE) {
       ctx->Pack = save->Pack;
       ctx->Unpack = save->Unpack;
    }
 
-   if (state & META_PIXEL_TRANSFER) {
+   if (state & MESA_META_PIXEL_TRANSFER) {
       ctx->Pixel.RedScale = save->RedScale;
       ctx->Pixel.RedBias = save->RedBias;
       ctx->Pixel.GreenScale = save->GreenScale;
@@ -709,7 +681,7 @@ _mesa_meta_end(struct gl_context *ctx)
       ctx->NewState |=_NEW_PIXEL;
    }
 
-   if (state & META_RASTERIZATION) {
+   if (state & MESA_META_RASTERIZATION) {
       _mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode);
       _mesa_PolygonMode(GL_BACK, save->BackPolygonMode);
       _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, save->PolygonStipple);
@@ -718,13 +690,13 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull);
    }
 
-   if (state & META_SCISSOR) {
+   if (state & MESA_META_SCISSOR) {
       _mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled);
       _mesa_Scissor(save->Scissor.X, save->Scissor.Y,
                     save->Scissor.Width, save->Scissor.Height);
    }
 
-   if (state & META_SHADER) {
+   if (state & MESA_META_SHADER) {
       if (ctx->Extensions.ARB_vertex_program) {
          _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB,
                           save->VertexProgramEnabled);
@@ -756,7 +728,7 @@ _mesa_meta_end(struct gl_context *ctx)
 				     save->ActiveShader);
    }
 
-   if (state & META_STENCIL_TEST) {
+   if (state & MESA_META_STENCIL_TEST) {
       const struct gl_stencil_attrib *stencil = &save->Stencil;
 
       _mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled);
@@ -787,7 +759,7 @@ _mesa_meta_end(struct gl_context *ctx)
                               stencil->ZPassFunc[1]);
    }
 
-   if (state & META_TEXTURE) {
+   if (state & MESA_META_TEXTURE) {
       GLuint u, tgt;
 
       ASSERT(ctx->Texture.CurrentUnit == 0);
@@ -838,7 +810,7 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit);
    }
 
-   if (state & META_TRANSFORM) {
+   if (state & MESA_META_TRANSFORM) {
       GLuint activeTexture = ctx->Texture.CurrentUnit;
       _mesa_ActiveTextureARB(GL_TEXTURE0);
       _mesa_MatrixMode(GL_TEXTURE);
@@ -854,7 +826,7 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_MatrixMode(save->MatrixMode);
    }
 
-   if (state & META_CLIP) {
+   if (state & MESA_META_CLIP) {
       if (save->ClipPlanesEnabled) {
          GLuint i;
          for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
@@ -865,7 +837,7 @@ _mesa_meta_end(struct gl_context *ctx)
       }
    }
 
-   if (state & META_VERTEX) {
+   if (state & MESA_META_VERTEX) {
       /* restore vertex buffer object */
       _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name);
       _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL);
@@ -875,7 +847,7 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_reference_array_object(ctx, &save->ArrayObj, NULL);
    }
 
-   if (state & META_VIEWPORT) {
+   if (state & MESA_META_VIEWPORT) {
       if (save->ViewportX != ctx->Viewport.X ||
           save->ViewportY != ctx->Viewport.Y ||
           save->ViewportW != ctx->Viewport.Width ||
@@ -886,15 +858,15 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_DepthRange(save->DepthNear, save->DepthFar);
    }
 
-   if (state & META_CLAMP_FRAGMENT_COLOR) {
+   if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
       _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, save->ClampFragmentColor);
    }
 
-   if (state & META_CLAMP_VERTEX_COLOR) {
+   if (state & MESA_META_CLAMP_VERTEX_COLOR) {
       _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, save->ClampVertexColor);
    }
 
-   if (state & META_CONDITIONAL_RENDER) {
+   if (state & MESA_META_CONDITIONAL_RENDER) {
       if (save->CondRenderQuery)
 	 _mesa_BeginConditionalRender(save->CondRenderQuery->Id,
 				      save->CondRenderMode);
@@ -1360,7 +1332,7 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
    }
 
    /* only scissor effects blit so save/clear all other relevant state */
-   _mesa_meta_begin(ctx, ~META_SCISSOR);
+   _mesa_meta_begin(ctx, ~MESA_META_SCISSOR);
 
    if (blit->ArrayObj == 0) {
       /* one-time setup */
@@ -1489,15 +1461,15 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
    };
    struct vertex verts[4];
    /* save all state but scissor, pixel pack/unpack */
-   GLbitfield metaSave = (META_ALL -
-			  META_SCISSOR -
-			  META_PIXEL_STORE -
-			  META_CONDITIONAL_RENDER);
+   GLbitfield metaSave = (MESA_META_ALL -
+			  MESA_META_SCISSOR -
+			  MESA_META_PIXEL_STORE -
+			  MESA_META_CONDITIONAL_RENDER);
    const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
 
    if (buffers & BUFFER_BITS_COLOR) {
       /* if clearing color buffers, don't save/restore colormask */
-      metaSave -= META_COLOR_MASK;
+      metaSave -= MESA_META_COLOR_MASK;
    }
 
    _mesa_meta_begin(ctx, metaSave);
@@ -1532,7 +1504,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
       _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
    }
    else {
-      ASSERT(metaSave & META_COLOR_MASK);
+      ASSERT(metaSave & MESA_META_COLOR_MASK);
       _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
    }
 
@@ -1669,22 +1641,22 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
       GLfloat x, y, z;
    } verts[4];
 
-   metaSave = (META_ALPHA_TEST |
-	       META_BLEND |
-	       META_DEPTH_TEST |
-	       META_RASTERIZATION |
-	       META_SHADER |
-	       META_STENCIL_TEST |
-	       META_VERTEX |
-	       META_VIEWPORT |
-	       META_CLIP |
-	       META_CLAMP_FRAGMENT_COLOR);
+   metaSave = (MESA_META_ALPHA_TEST |
+	       MESA_META_BLEND |
+	       MESA_META_DEPTH_TEST |
+	       MESA_META_RASTERIZATION |
+	       MESA_META_SHADER |
+	       MESA_META_STENCIL_TEST |
+	       MESA_META_VERTEX |
+	       MESA_META_VIEWPORT |
+	       MESA_META_CLIP |
+	       MESA_META_CLAMP_FRAGMENT_COLOR);
 
    if (!(buffers & BUFFER_BITS_COLOR)) {
       /* We'll use colormask to disable color writes.  Otherwise,
        * respect color mask
        */
-      metaSave |= META_COLOR_MASK;
+      metaSave |= MESA_META_COLOR_MASK;
    }
 
    _mesa_meta_begin(ctx, metaSave);
@@ -1706,7 +1678,7 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
       _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
    }
    else {
-      ASSERT(metaSave & META_COLOR_MASK);
+      ASSERT(metaSave & MESA_META_COLOR_MASK);
       _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
    }
 
@@ -1788,13 +1760,13 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY,
    /* Most GL state applies to glCopyPixels, but a there's a few things
     * we need to override:
     */
-   _mesa_meta_begin(ctx, (META_RASTERIZATION |
-                          META_SHADER |
-                          META_TEXTURE |
-                          META_TRANSFORM |
-                          META_CLIP |
-                          META_VERTEX |
-                          META_VIEWPORT));
+   _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION |
+                          MESA_META_SHADER |
+                          MESA_META_TEXTURE |
+                          MESA_META_TRANSFORM |
+                          MESA_META_CLIP |
+                          MESA_META_VERTEX |
+                          MESA_META_VIEWPORT));
 
    if (copypix->ArrayObj == 0) {
       /* one-time setup */
@@ -2069,10 +2041,10 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
           * in [0,1].
           */
          texIntFormat = GL_ALPHA;
-         metaExtraSave = (META_COLOR_MASK |
-                          META_DEPTH_TEST |
-                          META_SHADER |
-                          META_STENCIL_TEST);
+         metaExtraSave = (MESA_META_COLOR_MASK |
+                          MESA_META_DEPTH_TEST |
+                          MESA_META_SHADER |
+                          MESA_META_STENCIL_TEST);
       }
       else {
          fallback = GL_TRUE;
@@ -2082,7 +2054,7 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
       if (ctx->Extensions.ARB_depth_texture &&
           ctx->Extensions.ARB_fragment_program) {
          texIntFormat = GL_DEPTH_COMPONENT;
-         metaExtraSave = (META_SHADER);
+         metaExtraSave = (MESA_META_SHADER);
       }
       else {
          fallback = GL_TRUE;
@@ -2110,14 +2082,14 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
    /* Most GL state applies to glDrawPixels (like blending, stencil, etc),
     * but a there's a few things we need to override:
     */
-   _mesa_meta_begin(ctx, (META_RASTERIZATION |
-                          META_SHADER |
-                          META_TEXTURE |
-                          META_TRANSFORM |
-                          META_CLIP |
-                          META_VERTEX |
-                          META_VIEWPORT |
-			  META_CLAMP_FRAGMENT_COLOR |
+   _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION |
+                          MESA_META_SHADER |
+                          MESA_META_TEXTURE |
+                          MESA_META_TRANSFORM |
+                          MESA_META_CLIP |
+                          MESA_META_VERTEX |
+                          MESA_META_VIEWPORT |
+			  MESA_META_CLAMP_FRAGMENT_COLOR |
                           metaExtraSave));
 
    newTex = alloc_texture(tex, width, height, texIntFormat);
@@ -2318,15 +2290,15 @@ _mesa_meta_Bitmap(struct gl_context *ctx,
    /* Most GL state applies to glBitmap (like blending, stencil, etc),
     * but a there's a few things we need to override:
     */
-   _mesa_meta_begin(ctx, (META_ALPHA_TEST |
-                          META_PIXEL_STORE |
-                          META_RASTERIZATION |
-                          META_SHADER |
-                          META_TEXTURE |
-                          META_TRANSFORM |
-                          META_CLIP |
-                          META_VERTEX |
-                          META_VIEWPORT));
+   _mesa_meta_begin(ctx, (MESA_META_ALPHA_TEST |
+                          MESA_META_PIXEL_STORE |
+                          MESA_META_RASTERIZATION |
+                          MESA_META_SHADER |
+                          MESA_META_TEXTURE |
+                          MESA_META_TRANSFORM |
+                          MESA_META_CLIP |
+                          MESA_META_VERTEX |
+                          MESA_META_VIEWPORT));
 
    if (bitmap->ArrayObj == 0) {
       /* one-time setup */
@@ -2544,7 +2516,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
       faceTarget = target;
    }
 
-   _mesa_meta_begin(ctx, META_ALL);
+   _mesa_meta_begin(ctx, MESA_META_ALL);
 
    if (original_active_unit != 0)
       _mesa_BindTexture(target, texObj->Name);
@@ -2900,7 +2872,7 @@ copy_tex_sub_image(struct gl_context *ctx,
    /*
     * Read image from framebuffer (disable pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
    ctx->Driver.ReadPixels(ctx, x, y, width, height,
 			  format, type, &ctx->Pack, buf);
    _mesa_meta_end(ctx);
@@ -2910,7 +2882,7 @@ copy_tex_sub_image(struct gl_context *ctx,
    /*
     * Store texture data (with pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE);
    if (target == GL_TEXTURE_1D) {
       ctx->Driver.TexSubImage1D(ctx, target, level, xoffset,
                                 width, format, type, buf,
@@ -2982,7 +2954,7 @@ _mesa_meta_CopyColorTable(struct gl_context *ctx,
    /*
     * Read image from framebuffer (disable pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
    ctx->Driver.ReadPixels(ctx, x, y, width, 1,
                           GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
 
@@ -3009,7 +2981,7 @@ _mesa_meta_CopyColorSubTable(struct gl_context *ctx,GLenum target, GLsizei start
    /*
     * Read image from framebuffer (disable pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
    ctx->Driver.ReadPixels(ctx, x, y, width, 1,
                           GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
 
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index 95b4b5579c8..ac20e370eb8 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -26,6 +26,33 @@
 #ifndef META_H
 #define META_H
 
+/**
+ * \name Flags for meta operations
+ * \{
+ *
+ * These flags are passed to _mesa_meta_begin().
+ */
+#define MESA_META_ALL                      ~0x0
+#define MESA_META_ALPHA_TEST                0x1
+#define MESA_META_BLEND                     0x2  /**< includes logicop */
+#define MESA_META_COLOR_MASK                0x4
+#define MESA_META_DEPTH_TEST                0x8
+#define MESA_META_FOG                      0x10
+#define MESA_META_PIXEL_STORE              0x20
+#define MESA_META_PIXEL_TRANSFER           0x40
+#define MESA_META_RASTERIZATION            0x80
+#define MESA_META_SCISSOR                 0x100
+#define MESA_META_SHADER                  0x200
+#define MESA_META_STENCIL_TEST            0x400
+#define MESA_META_TRANSFORM               0x800 /**< modelview/projection matrix state */
+#define MESA_META_TEXTURE                0x1000
+#define MESA_META_VERTEX                 0x2000
+#define MESA_META_VIEWPORT               0x4000
+#define MESA_META_CLAMP_FRAGMENT_COLOR   0x8000
+#define MESA_META_CLAMP_VERTEX_COLOR    0x10000
+#define MESA_META_CONDITIONAL_RENDER    0x20000
+#define MESA_META_CLIP                  0x40000
+/**\}*/
 
 extern void
 _mesa_meta_init(struct gl_context *ctx);
@@ -33,6 +60,12 @@ _mesa_meta_init(struct gl_context *ctx);
 extern void
 _mesa_meta_free(struct gl_context *ctx);
 
+extern void
+_mesa_meta_begin(struct gl_context *ctx, GLbitfield state);
+
+extern void
+_mesa_meta_end(struct gl_context *ctx);
+
 extern void
 _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
                            GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
-- 
cgit v1.2.3


From 352cab498a6210e1fdd57d6be2a30d33024ef37f Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 14:35:27 -0600
Subject: mesa: restructure error checking in
 _mesa_FramebufferTexture1D/2DEXT()

In anticipation of adding more texture targets.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/fbobject.c | 56 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 11 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 7646f9bdb03..e25ec8cc2b7 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1984,10 +1984,23 @@ _mesa_FramebufferTexture1DEXT(GLenum target, GLenum attachment,
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   if ((texture != 0) && (textarget != GL_TEXTURE_1D)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glFramebufferTexture1DEXT(textarget)");
-      return;
+   if (texture != 0) {
+      GLboolean error;
+
+      switch (textarget) {
+      case GL_TEXTURE_1D:
+         error = GL_FALSE;
+         break;
+      default:
+         error = GL_TRUE;
+      }
+
+      if (error) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glFramebufferTexture1DEXT(textarget=%s)",
+                     _mesa_lookup_enum_by_nr(textarget));
+         return;
+      }
    }
 
    framebuffer_texture(ctx, "1D", target, attachment, textarget, texture,
@@ -2001,13 +2014,34 @@ _mesa_FramebufferTexture2DEXT(GLenum target, GLenum attachment,
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   if ((texture != 0) &&
-       (textarget != GL_TEXTURE_2D) &&
-       (textarget != GL_TEXTURE_RECTANGLE_ARB) &&
-       (!is_cube_face(textarget))) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glFramebufferTexture2DEXT(textarget=0x%x)", textarget);
-      return;
+   if (texture != 0) {
+      GLboolean error;
+
+      switch (textarget) {
+      case GL_TEXTURE_2D:
+         error = GL_FALSE;
+         break;
+      case GL_TEXTURE_RECTANGLE:
+         error = !ctx->Extensions.NV_texture_rectangle;
+         break;
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+         error = !ctx->Extensions.ARB_texture_cube_map;
+         break;
+      default:
+         error = GL_FALSE;
+      }
+
+      if (error) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glFramebufferTexture2DEXT(textarget=%s)",
+                     _mesa_lookup_enum_by_nr(textarget));
+         return;
+      }
    }
 
    framebuffer_texture(ctx, "2D", target, attachment, textarget, texture,
-- 
cgit v1.2.3


From 0eb18ee55719377ebd90456bde605384ce4ec14a Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 15:54:53 -0600
Subject: mesa: set Q=1 for OPCODE_TEX execution

Q should not be significant for OPCODE_TEX, but it winds up getting
passed to the compute_lambda() function.  Make sure it's 1.0 to
prevent garbage values, which is effectively what we get when the
swizzle is coord.xyzz (which is what GLSL gives us).

Part of the fix for piglit's fbo-generatemipmap-array test.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/program/prog_execute.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index c70a1e344e5..77f842a1630 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -1651,6 +1651,14 @@ _mesa_execute_program(struct gl_context * ctx,
             GLfloat texcoord[4], color[4];
             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
 
+            /* For TEX, texcoord.Q should not be used and its value should not
+             * matter (at most, we pass coord.xyz to texture3D() in GLSL).
+             * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
+             * which is effectively what happens when the texcoord swizzle
+             * is .xyzz
+             */
+            texcoord[3] = 1.0f;
+
             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
 
             if (DEBUG_PROG) {
-- 
cgit v1.2.3


From 0f8c43c34f74b2ebc40ade2944f3b56b7dc606b0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 15:58:00 -0600
Subject: meta: use fallback mipmap generation for 1D/2D texture arrays

We could do 1D/2D arrays with textured quad rendering, but it'll take
some work (as with 3D textures).

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/common/meta.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index e37b78aae78..291d912121b 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -2424,7 +2424,9 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target,
 
    /* check for fallbacks */
    if (!ctx->Extensions.EXT_framebuffer_object ||
-       target == GL_TEXTURE_3D) {
+       target == GL_TEXTURE_3D ||
+       target == GL_TEXTURE_1D_ARRAY ||
+       target == GL_TEXTURE_2D_ARRAY) {
       return GL_TRUE;
    }
 
@@ -2476,7 +2478,8 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target,
 
 /**
  * Called via ctx->Driver.GenerateMipmap()
- * Note: texture borders and 3D texture support not yet complete.
+ * Note: We don't yet support 3D textures, 1D/2D array textures or texture
+ * borders.
  */
 void
 _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
-- 
cgit v1.2.3


From 3e9dc51f82276e57ecfb4e2725d88d83dbedcd85 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 15:59:33 -0600
Subject: mesa: handle array textures in GenerateMipmap(),
 FramebufferTexture1/2D()

This was an unfinished to-do item before.
With this patch and the two preceding patches, piglit's
fbo-generatemipmap-array test runs and passes instead of generating
a GL error and dying on an assertion.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/fbobject.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index e25ec8cc2b7..0b48fc7eab0 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1991,6 +1991,9 @@ _mesa_FramebufferTexture1DEXT(GLenum target, GLenum attachment,
       case GL_TEXTURE_1D:
          error = GL_FALSE;
          break;
+      case GL_TEXTURE_1D_ARRAY:
+         error = !ctx->Extensions.EXT_texture_array;
+         break;
       default:
          error = GL_TRUE;
       }
@@ -2032,6 +2035,9 @@ _mesa_FramebufferTexture2DEXT(GLenum target, GLenum attachment,
       case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
          error = !ctx->Extensions.ARB_texture_cube_map;
          break;
+      case GL_TEXTURE_2D_ARRAY:
+         error = !ctx->Extensions.EXT_texture_array;
+         break;
       default:
-         error = GL_FALSE;
+         error = GL_TRUE;  /* any other textarget is invalid */
       }
@@ -2380,6 +2386,8 @@ void GLAPIENTRY
 _mesa_GenerateMipmapEXT(GLenum target)
 {
    struct gl_texture_object *texObj;
+   GLboolean error;
+
    GET_CURRENT_CONTEXT(ctx);
 
    ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -2389,12 +2397,22 @@ _mesa_GenerateMipmapEXT(GLenum target)
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
    case GL_TEXTURE_3D:
+      error = GL_FALSE;
+      break;
    case GL_TEXTURE_CUBE_MAP:
-      /* OK, legal value */
+      error = !ctx->Extensions.ARB_texture_cube_map;
+      break;
+   case GL_TEXTURE_1D_ARRAY:
+   case GL_TEXTURE_2D_ARRAY:
+      error = !ctx->Extensions.EXT_texture_array;
       break;
    default:
-      /* XXX need to implement GL_TEXTURE_1D_ARRAY and GL_TEXTURE_2D_ARRAY */
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target)");
+      error = GL_TRUE;
+   }
+
+   if (error) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target=%s)",
+                  _mesa_lookup_enum_by_nr(target));
       return;
    }
 
-- 
cgit v1.2.3


From 6a6441fc03b4f28bfd2619e4e6d260cf6ea82554 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:21:25 +0300
Subject: driconf: Add the PP descriptions

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/drivers/dri/common/xmlpool/options.h   | 60 +++++++++++++++++++++++++
 src/mesa/drivers/dri/common/xmlpool/t_options.h | 30 +++++++++++++
 2 files changed, 90 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h
index d76595578c7..1e584ba086a 100644
--- a/src/mesa/drivers/dri/common/xmlpool/options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/options.h
@@ -425,6 +425,66 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
         DRI_CONF_DESC(sv,"Använd HyperZ för att maximera prestandan") \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(de,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(es,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(nl,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(fr,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(sv,"A post-processing filter to cel-shade the output") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(de,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(es,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(nl,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(fr,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(sv,"A post-processing filter to remove the red channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(de,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(es,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(nl,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(fr,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(sv,"A post-processing filter to remove the green channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(de,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(es,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(nl,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(fr,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(sv,"A post-processing filter to remove the blue channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+DRI_CONF_OPT_END
+
 #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
 DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
         DRI_CONF_DESC(en,"Number of texture units used") \
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h
index 5fd6ec65bf8..2427aa77f5b 100644
--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -191,6 +191,36 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
         DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the output")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to remove the red channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to remove the green channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to remove the blue channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps")) \
+DRI_CONF_OPT_END
+
 #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
 DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
         DRI_CONF_DESC(en,gettext("Number of texture units used")) \
-- 
cgit v1.2.3


From eb5454f20a7ad998f2789d9b2a91adcd41c887b6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 16 Aug 2011 14:18:51 -0700
Subject: i965/vs: Remove stale comment about compressed instructions.

This was copy'n'paste from the fragment shader, and didn't make sense
here.
---
 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index d5fd21d99a4..70395533119 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -63,7 +63,6 @@ vec4_visitor::reg_allocate_trivial()
       }
    }
 
-   /* Note that compressed instructions require alignment to 2 registers. */
    hw_reg_mapping[0] = this->first_non_payload_grf;
    next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
    for (i = 1; i < this->virtual_grf_count; i++) {
-- 
cgit v1.2.3


From 3dadc1e3cceac80a1b63cad2e10f0e0f8904531b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 16 Aug 2011 15:09:48 -0700
Subject: i965/vs: Copy the live intervals calculation over from the FS.

This is a rather pessimistic calculation, since it doesn't distinguish
individual channels of a vec4, or elements of an array, but should be
a minimum start for register allocation.
---
 src/mesa/drivers/dri/i965/Makefile             |   1 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp         | 130 +++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h           |   5 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   3 +
 4 files changed, 139 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4.cpp

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 45a5350a383..d9c885da65b 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -125,6 +125,7 @@ CXX_SOURCES = \
 	brw_fs_schedule_instructions.cpp \
 	brw_fs_vector_splitting.cpp \
 	brw_shader.cpp \
+	brw_vec4.cpp \
 	brw_vec4_emit.cpp \
 	brw_vec4_reg_allocate.cpp \
 	brw_vec4_visitor.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
new file mode 100644
index 00000000000..a3ed31a9da0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+#include "program/prog_parameter.h"
+}
+
+#define MAX_INSTRUCTION (1 << 30)
+
+namespace brw {
+
+void
+vec4_visitor::calculate_live_intervals()
+{
+   /* Cached intervals are still valid: return before allocating anything. */
+   if (this->live_intervals_valid)
+      return;
+
+   int *def = ralloc_array(mem_ctx, int, virtual_grf_count); /* range start */
+   int *use = ralloc_array(mem_ctx, int, virtual_grf_count); /* range end */
+   int loop_depth = 0, loop_start = 0;
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      def[i] = MAX_INSTRUCTION; /* sentinel: never written */
+      use[i] = -1; /* sentinel: never read */
+   }
+
+   int ip = 0; /* index of the instruction being visited */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      if (inst->opcode == BRW_OPCODE_DO) {
+	 if (loop_depth++ == 0)
+	    loop_start = ip; /* only the outermost DO is recorded */
+      } else if (inst->opcode == BRW_OPCODE_WHILE) {
+	 loop_depth--;
+
+	 if (loop_depth == 0) {
+	    /* Patches up the use of vars marked for being live across
+	     * the whole loop.
+	     */
+	    for (int i = 0; i < virtual_grf_count; i++) {
+	       if (use[i] == loop_start) {
+		  use[i] = ip;
+	       }
+	    }
+	 }
+      } else {
+	 for (unsigned int i = 0; i < 3; i++) {
+	    if (inst->src[i].file == GRF) {
+	       int reg = inst->src[i].reg;
+
+	       if (!loop_depth) {
+		  use[reg] = ip;
+	       } else {
+		  def[reg] = MIN2(loop_start, def[reg]);
+		  use[reg] = loop_start; /* marker; becomes the WHILE's ip */
+
+		  /* Nobody else is going to go smash our start to
+		   * later in the loop now, because def[reg] now
+		   * points before the bb header.
+		   */
+	       }
+	    }
+	 }
+	 if (inst->dst.file == GRF) {
+	    int reg = inst->dst.reg;
+
+	    if (!loop_depth) {
+	       def[reg] = MIN2(def[reg], ip);
+	    } else {
+	       def[reg] = MIN2(def[reg], loop_start); /* live from loop entry */
+	    }
+	 }
+      }
+
+      ip++;
+   }
+
+   ralloc_free(this->virtual_grf_def); /* drop the previous results */
+   ralloc_free(this->virtual_grf_use);
+   this->virtual_grf_def = def;
+   this->virtual_grf_use = use;
+
+   this->live_intervals_valid = true;
+}
+
+bool
+vec4_visitor::virtual_grf_interferes(int a, int b)
+{
+   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
+   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
+
+   /* We can't handle dead register writes here, without iterating
+    * over the whole instruction stream to find every single dead
+    * write to that register to compare to the live interval of the
+    * other register.  Just assert that dead_code_eliminate() has been
+    * called.
+    */
+   assert((this->virtual_grf_use[a] != -1 ||
+	   this->virtual_grf_def[a] == MAX_INSTRUCTION) &&
+	  (this->virtual_grf_use[b] != -1 ||
+	   this->virtual_grf_def[b] == MAX_INSTRUCTION));
+   /* Ranges overlap iff the later def is strictly before the earlier use. */
+   return start < end;
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 620b05570a6..77a28c7cda7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -320,6 +320,9 @@ public:
    int virtual_grf_count;
    int virtual_grf_array_size;
    int first_non_payload_grf;
+   int *virtual_grf_def;
+   int *virtual_grf_use;
+   bool live_intervals_valid;
 
    dst_reg *variable_storage(ir_variable *var);
 
@@ -377,6 +380,8 @@ public:
    void reg_allocate_trivial();
    void reg_allocate();
    void move_grf_array_access_to_scratch();
+   void calculate_live_intervals();
+   bool virtual_grf_interferes(int a, int b);
 
    vec4_instruction *emit(enum opcode opcode);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a60fc5f6ada..b3a07bd0539 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2109,9 +2109,12 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
 				       hash_table_pointer_hash,
 				       hash_table_pointer_compare);
 
+   this->virtual_grf_def = NULL;
+   this->virtual_grf_use = NULL;
    this->virtual_grf_sizes = NULL;
    this->virtual_grf_count = 0;
    this->virtual_grf_array_size = 0;
+   this->live_intervals_valid = false;
 
    this->uniforms = 0;
 
-- 
cgit v1.2.3


From 8174945d3346dc049ae56dcb4bf1eab39f5c88aa Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 17 Aug 2011 10:50:17 -0700
Subject: i965/vs: Add simple dead code elimination.

This is copied right from the fragment shader.  It is needed for real
register allocation to work correctly.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp      | 31 +++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h        |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |  6 ++++++
 3 files changed, 38 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index a3ed31a9da0..760bc1f7acd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -127,4 +127,35 @@ vec4_visitor::virtual_grf_interferes(int a, int b)
    return start < end;
 }
 
+/**
+ * Removes unused writes to registers, based on calculate_live_intervals()
+ * (which is called here).  Must run before register allocation, which
+ * will fail otherwise because something deffed but not used won't be
+ * considered to interfere with other regs.  Returns true on any removal.
+ */
+bool
+vec4_visitor::dead_code_eliminate()
+{
+   bool progress = false;
+   int pc = 0; /* instruction index, counted like ip in the interval pass */
+
+   calculate_live_intervals();
+
+   foreach_list_safe(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+      /* Last read is at or before this write: nothing consumes it. */
+      if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
+	 inst->remove();
+	 progress = true;
+      }
+
+      pc++;
+   }
+
+   if (progress)
+      live_intervals_valid = false; /* force recalculation on next use */
+
+   return progress;
+}
+
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 77a28c7cda7..1db910e2b99 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -381,6 +381,7 @@ public:
    void reg_allocate();
    void move_grf_array_access_to_scratch();
    void calculate_live_intervals();
+   bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
 
    vec4_instruction *emit(enum opcode opcode);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 011af6f2d3e..65ac7d9dc09 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -558,6 +558,12 @@ vec4_visitor::run()
     */
    move_grf_array_access_to_scratch();
 
+   bool progress;
+   do {
+      progress = false;
+      progress = dead_code_eliminate() || progress;
+   } while (progress);
+
    if (failed)
       return false;
 
-- 
cgit v1.2.3


From f4db75547f38f08665efac3daf1599fdc5594bb7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 16 Aug 2011 15:28:53 -0700
Subject: i965/vs: Implement proper register allocation instead of 1:1 mapping.

Fixes vs-atan-* and several others.  This is not the real solution we
eventually want, which will pack floats, vec2s, and vec3s into vec4
registers, but this code should provide the framework for that.
---
 src/mesa/drivers/dri/i965/brw_context.h            |  17 +++
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp     | 139 ++++++++++++++++++++-
 2 files changed, 155 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index add8c568795..df63fe1d52c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -685,6 +685,23 @@ struct brw_context
 
       uint32_t push_const_offset; /* Offset in the batchbuffer */
       int push_const_size; /* in 256-bit register increments */
+
+      /** @{ register allocator */
+
+      struct ra_regs *regs;
+
+      /**
+       * Array of the ra classes for the unaligned contiguous register
+       * block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+      */
+      uint8_t *ra_reg_to_grf;
+      /** @} */
    } vs;
 
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 70395533119..3f052ff64cf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -21,6 +21,11 @@
  * IN THE SOFTWARE.
  */
 
+extern "C" {
+#include "main/macros.h"
+#include "program/register_allocate.h"
+} /* extern "C" */
+
 #include "brw_vec4.h"
 #include "../glsl/ir_print_visitor.h"
 
@@ -88,10 +93,142 @@ vec4_visitor::reg_allocate_trivial()
    }
 }
 
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+			      int *class_sizes,
+			      int class_count,
+			      int base_reg_count)
+{
+   /* Total allocator registers: one per possible start GRF in each class. */
+   int ra_reg_count = 0;
+   for (int i = 0; i < class_count; i++) {
+      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+   }
+
+   ralloc_free(brw->vs.ra_reg_to_grf); /* discard any previous register set */
+   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->vs.regs);
+   brw->vs.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->vs.classes);
+   brw->vs.classes = ralloc_array(brw, int, class_count + 1);
+
+   /* Now, add the registers to their classes, and add the conflicts
+    * between them and the base GRF registers (and also each other).
+    */
+   int reg = 0; /* running allocator register number across all classes */
+   for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+      brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
+
+      for (int j = 0; j < class_reg_count; j++) {
+	 ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
+	 /* j is the first GRF (relative to the base) this register covers. */
+	 brw->vs.ra_reg_to_grf[reg] = j;
+	 /* NOTE(review): base_reg indexes regs of class 0; assumes its size is 1. */
+	 for (int base_reg = j;
+	      base_reg < j + class_sizes[i];
+	      base_reg++) {
+	    ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
+	 }
+
+	 reg++;
+      }
+   }
+   assert(reg == ra_reg_count);
+
+   ra_set_finalize(brw->vs.regs);
+}
+
 void
 vec4_visitor::reg_allocate()
 {
-   reg_allocate_trivial();
+   int hw_reg_mapping[virtual_grf_count]; /* virtual GRF -> hardware GRF */
+   int first_assigned_grf = this->first_non_payload_grf;
+   int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
+   int class_sizes[base_reg_count]; /* distinct virtual GRF sizes seen */
+   int class_count = 0;
+
+   /* Using the trivial allocator can be useful in debugging undefined
+    * register access as a result of broken optimization passes.
+    */
+   if (0) {
+      reg_allocate_trivial();
+      return;
+   }
+
+   calculate_live_intervals();
+
+   /* Set up the register classes.  The base registers store a vec4.
+    * However, we'll need larger storage for arrays, structures, and
+    * matrices, which will be sets of contiguous registers: one class
+    * per distinct size, with the size-1 class always first.
+    */
+   class_sizes[class_count++] = 1;
+
+   for (int r = 0; r < virtual_grf_count; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+	 if (class_sizes[i] == this->virtual_grf_sizes[r])
+	    break;
+      }
+      if (i == class_count) {
+	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
+	    fail("Object too large to register allocate.\n");
+	    return; /* compile has failed; don't build a bogus class */
+	 }
+
+	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
+
+   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
+						    virtual_grf_count);
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      for (int c = 0; c < class_count; c++) {
+	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+	    ra_set_node_class(g, i, brw->vs.classes[c]);
+	    break;
+	 }
+      }
+
+      for (int j = 0; j < i; j++) {
+	 if (virtual_grf_interferes(i, j)) {
+	    ra_add_node_interference(g, i, j);
+	 }
+      }
+   }
+
+   if (!ra_allocate_no_spills(g)) {
+      ralloc_free(g);
+      fail("No register spilling support yet\n");
+      return; /* g is freed: must not be read or freed again below */
+   }
+
+   /* Map each node's chosen allocator register back down to a real
+    * hardware GRF number, tracking the highest GRF any object covers.
+    */
+   prog_data->total_grf = first_assigned_grf;
+   for (int i = 0; i < virtual_grf_count; i++) {
+      int reg = ra_get_node_reg(g, i);
+      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
+      prog_data->total_grf = MAX2(prog_data->total_grf,
+				  hw_reg_mapping[i] + virtual_grf_sizes[i]);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      assign(hw_reg_mapping, &inst->dst);
+      assign(hw_reg_mapping, &inst->src[0]);
+      assign(hw_reg_mapping, &inst->src[1]);
+      assign(hw_reg_mapping, &inst->src[2]);
+   }
+
+   ralloc_free(g);
 }
 
 } /* namespace brw */
-- 
cgit v1.2.3


From eb86bb55f5faef67c21604db19210c6788592679 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 14:05:11 -0700
Subject: i965/fs: Change incorrect use of 'struct fs_reg' to simply 'fs_reg'.

It's actually a class.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2dc9132cec6..33ad12763f7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -510,7 +510,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
 void
 fs_visitor::visit(ir_assignment *ir)
 {
-   struct fs_reg l, r;
+   fs_reg l, r;
    fs_inst *inst;
 
    /* FINISHME: arrays on the lhs */
-- 
cgit v1.2.3


From 07e9b9049f94ceb443eac1206cc3f9e1e51ac6c1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 14:08:06 -0700
Subject: ir_to_mesa: Remove incorrect usage of the 'struct' keyword on
 classes.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/program/ir_to_mesa.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index b1211c1145c..ec3fba182d8 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -581,7 +581,7 @@ ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
    }
 }
 
-struct src_reg
+src_reg
 ir_to_mesa_visitor::src_reg_for_float(float val)
 {
    src_reg src(PROGRAM_CONSTANT, -1, NULL);
@@ -725,7 +725,7 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
 	 }
       }
 
-      struct variable_storage *storage;
+      variable_storage *storage;
       dst_reg dst;
       if (i == ir->num_state_slots) {
 	 /* We'll set the index later. */
-- 
cgit v1.2.3


From 01d81dedc795005ed235856ce762bb1981655716 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 15 Aug 2011 14:18:16 -0700
Subject: mesa, glsl_to_tgsi: Add new gl_context::NativeIntegers flag.

Previously, native integer support was based on whether the driver
advertised GLSL 1.30 or not.  However, drivers that natively support
integers may wish to do so for older GLSL versions as well.  Adding this
new opt-in flag allows them to do so.

Currently disabled by default on all drivers, which was the existing
behavior (no drivers currently implement GLSL 1.30).

Fixes piglit tests on i965 with INTEL_GLSL_VERSION=130 set:
- spec/glsl-1.10/fs-uniform-int-110.shader_test
- spec/glsl-1.30/fs-uniform-int-130.shader_test
(it was doubly converting the data)

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/main/mtypes.h                     |  6 +++++
 src/mesa/main/uniforms.c                   | 18 +++++++-------
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 40 ++++++++++++++++--------------
 3 files changed, 37 insertions(+), 27 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 2d5f44c1e7b..8b3650321db 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2717,6 +2717,12 @@ struct gl_constants
 
    GLuint GLSLVersion;  /**< GLSL version supported (ex: 120 = 1.20) */
 
+   /**
+    * Does the driver support real 32-bit integers?  (Otherwise, integers are
+    * simulated via floats.)
+    */
+   GLboolean NativeIntegers;
+
    /** Which texture units support GL_ATI_envmap_bumpmap as targets */
    GLbitfield SupportedBumpUnits;
 
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 1329af4cd7e..cda840fe2d2 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -454,9 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
-                     (GLint) prog->Parameters->ParameterValues[base][j].f : 
-                     prog->Parameters->ParameterValues[base][j].i;
+                  params[k++] = ctx->Const.NativeIntegers ?
+                     prog->Parameters->ParameterValues[base][j].i :
+                     (GLint) prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -468,9 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
-                     (GLuint) prog->Parameters->ParameterValues[base][j].f : 
-                     prog->Parameters->ParameterValues[base][j].u;
+                  params[k++] = ctx->Const.NativeIntegers ?
+                     prog->Parameters->ParameterValues[base][j].u :
+                     (GLuint) prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -750,7 +750,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
          if (basicType == GL_INT) {
             const GLint *iValues = ((const GLint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               if (ctx->Const.GLSLVersion <= 120)
+               if (!ctx->Const.NativeIntegers)
                   uniformVal[i].f = (GLfloat) iValues[i];
                else
                   uniformVal[i].i = iValues[i];
@@ -759,7 +759,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
          else if (basicType == GL_UNSIGNED_INT) {
             const GLuint *iValues = ((const GLuint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               if (ctx->Const.GLSLVersion <= 120)
+               if (!ctx->Const.NativeIntegers)
                   uniformVal[i].f = (GLfloat)(GLuint) iValues[i];
                else
                   uniformVal[i].u = iValues[i];
@@ -781,7 +781,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
                else
                   uniformVal[i].b = uniformVal[i].u ? 1 : 0;
                
-               if (ctx->Const.GLSLVersion <= 120)
+               if (!ctx->Const.NativeIntegers)
                   uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
             }
          }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 7b90c812595..886a1776210 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -295,6 +295,7 @@ public:
    bool indirect_addr_consts;
    
    int glsl_version;
+   bool native_integers;
 
    variable_storage *find_variable_storage(ir_variable *var);
 
@@ -600,7 +601,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
    
    if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
       type = GLSL_TYPE_FLOAT;
-   else if (glsl_version >= 130)
+   else if (native_integers)
       type = src0.type;
 
 #define case4(c, f, i, u) \
@@ -881,7 +882,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
    union gl_constant_value uval;
    
-   assert(glsl_version >= 130);
+   assert(native_integers);
 
    uval.i = val;
    src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
@@ -892,7 +893,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
 {
-   if (glsl_version >= 130)
+   if (native_integers)
       return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 
                                        st_src_reg_for_int(val);
    else
@@ -950,7 +951,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
 {
    st_src_reg src;
 
-   src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
+   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
    src.file = PROGRAM_TEMPORARY;
    src.index = next_temp;
    src.reladdr = NULL;
@@ -1053,7 +1054,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
          this->next_temp += type_size(ir->type);
 
          dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
-               glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
+               native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
       }
 
 
@@ -1069,7 +1070,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
             }
          } else {
             st_src_reg src(PROGRAM_STATE_VAR, index,
-                  glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
+                  native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
             src.swizzle = slots[i].swizzle;
             emit(ir, TGSI_OPCODE_MOV, dst, src);
             /* even a float takes up a whole vec4 reg in a struct/array. */
@@ -1444,7 +1445,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "==" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+         st_src_reg temp = get_temp(native_integers ?
                glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
@@ -1459,7 +1460,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "!=" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+         st_src_reg temp = get_temp(native_integers ?
                glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
@@ -1514,7 +1515,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
    case ir_unop_i2f:
    case ir_unop_b2f:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
          break;
       }
@@ -1526,7 +1527,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       result_src = op[0];
       break;
    case ir_unop_f2i:
-      if (glsl_version >= 130)
+      if (native_integers)
          emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
       else
          emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
@@ -1567,7 +1568,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
          break;
       }
    case ir_unop_u2f:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
          break;
       }
@@ -1719,7 +1720,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
    }
 
    this->result = st_src_reg(entry->file, entry->index, var->type);
-   if (glsl_version <= 120)
+   if (!native_integers)
       this->result.type = GLSL_TYPE_FLOAT;
 }
 
@@ -2109,27 +2110,27 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
       }
       break;
    case GLSL_TYPE_UINT:
-      gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
+      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
             values[i].u = ir->value.u[i];
          else
             values[i].f = ir->value.u[i];
       }
       break;
    case GLSL_TYPE_INT:
-      gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
+      gl_type = native_integers ? GL_INT : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
             values[i].i = ir->value.i[i];
          else
             values[i].f = ir->value.i[i];
       }
       break;
    case GLSL_TYPE_BOOL:
-      gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
+      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
             values[i].b = ir->value.b[i];
          else
             values[i].f = ir->value.b[i];
@@ -3611,6 +3612,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    v->ctx = original->ctx;
    v->prog = prog;
    v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
    v->options = original->options;
    v->next_temp = original->next_temp;
    v->num_address_regs = original->num_address_regs;
@@ -3739,6 +3741,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    v->ctx = original->ctx;
    v->prog = prog;
    v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
    v->options = original->options;
    v->next_temp = original->next_temp;
    v->num_address_regs = original->num_address_regs;
@@ -4674,6 +4677,7 @@ get_mesa_program(struct gl_context *ctx,
    v->shader_program = shader_program;
    v->options = options;
    v->glsl_version = ctx->Const.GLSLVersion;
+   v->native_integers = ctx->Const.NativeIntegers;
 
    add_uniforms_to_parameters_list(shader_program, shader, prog);
 
-- 
cgit v1.2.3


From e98ee06776e0ba055e0194836d5813a0bc7e7795 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 11 Aug 2011 16:42:01 -0700
Subject: i965/fs: Don't double-convert integer/boolean uniforms.

When ctx->Const.NativeIntegers is set, Core Mesa loads integer/boolean
uniforms directly, rather than loading the floating point equivalent.
So, when that's set, we don't need to perform any conversions.

Unfortunately, we can't properly support native integers with the old
vertex shader backend, so this patch leaves them disabled for now.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b19c6e72fa6..c8f74252654 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -279,23 +279,27 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
 
 	 assert(param < ARRAY_SIZE(c->prog_data.param));
 
-	 switch (type->base_type) {
-	 case GLSL_TYPE_FLOAT:
+	 if (ctx->Const.NativeIntegers) {
 	    c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
-	    break;
-	 case GLSL_TYPE_UINT:
-	    c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
-	    break;
-	 case GLSL_TYPE_INT:
-	    c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
-	    break;
-	 case GLSL_TYPE_BOOL:
-	    c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
-	    break;
-	 default:
-	    assert(!"not reached");
-	    c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
-	    break;
+	 } else {
+	    switch (type->base_type) {
+	    case GLSL_TYPE_FLOAT:
+	       c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+	       break;
+	    case GLSL_TYPE_UINT:
+	       c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
+	       break;
+	    case GLSL_TYPE_INT:
+	       c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
+	       break;
+	    case GLSL_TYPE_BOOL:
+	       c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
+	       break;
+	    default:
+	       assert(!"not reached");
+	       c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+	       break;
+	    }
 	 }
 	 this->param_index[param] = loc;
 	 this->param_offset[param] = i;
-- 
cgit v1.2.3


From f7d2dcae3b6bf39b14c1e71f0721d0e4a2833962 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 02:15:56 -0700
Subject: i965/gen7: Use align1 mode to set URB_WRITE_HWORD channel enables.

Makes the new vertex shader backend work on Ivybridge.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 27e81306e9c..c5013de7ec1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2244,10 +2244,13 @@ void brw_urb_WRITE(struct brw_compile *p,
 
    if (intel->gen == 7) {
       /* Enable Channel Masks in the URB_WRITE_HWORD message header */
+      brw_push_insn_state(p);
+      brw_set_access_mode(p, BRW_ALIGN_1);
       brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
 		       BRW_REGISTER_TYPE_UD),
 	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
 		brw_imm_ud(0xff00));
+      brw_pop_insn_state(p);
    }
 
    insn = next_insn(p, BRW_OPCODE_SEND);
-- 
cgit v1.2.3


From a40008ac649acf0e7f47294145fe0b14393f03d3 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 8 Apr 2011 22:50:43 +0800
Subject: mesa: fix !FEATURE_GL build

Move vbo_exec_FlushVertices_internal out of FEATURE_beginend.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/vbo/vbo_exec_api.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 2b8d38ef283..af66dbd44d4 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -431,6 +431,24 @@ do {									\
 #include "vbo_attrib_tmp.h"
 
 
+/**
+ * Flush (draw) vertices.
+ * \param  unmap - leave VBO unmapped after flushing?
+ */
+static void
+vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap)
+{
+   if (exec->vtx.vert_count || unmap) {
+      vbo_exec_vtx_flush( exec, unmap );
+   }
+
+   if (exec->vtx.vertex_size) {
+      vbo_exec_copy_to_current( exec );
+      reset_attrfv( exec );
+   }
+}
+
+
 #if FEATURE_beginend
 
 
@@ -534,24 +552,6 @@ static void GLAPIENTRY vbo_exec_EvalPoint2( GLint i, GLint j )
 #endif /* FEATURE_evaluators */
 
 
-/**
- * Flush (draw) vertices.
- * \param  unmap - leave VBO unmapped after flushing?
- */
-static void
-vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap)
-{
-   if (exec->vtx.vert_count || unmap) {
-      vbo_exec_vtx_flush( exec, unmap );
-   }
-
-   if (exec->vtx.vertex_size) {
-      vbo_exec_copy_to_current( exec );
-      reset_attrfv( exec );
-   }
-}
-
-
 /**
  * Called via glBegin.
  */
-- 
cgit v1.2.3


From 504f92c739ffc916084ed821cb9f437276213057 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 5 Aug 2011 12:54:05 +0900
Subject: mesa: android has no log2f nor ffs

Define log2f(v) as logf(v) / M_LN2, and map ffs and ffsll to the GCC
builtins __builtin_ffs and __builtin_ffsll.

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/imports.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index 3fa1db02aee..70defdc4327 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -134,7 +134,13 @@ typedef union { GLfloat f; GLint i; } fi_type;
 #define exp2f(f) ((float) exp2(f))
 #define floorf(f) ((float) floor(f))
 #define logf(f) ((float) log(f))
+
+#ifdef ANDROID
+#define log2f(f) (logf(f) * (float) (1.0 / M_LN2))
+#else
 #define log2f(f) ((float) log2(f))
+#endif
+
 #define powf(x,y) ((float) pow(x,y))
 #define sinf(f) ((float) sin(f))
 #define sinhf(f) ((float) sinh(f))
@@ -562,7 +568,7 @@ _mesa_init_sqrt_table(void);
 
 #ifdef __GNUC__
 
-#ifdef __MINGW32__
+#if defined(__MINGW32__) || defined(ANDROID)
 #define ffs __builtin_ffs
 #define ffsll __builtin_ffsll
 #endif
-- 
cgit v1.2.3


From ee40f18054088dad8e1d1eca2c44204576813d6b Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:31:36 +0800
Subject: android: build core mesa

This builds the static library libmesa_st_mesa from core mesa.

Acked-by: Chad Versace <chad@chad-versace.us>
---
 Android.mk              |   1 +
 src/mesa/Android.gen.mk | 131 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/Android.mk     |  83 ++++++++++++++++++++++++++++++
 3 files changed, 215 insertions(+)
 create mode 100644 src/mesa/Android.gen.mk
 create mode 100644 src/mesa/Android.mk

(limited to 'src/mesa')

diff --git a/Android.mk b/Android.mk
index 4178ea2f36e..0d9475fa9a8 100644
--- a/Android.mk
+++ b/Android.mk
@@ -71,6 +71,7 @@ endif
 ifneq ($(strip $(MESA_GPU_DRIVERS)),)
 
 SUBDIRS := \
+	src/mesa \
 	src/egl/main
 
 ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk
new file mode 100644
index 00000000000..2a08184aee6
--- /dev/null
+++ b/src/mesa/Android.gen.mk
@@ -0,0 +1,131 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# included by core mesa Android.mk for source generation
+
+ifeq ($(LOCAL_MODULE_CLASS),)
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+endif
+
+intermediates := $(call local-intermediates-dir)
+
+sources := \
+	main/api_exec_es1.c \
+	main/api_exec_es1_dispatch.h \
+	main/api_exec_es1_remap_helper.h \
+	main/api_exec_es2.c \
+	main/api_exec_es2_dispatch.h \
+	main/api_exec_es2_remap_helper.h \
+	program/lex.yy.c \
+	program/program_parse.tab.c
+
+LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+
+LOCAL_C_INCLUDES += $(intermediates)/main
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+sources += x86/matypes.h
+LOCAL_C_INCLUDES += $(intermediates)/x86
+endif
+endif
+
+sources += main/git_sha1.h
+
+sources := $(addprefix $(intermediates)/, $(sources))
+LOCAL_GENERATED_SOURCES += $(sources)
+
+glapi := $(MESA_TOP)/src/mapi/glapi/gen
+
+es_src_deps := \
+	$(LOCAL_PATH)/main/APIspec.xml \
+	$(LOCAL_PATH)/main/es_generator.py \
+	$(LOCAL_PATH)/main/APIspecutil.py \
+	$(LOCAL_PATH)/main/APIspec.py
+
+es_hdr_deps := \
+	$(wildcard $(glapi)/*.py) \
+	$(wildcard $(glapi)/*.xml)
+
+define es-gen
+	@mkdir -p $(dir $@)
+	@echo "Gen ES: $(PRIVATE_MODULE) <= $(notdir $(@))"
+	$(hide) $(PRIVATE_SCRIPT) $(1) $(PRIVATE_XML) > $@
+endef
+
+define local-l-to-c
+	@mkdir -p $(dir $@)
+	@echo "Mesa Lex: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(LEX) -o$@ $<
+endef
+
+define local-y-to-c-and-h
+	@mkdir -p $(dir $@)
+	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(YACC) -o $@ $<
+endef
+
+$(intermediates)/main/api_exec_%.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/main/es_generator.py
+$(intermediates)/main/api_exec_%.c: PRIVATE_XML := -S $(LOCAL_PATH)/main/APIspec.xml
+$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/gl_table.py
+$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml
+$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/remap_helper.py
+$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml
+
+$(intermediates)/main/api_exec_es1.c: $(es_src_deps)
+	$(call es-gen,-V GLES1.1)
+
+$(intermediates)/main/api_exec_es2.c: $(es_src_deps)
+	$(call es-gen,-V GLES2.0)
+
+$(intermediates)/main/api_exec_%_dispatch.h: $(es_hdr_deps)
+	$(call es-gen, -c $* -m remap_table)
+
+$(intermediates)/main/api_exec_%_remap_helper.h: $(es_hdr_deps)
+	$(call es-gen, -c $*)
+
+$(intermediates)/program/program_parse.tab.c: $(LOCAL_PATH)/program/program_parse.y
+	$(local-y-to-c-and-h)
+
+$(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program/program_lexer.l
+	$(local-l-to-c)
+
+$(intermediates)/main/git_sha1.h:
+	@mkdir -p $(dir $@)
+	@echo "GIT-SHA1: $(PRIVATE_MODULE) <= git"
+	$(hide) touch $@
+	$(hide) if which git > /dev/null; then \
+			git --git-dir $(PRIVATE_PATH)/../../.git log -n 1 --oneline | \
+			sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
+			> $@; \
+		fi
+
+matypes_deps := \
+	$(BUILD_OUT_EXECUTABLES)/mesa_gen_matypes$(BUILD_EXECUTABLE_SUFFIX) \
+	$(LOCAL_PATH)/main/mtypes.h \
+	$(LOCAL_PATH)/tnl/t_context.h
+
+$(intermediates)/x86/matypes.h: $(matypes_deps) 
+	@mkdir -p $(dir $@)
+	@echo "MATYPES: $(PRIVATE_MODULE) <= $(notdir $@)"
+	$(hide) $< > $@
diff --git a/src/mesa/Android.mk b/src/mesa/Android.mk
new file mode 100644
index 00000000000..e32fabed49e
--- /dev/null
+++ b/src/mesa/Android.mk
@@ -0,0 +1,83 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for core mesa
+
+LOCAL_PATH := $(call my-dir)
+
+include $(LOCAL_PATH)/sources.mak
+
+common_CFLAGS := \
+	-DFEATURE_ES1=1 \
+	-DFEATURE_ES2=1
+
+common_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/glsl
+
+common_ASM :=
+
+# ---------------------------------------
+# Build mesa_gen_matypes for host
+# ---------------------------------------
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+common_ASM += $(X86_SOURCES)
+
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := x86/gen_matypes.c
+LOCAL_CFLAGS := $(common_CFLAGS)
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_MODULE := mesa_gen_matypes
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_EXECUTABLE)
+
+endif # x86
+endif # MESA_ENABLE_ASM
+
+# ---------------------------------------
+# Build libmesa_st_mesa
+# ---------------------------------------
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(MESA_GALLIUM_SOURCES) \
+	$(MESA_GALLIUM_CXX_SOURCES) \
+	$(common_ASM)
+
+LOCAL_CFLAGS := $(common_CFLAGS)
+
+LOCAL_C_INCLUDES := \
+	$(common_C_INCLUDES) \
+	$(MESA_TOP)/src/gallium/include \
+	$(MESA_TOP)/src/gallium/auxiliary
+
+LOCAL_MODULE := libmesa_st_mesa
+
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif # MESA_BUILD_GALLIUM
-- 
cgit v1.2.3


From b81b82df955ad19eec55fa593b533673f083f593 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:56:43 +0800
Subject: android: build glsl

This builds the static library libmesa_glsl and the executable glsl_compiler
from glsl.  glsl_compiler is only installed for engineering builds.

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 Android.mk              |   1 +
 src/glsl/Android.gen.mk |  98 +++++++++++++++++++++++++++
 src/glsl/Android.mk     | 171 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/Android.mk     |  32 +++++++++
 4 files changed, 302 insertions(+)
 create mode 100644 src/glsl/Android.gen.mk
 create mode 100644 src/glsl/Android.mk

(limited to 'src/mesa')

diff --git a/Android.mk b/Android.mk
index 0d9475fa9a8..def8ec2de75 100644
--- a/Android.mk
+++ b/Android.mk
@@ -71,6 +71,7 @@ endif
 ifneq ($(strip $(MESA_GPU_DRIVERS)),)
 
 SUBDIRS := \
+	src/glsl \
 	src/mesa \
 	src/egl/main
 
diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
new file mode 100644
index 00000000000..e4ccb7291ef
--- /dev/null
+++ b/src/glsl/Android.gen.mk
@@ -0,0 +1,98 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# included by glsl Android.mk for source generation
+
+ifeq ($(LOCAL_MODULE_CLASS),)
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+endif
+
+intermediates := $(call local-intermediates-dir)
+
+sources := \
+	glsl_lexer.cpp \
+	glsl_parser.cpp \
+	glcpp/glcpp-lex.c \
+	glcpp/glcpp-parse.c
+
+ifneq ($(LOCAL_IS_HOST_MODULE),true)
+sources += builtin_function.cpp
+endif
+
+LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+
+LOCAL_C_INCLUDES += $(intermediates) $(intermediates)/glcpp $(MESA_TOP)/src/glsl/glcpp
+
+sources := $(addprefix $(intermediates)/, $(sources))
+LOCAL_GENERATED_SOURCES += $(sources)
+
+define local-l-or-ll-to-c-or-cpp
+	@mkdir -p $(dir $@)
+	@echo "Mesa Lex: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(LEX) --nounistd -o$@ $<
+endef
+
+define local-y-to-c-and-h
+	@mkdir -p $(dir $@)
+	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(YACC) -o $@ $<
+endef
+
+define local-yy-to-cpp-and-h
+	@mkdir -p $(dir $@)
+	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(YACC) -p "_mesa_glsl_" -o $@ $<
+	touch $(@:$1=$(YACC_HEADER_SUFFIX))
+	echo '#ifndef '$(@F:$1=_h) > $(@:$1=.h)
+	echo '#define '$(@F:$1=_h) >> $(@:$1=.h)
+	cat $(@:$1=$(YACC_HEADER_SUFFIX)) >> $(@:$1=.h)
+	echo '#endif' >> $(@:$1=.h)
+	rm -f $(@:$1=$(YACC_HEADER_SUFFIX))
+endef
+
+$(intermediates)/glsl_lexer.cpp: $(LOCAL_PATH)/glsl_lexer.ll
+	$(call local-l-or-ll-to-c-or-cpp)
+
+$(intermediates)/glsl_parser.cpp: $(LOCAL_PATH)/glsl_parser.yy
+	$(call local-yy-to-cpp-and-h,.cpp)
+
+$(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l
+	$(call local-l-or-ll-to-c-or-cpp)
+
+$(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y
+	$(call local-y-to-c-and-h)
+
+BUILTIN_COMPILER := $(BUILD_OUT_EXECUTABLES)/mesa_builtin_compiler$(BUILD_EXECUTABLE_SUFFIX)
+
+builtin_function_deps := \
+	$(LOCAL_PATH)/builtins/tools/generate_builtins.py \
+	$(LOCAL_PATH)/builtins/tools/texture_builtins.py \
+	$(BUILTIN_COMPILER) \
+	$(wildcard $(LOCAL_PATH)/builtins/profiles/*) \
+       	$(wildcard $(LOCAL_PATH)/builtins/ir/*)
+
+$(intermediates)/builtin_function.cpp: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/builtins/tools/generate_builtins.py
+$(intermediates)/builtin_function.cpp: $(builtin_function_deps)
+	@mkdir -p $(dir $@)
+	@echo "Gen GLSL: $(PRIVATE_MODULE) <= $(notdir $@)"
+	$(hide) $(PRIVATE_SCRIPT) $(BUILTIN_COMPILER) > $@ || (rm -f $@; exit 1)
diff --git a/src/glsl/Android.mk b/src/glsl/Android.mk
new file mode 100644
index 00000000000..d0b3ff3becf
--- /dev/null
+++ b/src/glsl/Android.mk
@@ -0,0 +1,171 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for glsl
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+LIBGLCPP_SOURCES = \
+	glcpp/glcpp-lex.c \
+	glcpp/glcpp-parse.c \
+	glcpp/pp.c
+
+C_SOURCES = \
+	strtod.c \
+	ralloc.c \
+	$(LIBGLCPP_SOURCES)
+
+CXX_SOURCES = \
+	ast_expr.cpp \
+	ast_function.cpp \
+	ast_to_hir.cpp \
+	ast_type.cpp \
+	glsl_lexer.cpp \
+	glsl_parser.cpp \
+	glsl_parser_extras.cpp \
+	glsl_types.cpp \
+	glsl_symbol_table.cpp \
+	hir_field_selection.cpp \
+	ir_basic_block.cpp \
+	ir_clone.cpp \
+	ir_constant_expression.cpp \
+	ir.cpp \
+	ir_expression_flattening.cpp \
+	ir_function_can_inline.cpp \
+	ir_function_detect_recursion.cpp \
+	ir_function.cpp \
+	ir_hierarchical_visitor.cpp \
+	ir_hv_accept.cpp \
+	ir_import_prototypes.cpp \
+	ir_print_visitor.cpp \
+	ir_reader.cpp \
+	ir_rvalue_visitor.cpp \
+	ir_set_program_inouts.cpp \
+	ir_validate.cpp \
+	ir_variable.cpp \
+	ir_variable_refcount.cpp \
+	linker.cpp \
+	link_functions.cpp \
+	loop_analysis.cpp \
+	loop_controls.cpp \
+	loop_unroll.cpp \
+	lower_discard.cpp \
+	lower_if_to_cond_assign.cpp \
+	lower_instructions.cpp \
+	lower_jumps.cpp \
+	lower_mat_op_to_vec.cpp \
+	lower_noise.cpp \
+	lower_texture_projection.cpp \
+	lower_variable_index_to_cond_assign.cpp \
+	lower_vec_index_to_cond_assign.cpp \
+	lower_vec_index_to_swizzle.cpp \
+	lower_vector.cpp \
+	opt_algebraic.cpp \
+	opt_constant_folding.cpp \
+	opt_constant_propagation.cpp \
+	opt_constant_variable.cpp \
+	opt_copy_propagation.cpp \
+	opt_copy_propagation_elements.cpp \
+	opt_dead_code.cpp \
+	opt_dead_code_local.cpp \
+	opt_dead_functions.cpp \
+	opt_discard_simplification.cpp \
+	opt_function_inlining.cpp \
+	opt_if_simplification.cpp \
+	opt_noop_swizzle.cpp \
+	opt_redundant_jumps.cpp \
+	opt_structure_splitting.cpp \
+	opt_swizzle_swizzle.cpp \
+	opt_tree_grafting.cpp \
+	s_expression.cpp
+
+# ---------------------------------------
+# Build libmesa_glsl
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(C_SOURCES) \
+	$(CXX_SOURCES) \
+	builtin_function.cpp
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa
+
+LOCAL_MODULE := libmesa_glsl
+
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+
+# ---------------------------------------
+# Build mesa_builtin_compiler for host
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(C_SOURCES) \
+	$(CXX_SOURCES) \
+	builtin_stubs.cpp \
+	main.cpp \
+	standalone_scaffolding.cpp
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa
+
+LOCAL_STATIC_LIBRARIES := libmesa_glsl_utils
+
+LOCAL_MODULE := mesa_builtin_compiler
+
+LOCAL_MODULE_CLASS := EXECUTABLES
+LOCAL_IS_HOST_MODULE := true
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_EXECUTABLE)
+
+# ---------------------------------------
+# Build glsl_compiler
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	main.cpp \
+	standalone_scaffolding.cpp
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa
+
+LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_glsl_utils
+
+LOCAL_MODULE_TAGS := eng
+LOCAL_MODULE := glsl_compiler
+
+include $(MESA_COMMON_MK)
+include $(BUILD_EXECUTABLE)
diff --git a/src/mesa/Android.mk b/src/mesa/Android.mk
index e32fabed49e..67808d491ac 100644
--- a/src/mesa/Android.mk
+++ b/src/mesa/Android.mk
@@ -81,3 +81,35 @@ include $(LOCAL_PATH)/Android.gen.mk
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 endif # MESA_BUILD_GALLIUM
+
+# ---------------------------------------
+# Build libmesa_glsl_utils
+#
+# It is used to avoid circular dependency between core mesa and glsl.
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	program/hash_table.c \
+	program/symbol_table.c
+
+LOCAL_MODULE := libmesa_glsl_utils
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+
+# ---------------------------------------
+# Build libmesa_glsl_utils for host
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	program/hash_table.c \
+	program/symbol_table.c
+
+LOCAL_MODULE := libmesa_glsl_utils
+
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
-- 
cgit v1.2.3


From 8c31bc704826d46cad65c4d65b4b70de7144205a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 17 Aug 2011 10:01:30 -0500
Subject: glsl_to_tgsi: implement ir_unop_logic_not using 1-x

Since our logic values are 0.0 (false) and 1.0 (true), 1.0 - x accurately
implements logical not.

This is a port of commit 6ad08989d7c1 to glsl_to_tgsi.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 886a1776210..73a647efe34 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1336,7 +1336,17 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      if (result_dst.type != GLSL_TYPE_FLOAT)
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      else {
+         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+          * older GPUs implement SEQ using multiple instructions (i915 uses two
+          * SGE instructions and a MUL instruction).  Since our logic values are
+          * 0.0 and 1.0, 1-x also implements !x.
+          */
+         op[0].negate = ~op[0].negate;
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+      }
       break;
    case ir_unop_neg:
       assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
-- 
cgit v1.2.3


From 691cc0e3a8716a2cdb7271765cd7d4c7465066eb Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 17 Aug 2011 14:35:35 -0500
Subject: glsl_to_tgsi: implement ir_binop_logic_or using an add w/saturate or
 add w/SLT

Logical-or is implemented using addition (followed by clamping to [0,1]) on
values of 0.0 and 1.0. Replacing the logical-or operators with addition gives
a + b which has a result on the range [0, 2].

Previously a SNE instruction was used to clamp the resulting logic value to
[0,1]. In a fragment shader, using a saturate on the add has the same effect.
Adding the saturate to the add is free, so (at least) one instruction is
saved. In a vertex shader, using an SLT on the negation of the add result has
the same effect. Many older shader architectures do not support the SNE
instruction. It must be emulated using two SLT instructions and an ADD. On
these architectures, the single SLT saves two instructions.

Note that SNE is still used when integers are used for boolean values, since
there is no such thing as an integer saturate, and older shader architectures
without SNE don't support integers.

This is a port of commit 41f8ffe5e07c to glsl_to_tgsi with integer support
added.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 31 ++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 73a647efe34..5f4aef16b66 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1493,11 +1493,34 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
-   case ir_binop_logic_or:
-      /* This could be a saturated add and skip the SNE. */
-      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+   case ir_binop_logic_or: {
+      /* After the addition, the value will be an integer on the
+       * range [0,2].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *add =
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+         /* The clamping to [0,1] can be done for free in the fragment
+          * shader with a saturate if floats are being used as boolean values.
+          */
+         add->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+         /* Negating the result of the addition gives values on the range
+          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+          * is achieved using SLT.
+          */
+         st_src_reg slt_src = result_src;
+         slt_src.negate = ~slt_src.negate;
+         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      } else {
+         /* Use an SNE on the result of the addition.  Zero stays zero,
+          * 1 stays 1, and 2 becomes 1.
+          */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
       break;
+   }
 
    case ir_binop_logic_and:
       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
-- 
cgit v1.2.3


From c15eb5569bf76c5dc41327017b92a5d960207b97 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 17 Aug 2011 20:34:19 -0500
Subject: glsl_to_tgsi: make glsl_to_tgsi_visitor::emit_dp return the
 instruction

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5f4aef16b66..62127afadde 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -373,11 +373,11 @@ public:
    /**
     * Emit the correct dot-product instruction for the type of arguments
     */
-   void emit_dp(ir_instruction *ir,
-                st_dst_reg dst,
-                st_src_reg src0,
-                st_src_reg src1,
-                unsigned elements);
+   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
+                                     st_dst_reg dst,
+                                     st_src_reg src0,
+                                     st_src_reg src1,
+                                     unsigned elements);
 
    void emit_scalar(ir_instruction *ir, unsigned op,
         	    st_dst_reg dst, st_src_reg src0);
@@ -642,7 +642,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
    return op;
 }
 
-void
+glsl_to_tgsi_instruction *
 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
         		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
         		    unsigned elements)
@@ -651,7 +651,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
       TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
    };
 
-   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 }
 
 /**
-- 
cgit v1.2.3


From a43f68810a347f3e952a0bc401be6edb91e1baea Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 13:26:12 -0500
Subject: glsl_to_tgsi: implement ir_unop_any using DP4 w/saturate or DP4 w/SLT

This is a port of commit 92ca560d68e8 to glsl_to_tgsi, with integer support
added.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 32 ++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 62127afadde..f7d79e9f50c 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1482,12 +1482,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       }
       break;
 
-   case ir_unop_any:
+   case ir_unop_any: {
       assert(ir->operands[0]->type->is_vector());
-      emit_dp(ir, result_dst, op[0], op[0],
-              ir->operands[0]->type->vector_elements);
-      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+      /* After the dot-product, the value will be an integer on the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *const dp =
+         emit_dp(ir, result_dst, op[0], op[0],
+                 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+	      /* The clamping to [0,1] can be done for free in the fragment
+	       * shader with a saturate.
+	       */
+	      dp->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+	      /* Negating the result of the dot-product gives values on the range
+	       * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	       * is achieved using SLT.
+	       */
+	      st_src_reg slt_src = result_src;
+	      slt_src.negate = ~slt_src.negate;
+	      emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      }
+      else {
+         /* Use SNE 0 if integers are being used as boolean values. */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
       break;
+   }
 
    case ir_binop_logic_xor:
       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
-- 
cgit v1.2.3


From f3dce133f0422c42ca61f07f488237107efc30e6 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 13:56:06 -0500
Subject: glsl_to_tgsi: implement ir_binop_any_nequal using DP4 w/saturate or
 DP4 w/SLT

Implement the any() part of the operation the same way regular ir_unop_any
is implemented.

This is a port of commit e7bf096e8b04 to glsl_to_tgsi, with added integer
support.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f7d79e9f50c..b238c267c81 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1475,8 +1475,29 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
-         emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+         /* After the dot-product, the value will be an integer on the
+          * range [0,4].  Zero stays zero, and positive values become 1.0.
+          */
+         glsl_to_tgsi_instruction *const dp =
+               emit_dp(ir, result_dst, temp, temp, vector_elements);
+         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+             result_dst.type == GLSL_TYPE_FLOAT) {
+            /* The clamping to [0,1] can be done for free in the fragment
+             * shader with a saturate.
+             */
+            dp->saturate = true;
+         } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+             * is achieved using SLT.
+             */
+            st_src_reg slt_src = result_src;
+            slt_src.negate = ~slt_src.negate;
+            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         } else {
+            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+         }
       } else {
          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       }
-- 
cgit v1.2.3


From 9098953ee6e0699e13e35183c817ecf40363d538 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 14:15:03 -0500
Subject: glsl_to_tgsi: implement ir_binop_all_equal using DP4 w/SGE

This is a port of commit ba01df11c4d0 to glsl_to_tgsi with integer support
added.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b238c267c81..b211fc680a3 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1460,8 +1460,26 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         
+         /* After the dot-product, the value will be an integer on the
+          * range [0,4].  Zero becomes 1.0, and positive values become zero.
+          */
          emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
+         
+         if (result_dst.type == GLSL_TYPE_FLOAT) {
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
+             * This is achieved using SGE.
+             */
+            st_src_reg sge_src = result_src;
+            sge_src.negate = ~sge_src.negate;
+            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
+         } else {
+            /* The TGSI negate flag doesn't work for integers, so use SEQ 0
+             * instead.
+             */
+            emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0));
+         }
       } else {
          emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
       }
-- 
cgit v1.2.3


From c721d7b7bc70503d2ebb6c742be96371b68bd152 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 14:17:52 -0500
Subject: glsl_to_tgsi: fix typo

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b211fc680a3..2885630c7db 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1514,7 +1514,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
             slt_src.negate = ~slt_src.negate;
             emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
          } else {
-            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
          }
       } else {
          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
-- 
cgit v1.2.3


From 5379a70d3fabd9cf92a615647f81289d33ae9468 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 14:43:25 -0500
Subject: glsl_to_tgsi: emit a MAD(b, -a, b) for !a && b

This is a port of commit ff2cfb8989cd to glsl_to_tgsi.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 52 ++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 2885630c7db..a1f56d3d78a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -392,6 +392,8 @@ public:
 
    GLboolean try_emit_mad(ir_expression *ir,
         		  int mul_operand);
+   bool try_emit_mad_for_and_not(ir_expression *ir,
+              int mul_operand);
    GLboolean try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
@@ -1210,6 +1212,46 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    return true;
 }
 
+/**
+ * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As a result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand;
+   st_src_reg a, b;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false;
+
+   ir->operands[other_operand]->accept(this);
+   a = this->result;
+   expr->operands[0]->accept(this);
+   b = this->result;
+
+   b.negate = ~b.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
+
+   return true;
+}
+
 GLboolean
 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
 {
@@ -1291,6 +1333,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (try_emit_mad(ir, 0))
          return;
    }
+
+   /* Quick peephole: Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+	 return;
+      if (try_emit_mad_for_and_not(ir, 0))
+	 return;
+   }
+
    if (try_emit_sat(ir))
       return;
 
-- 
cgit v1.2.3


From 7a5d28908c03c5ce38da3f041d23bfd103a5becd Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sun, 21 Aug 2011 23:06:39 -0700
Subject: glsl_to_tgsi: Fix a few more struct vs. class warnings.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index a1f56d3d78a..4b3e00c4242 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -866,7 +866,7 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
    }
 }
 
-struct st_src_reg
+st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
@@ -878,7 +878,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
    return src;
 }
 
-struct st_src_reg
+st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
 {
    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
@@ -892,7 +892,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
    return src;
 }
 
-struct st_src_reg
+st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
 {
    if (native_integers)
@@ -1035,7 +1035,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
          }
       }
 
-      struct variable_storage *storage;
+      variable_storage *storage;
       st_dst_reg dst;
       if (i == ir->num_state_slots) {
          /* We'll set the index later. */
@@ -4236,7 +4236,7 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg)
 
 static void
 compile_tgsi_instruction(struct st_translate *t,
-                         const struct glsl_to_tgsi_instruction *inst)
+                         const glsl_to_tgsi_instruction *inst)
 {
    struct ureg_program *ureg = t->ureg;
    GLuint i;
-- 
cgit v1.2.3


From 69595283b64d1f01b33022c38468376ad8596ea7 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Wed, 17 Aug 2011 17:35:07 -0700
Subject: intel: Abort when DRI2 separate stencil handshake fails

When intel_context requires separate stencil but the DRI2 separate stencil
handshake fails, then abort and emit an error instructing the user to
upgrade the DDX to 2.16.0.

CC: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/drivers/dri/intel/intel_context.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index fe8be082dfc..14342ef6246 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -1454,6 +1454,13 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
 	  * a combined depth/stencil buffer. Discard the hiz buffer too.
 	  */
 	 intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_FALSE;
+	 if (intel->must_use_separate_stencil) {
+	    _mesa_problem(&intel->ctx,
+			  "intel_context requires separate stencil, but the "
+			  "DRIscreen does not support it. You may need to "
+			  "upgrade the Intel X driver to 2.16.0");
+	    abort();
+	 }
 
 	 /* 1. Discard depth and stencil renderbuffers. */
 	 _mesa_remove_renderbuffer(fb, BUFFER_DEPTH);
-- 
cgit v1.2.3


From 29d7a0bb16dba1416e3a63f7f44cf82e307ac46a Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 25 Mar 2011 03:10:51 +0800
Subject: mesa: call _mesa_set_vp_override in glDrawTex*

The driver may install its own vertex shader.  _mesa_set_vp_override
must be called so that core mesa can generate the correct fragment program.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/drawtex.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/main/drawtex.c b/src/mesa/main/drawtex.c
index 2089cdfcef9..83485a928d8 100644
--- a/src/mesa/main/drawtex.c
+++ b/src/mesa/main/drawtex.c
@@ -45,11 +45,15 @@ draw_texture(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
       return;
    }
 
+   _mesa_set_vp_override(ctx, GL_TRUE);
+
    if (ctx->NewState)
       _mesa_update_state(ctx);
 
    ASSERT(ctx->Driver.DrawTex);
    ctx->Driver.DrawTex(ctx, x, y, z, width, height);
+
+   _mesa_set_vp_override(ctx, GL_FALSE);
 }
 
 
-- 
cgit v1.2.3


From 1e3bcbdf31f09666ba358f35ff9486faee3642ca Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 25 Feb 2011 14:45:33 -0800
Subject: glsl: Add a new ir_txs (textureSize) opcode to ir_texture.

One unique aspect of TXS is that it doesn't have a coordinate.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
---
 src/glsl/ir.cpp                              | 16 +++++++-----
 src/glsl/ir.h                                |  4 ++-
 src/glsl/ir_clone.cpp                        |  4 ++-
 src/glsl/ir_hv_accept.cpp                    |  9 ++++---
 src/glsl/ir_print_visitor.cpp                | 21 +++++++++-------
 src/glsl/ir_reader.cpp                       | 37 +++++++++++++++++-----------
 src/glsl/ir_rvalue_visitor.cpp               |  1 +
 src/glsl/opt_tree_grafting.cpp               |  1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  6 ++++-
 src/mesa/program/ir_to_mesa.cpp              |  1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   |  1 +
 11 files changed, 65 insertions(+), 36 deletions(-)

(limited to 'src/mesa')

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 6f8676ecceb..41ed4f114ca 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1121,7 +1121,7 @@ ir_dereference::is_lvalue() const
 }
 
 
-const char *tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf" };
+const char *tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txs" };
 
 const char *ir_texture::opcode_string()
 {
@@ -1150,11 +1150,15 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
    this->sampler = sampler;
    this->type = type;
 
-   assert(sampler->type->sampler_type == (int) type->base_type);
-   if (sampler->type->sampler_shadow)
-      assert(type->vector_elements == 4 || type->vector_elements == 1);
-   else
-      assert(type->vector_elements == 4);
+   if (this->op == ir_txs) {
+      assert(type->base_type == GLSL_TYPE_INT);
+   } else {
+      assert(sampler->type->sampler_type == (int) type->base_type);
+      if (sampler->type->sampler_shadow)
+	 assert(type->vector_elements == 4 || type->vector_elements == 1);
+      else
+	 assert(type->vector_elements == 4);
+   }
 }
 
 
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 04fa97bf56f..990aaa16af3 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1212,7 +1212,8 @@ enum ir_texture_opcode {
    ir_txb,		/**< Texture look-up with LOD bias */
    ir_txl,		/**< Texture look-up with explicit LOD */
    ir_txd,		/**< Texture look-up with partial derivatvies */
-   ir_txf		/**< Texel fetch with explicit LOD */
+   ir_txf,		/**< Texel fetch with explicit LOD */
+   ir_txs		/**< Texture size */
 };
 
 
@@ -1233,6 +1234,7 @@ enum ir_texture_opcode {
  * (txl <type> <sampler> <coordinate> 0 1 ( ) <lod>)
  * (txd <type> <sampler> <coordinate> 0 1 ( ) (dPdx dPdy))
  * (txf <type> <sampler> <coordinate> 0       <lod>)
+ * (txs <type> <sampler> <lod>)
  */
 class ir_texture : public ir_rvalue {
 public:
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index 069bb85e8de..f0757365dd2 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -222,7 +222,8 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
    new_tex->type = this->type;
 
    new_tex->sampler = this->sampler->clone(mem_ctx, ht);
-   new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
+   if (this->coordinate)
+      new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
    if (this->projector)
       new_tex->projector = this->projector->clone(mem_ctx, ht);
    if (this->shadow_comparitor) {
@@ -240,6 +241,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht);
       break;
    case ir_txd:
diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp
index 4a607dc8749..d33fc85bf0a 100644
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -171,9 +171,11 @@ ir_texture::accept(ir_hierarchical_visitor *v)
    if (s != visit_continue)
       return (s == visit_continue_with_parent) ? visit_continue : s;
 
-   s = this->coordinate->accept(v);
-   if (s != visit_continue)
-      return (s == visit_continue_with_parent) ? visit_continue : s;
+   if (this->coordinate) {
+      s = this->coordinate->accept(v);
+      if (s != visit_continue)
+	 return (s == visit_continue_with_parent) ? visit_continue : s;
+   }
 
    if (this->projector) {
       s = this->projector->accept(v);
@@ -203,6 +205,7 @@ ir_texture::accept(ir_hierarchical_visitor *v)
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       s = this->lod_info.lod->accept(v);
       if (s != visit_continue)
 	 return (s == visit_continue_with_parent) ? visit_continue : s;
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 518910bd129..ea7858224bb 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -244,19 +244,21 @@ void ir_print_visitor::visit(ir_texture *ir)
    ir->sampler->accept(this);
    printf(" ");
 
-   ir->coordinate->accept(this);
+   if (ir->op != ir_txs) {
+      ir->coordinate->accept(this);
 
-   printf(" ");
+      printf(" ");
 
-   if (ir->offset != NULL) {
-      ir->offset->accept(this);
-   } else {
-      printf("0");
-   }
+      if (ir->offset != NULL) {
+	 ir->offset->accept(this);
+      } else {
+	 printf("0");
+      }
 
-   printf(" ");
+      printf(" ");
+   }
 
-   if (ir->op != ir_txf) {
+   if (ir->op != ir_txf && ir->op != ir_txs) {
       if (ir->projector)
 	 ir->projector->accept(this);
       else
@@ -280,6 +282,7 @@ void ir_print_visitor::visit(ir_texture *ir)
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       ir->lod_info.lod->accept(this);
       break;
    case ir_txd:
diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp
index f3a621734ba..22009eebcb9 100644
--- a/src/glsl/ir_reader.cpp
+++ b/src/glsl/ir_reader.cpp
@@ -885,6 +885,8 @@ ir_reader::read_texture(s_expression *expr)
       { "tex", s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow };
    s_pattern txf_pattern[] =
       { "txf", s_type, s_sampler, s_coord, s_offset, s_lod };
+   s_pattern txs_pattern[] =
+      { "txs", s_type, s_sampler, s_lod };
    s_pattern other_pattern[] =
       { tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod };
 
@@ -892,6 +894,8 @@ ir_reader::read_texture(s_expression *expr)
       op = ir_tex;
    } else if (MATCH(expr, txf_pattern)) {
       op = ir_txf;
+   } else if (MATCH(expr, txs_pattern)) {
+      op = ir_txs;
    } else if (MATCH(expr, other_pattern)) {
       op = ir_texture::get_opcode(tag->value());
       if (op == -1)
@@ -920,25 +924,27 @@ ir_reader::read_texture(s_expression *expr)
    }
    tex->set_sampler(sampler, type);
 
-   // Read coordinate (any rvalue)
-   tex->coordinate = read_rvalue(s_coord);
-   if (tex->coordinate == NULL) {
-      ir_read_error(NULL, "when reading coordinate in (%s ...)",
-		    tex->opcode_string());
-      return NULL;
-   }
-
-   // Read texel offset - either 0 or an rvalue.
-   s_int *si_offset = SX_AS_INT(s_offset);
-   if (si_offset == NULL || si_offset->value() != 0) {
-      tex->offset = read_rvalue(s_offset);
-      if (tex->offset == NULL) {
-	 ir_read_error(s_offset, "expected 0 or an expression");
+   if (op != ir_txs) {
+      // Read coordinate (any rvalue)
+      tex->coordinate = read_rvalue(s_coord);
+      if (tex->coordinate == NULL) {
+	 ir_read_error(NULL, "when reading coordinate in (%s ...)",
+		       tex->opcode_string());
 	 return NULL;
       }
+
+      // Read texel offset - either 0 or an rvalue.
+      s_int *si_offset = SX_AS_INT(s_offset);
+      if (si_offset == NULL || si_offset->value() != 0) {
+	 tex->offset = read_rvalue(s_offset);
+	 if (tex->offset == NULL) {
+	    ir_read_error(s_offset, "expected 0 or an expression");
+	    return NULL;
+	 }
+      }
    }
 
-   if (op != ir_txf) {
+   if (op != ir_txf && op != ir_txs) {
       s_int *proj_as_int = SX_AS_INT(s_proj);
       if (proj_as_int && proj_as_int->value() == 1) {
 	 tex->projector = NULL;
@@ -973,6 +979,7 @@ ir_reader::read_texture(s_expression *expr)
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       tex->lod_info.lod = read_rvalue(s_lod);
       if (tex->lod_info.lod == NULL) {
 	 ir_read_error(NULL, "when reading LOD in (%s ...)",
diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp
index ed6c7cb6a1a..193bcd2d7bd 100644
--- a/src/glsl/ir_rvalue_visitor.cpp
+++ b/src/glsl/ir_rvalue_visitor.cpp
@@ -63,6 +63,7 @@ ir_rvalue_visitor::visit_leave(ir_texture *ir)
       break;
    case ir_txf:
    case ir_txl:
+   case ir_txs:
       handle_rvalue(&ir->lod_info.lod);
       break;
    case ir_txd:
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index 1ef940f9c72..22a1749b9dd 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -258,6 +258,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
       break;
    case ir_txf:
    case ir_txl:
+   case ir_txs:
       if (do_graft(&ir->lod_info.lod))
 	 return visit_stop;
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 33ad12763f7..764351a34f9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -716,6 +716,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       inst = emit(FS_OPCODE_TXD, dst);
       break;
    case ir_txf:
+   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -837,6 +838,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       break;
    }
    case ir_txf:
+   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -926,6 +928,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       break;
    }
    case ir_txf:
+   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -949,7 +952,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
    case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
    case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
-   case ir_txf: assert(!"TXF unsupported.");
+   case ir_txf: assert(!"TXF unsupported."); break;
+   case ir_txs: assert(!"TXS unsupported."); break;
    }
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index ec3fba182d8..b222005d1a9 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2148,6 +2148,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
       dy = this->result;
       break;
    case ir_txf:
+   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 4b3e00c4242..6f0d9fa3f8f 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2469,6 +2469,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       ir->lod_info.grad.dPdy->accept(this);
       dy = this->result;
       break;
+   case ir_txs:
    case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
       assert(!"GLSL 1.30 features unsupported");
       break;
-- 
cgit v1.2.3


From b6bdcf2a908889532ef6d5eb643791176dffcb9d Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 00:18:15 -0700
Subject: i965/fs: Rudimentary support for non-floating point texture results.

Not all texturing operations return floating point data.  For example,
the resinfo message (textureSize or TXS) returns integer data.  In the
future, we'll also add integer texture support.

ir_texture's type field contains this information; use its base type to
appropriately type the destination register.  We want to keep it as a
four component vector, however, since SIMD8 samplers always have a
response length of 4.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 764351a34f9..792799d9063 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1075,7 +1075,7 @@ fs_visitor::visit(ir_texture *ir)
    /* Writemasking doesn't eliminate channels on SIMD8 texture
     * samples, so don't worry about them.
     */
-   fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+   fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
 
    if (intel->gen >= 7) {
       inst = emit_texture_gen7(ir, dst, coordinate, sampler);
-- 
cgit v1.2.3


From ecf8963754489abfb5097c130a9bcd4cdb76b6bd Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sun, 19 Jun 2011 01:47:50 -0700
Subject: i965/fs: Implement textureSize (TXS) on Gen5+.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h      |  2 ++
 src/mesa/drivers/dri/i965/brw_fs.cpp         |  1 +
 src/mesa/drivers/dri/i965/brw_fs.h           |  3 ++-
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp    |  4 ++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 28 +++++++++++++++++++++-------
 src/mesa/program/ir_to_mesa.cpp              |  7 +++++--
 6 files changed, 35 insertions(+), 10 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index b740d87c933..69e0026ee6b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -634,6 +634,7 @@ enum opcode {
    FS_OPCODE_TXB,
    FS_OPCODE_TXD,
    FS_OPCODE_TXL,
+   FS_OPCODE_TXS,
    FS_OPCODE_DISCARD,
    FS_OPCODE_SPILL,
    FS_OPCODE_UNSPILL,
@@ -781,6 +782,7 @@ enum opcode {
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
 
 /* for GEN5 only */
 #define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c8f74252654..0b0445ea142 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -157,6 +157,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case FS_OPCODE_TXB:
    case FS_OPCODE_TXD:
    case FS_OPCODE_TXL:
+   case FS_OPCODE_TXS:
       return 1;
    case FS_OPCODE_FB_WRITE:
       return 2;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 94af0e1af16..10f45f30fe9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -291,7 +291,8 @@ public:
       return (opcode == FS_OPCODE_TEX ||
 	      opcode == FS_OPCODE_TXB ||
 	      opcode == FS_OPCODE_TXD ||
-	      opcode == FS_OPCODE_TXL);
+	      opcode == FS_OPCODE_TXL ||
+	      opcode == FS_OPCODE_TXS);
    }
 
    bool is_math()
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 482d250c333..5c057e9a00b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -242,6 +242,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
 	 }
 	 break;
+      case FS_OPCODE_TXS:
+	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
+	 break;
       case FS_OPCODE_TXD:
 	 /* There is no sample_d_c message; comparisons are done manually */
 	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
@@ -775,6 +778,7 @@ fs_visitor::generate_code()
       case FS_OPCODE_TXB:
       case FS_OPCODE_TXD:
       case FS_OPCODE_TXL:
+      case FS_OPCODE_TXS:
 	 generate_tex(inst, dst, src[0]);
 	 break;
       case FS_OPCODE_DISCARD:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 792799d9063..3551e3dfe81 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -751,6 +751,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    int base_mrf = 2;
    int reg_width = c->dispatch_width / 8;
    bool header_present = false;
+   const int vector_elements =
+      ir->coordinate ? ir->coordinate->type->vector_elements : 0;
 
    if (ir->offset) {
       /* The offsets set up by the ir_texture visitor are in the
@@ -761,7 +763,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       base_mrf--;
    }
 
-   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+   for (int i = 0; i < vector_elements; i++) {
       fs_inst *inst = emit(BRW_OPCODE_MOV,
 			   fs_reg(MRF, base_mrf + mlen + i * reg_width),
 			   coordinate);
@@ -769,7 +771,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 	 inst->saturate = true;
       coordinate.reg_offset++;
    }
-   mlen += ir->coordinate->type->vector_elements * reg_width;
+   mlen += vector_elements * reg_width;
 
    if (ir->shadow_comparitor && ir->op != ir_txd) {
       mlen = MAX2(mlen, header_present + 4 * reg_width);
@@ -837,8 +839,14 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       inst = emit(FS_OPCODE_TXD, dst);
       break;
    }
-   case ir_txf:
    case ir_txs:
+      this->result = reg_undef;
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+      mlen += reg_width;
+      inst = emit(FS_OPCODE_TXS, dst);
+      break;
+   case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -927,14 +935,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       }
       break;
    }
-   case ir_txf:
    case ir_txs:
+      this->result = reg_undef;
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+      mlen += reg_width;
+      break;
+   case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
 
    /* Set up the coordinate (except for TXD where it was done earlier) */
-   if (ir->op != ir_txd) {
+   if (ir->op != ir_txd && ir->op != ir_txs) {
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
 	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
 			      coordinate);
@@ -953,7 +966,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
    case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
    case ir_txf: assert(!"TXF unsupported."); break;
-   case ir_txs: assert(!"TXS unsupported."); break;
+   case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break;
    }
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
@@ -988,7 +1001,8 @@ fs_visitor::visit(ir_texture *ir)
    }
 
    this->result = reg_undef;
-   ir->coordinate->accept(this);
+   if (ir->coordinate)
+      ir->coordinate->accept(this);
    fs_reg coordinate = this->result;
 
    if (ir->offset != NULL) {
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index b222005d1a9..e7609df19ee 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2104,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
    ir_to_mesa_instruction *inst = NULL;
    prog_opcode opcode = OPCODE_NOP;
 
-   ir->coordinate->accept(this);
+   if (ir->op == ir_txs)
+      this->result = src_reg_for_float(0.0);
+   else
+      ir->coordinate->accept(this);
 
    /* Put our coords in a temp.  We'll need to modify them for shadow,
     * projection, or LOD, so the only case we'd use it as is is if
@@ -2128,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
 
    switch (ir->op) {
    case ir_tex:
+   case ir_txs:
       opcode = OPCODE_TEX;
       break;
    case ir_txb:
@@ -2148,7 +2152,6 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
       dy = this->result;
       break;
    case ir_txf:
-   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
-- 
cgit v1.2.3


From 4eeb4c150598605d1be3ce6674fa63076a720ae9 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 17 Aug 2011 10:45:47 -0700
Subject: i965: Implement textureSize (TXS) on Gen4.

Also, remove the BRW_SAMPLER_MESSAGE_SIMD8_RESINFO #define because
there totally isn't a SIMD8 variant.

Unfortunately, resinfo returns FLOAT32 on Broadwater/Crestline, unlike
G45 which returns a proper UINT32.  This turns out to be simple,
however: when we emit MOVs to select the desired half of the SIMD16
result, we can simply override the register type to be float so it's
converted to an integer.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h      |  1 -
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp    |  5 +++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 23 ++++++++++++++++++-----
 3 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 69e0026ee6b..d1799c0ab4f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -769,7 +769,6 @@ enum opcode {
 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
-#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2
 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
 #define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 5c057e9a00b..28efbd3605f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -292,6 +292,11 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	 assert(inst->mlen == 7 || inst->mlen == 10);
 	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
 	 break;
+      case FS_OPCODE_TXS:
+	 assert(inst->mlen == 3);
+	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
+	 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+	 break;
       default:
 	 assert(!"not reached");
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 3551e3dfe81..cbc0af02407 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -657,10 +657,18 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 	 dPdy.reg_offset++;
       }
       mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
+   } else if (ir->op == ir_txs) {
+      /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
+      simd16 = true;
+      this->result = reg_undef;
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+      mlen += 2;
    } else {
       /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
        * instructions.  We'll need to do SIMD16 here.
        */
+      simd16 = true;
       assert(ir->op == ir_txb || ir->op == ir_txl);
 
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
@@ -689,16 +697,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 
       /* The unused upper half. */
       mlen++;
+   }
 
+   if (simd16) {
       /* Now, since we're doing simd16, the return is 2 interleaved
        * vec4s where the odd-indexed ones are junk. We'll need to move
        * this weirdness around to the expected layout.
        */
-      simd16 = true;
       orig_dst = dst;
-      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
-						       2));
-      dst.type = BRW_REGISTER_TYPE_F;
+      const glsl_type *vec_type =
+	 glsl_type::get_instance(ir->type->base_type, 4, 1);
+      dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
+      dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
+			       : BRW_REGISTER_TYPE_F;
    }
 
    fs_inst *inst = NULL;
@@ -715,8 +726,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    case ir_txd:
       inst = emit(FS_OPCODE_TXD, dst);
       break;
-   case ir_txf:
    case ir_txs:
+      inst = emit(FS_OPCODE_TXS, dst);
+      break;
+   case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
-- 
cgit v1.2.3


From 9d4b98eb9eadecc17cd1cda0074b420a39e74647 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 17 Aug 2011 13:41:43 -0700
Subject: i965/gen6+: Use non-normalized coordinates for GL_TEXTURE_RECTANGLE.

Improves performance of a GL_TEXTURE_RECTANGLE microbenchmark by 1.84%
+/- .15% (n=3)
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp     | 3 ++-
 src/mesa/drivers/dri/i965/brw_wm_fp.c            | 4 +++-
 src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 7 +++++++
 src/mesa/drivers/dri/i965/gen7_sampler_state.c   | 7 +++++++
 4 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index cbc0af02407..cdaf543c88b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1056,7 +1056,8 @@ fs_visitor::visit(ir_texture *ir)
     * texture coordinates.  We use the program parameter state
     * tracking to get the scaling factor.
     */
-   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
+   if (intel->gen < 6 &&
+       ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
       struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
       int tokens[STATE_LENGTH] = {
 	 STATE_INTERNAL,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index d52a9581f5e..bd46bd8de43 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -664,6 +664,8 @@ static void precalc_lit( struct brw_wm_compile *c,
 static void precalc_tex( struct brw_wm_compile *c,
 			 const struct prog_instruction *inst )
 {
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct prog_src_register coord;
    struct prog_dst_register tmpcoord = { 0 };
    const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
@@ -727,7 +729,7 @@ static void precalc_tex( struct brw_wm_compile *c,
        release_temp(c, tmp0);
        release_temp(c, tmp1);
    }
-   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+   else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
       struct prog_src_register scale = 
 	 search_or_add_param5( c, 
 			       STATE_INTERNAL, 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 98146136703..6834ebad780 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -289,6 +289,13 @@ static void brw_update_sampler_state(struct brw_context *brw,
    sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
    sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
 
+   /* On Gen6+, the sampler can handle non-normalized texture
+    * rectangle coordinates natively
+    */
+   if (intel->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
+      sampler->ss3.non_normalized_coord = 1;
+   }
+
    upload_default_color(brw, gl_sampler, unit);
 
    if (intel->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index e787c21f4d1..aee67c87472 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -157,6 +157,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
    sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8);
    sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8);
 
+   /* The sampler can handle non-normalized texture rectangle coordinates
+    * natively
+    */
+   if (texObj->Target == GL_TEXTURE_RECTANGLE) {
+      sampler->ss3.non_normalized_coord = 1;
+   }
+
    upload_default_color(brw, gl_sampler, unit);
 
    sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;
-- 
cgit v1.2.3


From abbb8fc3a7d49066ecca10cb9db0b4756a1bbef0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 23 Aug 2011 10:51:16 -0700
Subject: i965: Fix typo in 2b224d66a01f3ce867fb05558b25749705bbfe7a

Unfortunately, since a previous efficiency improvement, we no longer
have any open-source testcases producing register spilling, so this
code was untested in the fragment shader path.  That should change
when we get proper temporary array support in the fragment shader.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=40194
---
 src/mesa/drivers/dri/i965/brw_wm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index a4524fc7889..e76832515fe 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -246,7 +246,7 @@ bool do_wm_prog(struct brw_context *brw,
    if (c->last_scratch) {
       c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
 
-      brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
+      brw_get_scratch_bo(intel, &brw->wm.scratch_bo,
 			 c->prog_data.total_scratch * brw->wm_max_threads);
    }
 
-- 
cgit v1.2.3


From 56f0c00f125ee75caeadc1c9e8cab8a488635e5e Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 16:59:30 -0700
Subject: mesa: Remove target parameter from dd_function_table::UnmapBuffer

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c         |  2 +-
 src/mesa/drivers/dri/intel/intel_buffer_objects.c   | 14 ++++++--------
 src/mesa/drivers/dri/intel/intel_pixel_bitmap.c     |  3 +--
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c    |  2 +-
 src/mesa/drivers/dri/r300/r300_draw.c               |  8 ++++----
 src/mesa/drivers/dri/r600/evergreen_render.c        |  8 ++++----
 src/mesa/drivers/dri/r600/r700_render.c             |  8 ++++----
 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c |  1 -
 src/mesa/drivers/x11/xm_dd.c                        |  6 ++----
 src/mesa/main/api_arrayelt.c                        |  4 +---
 src/mesa/main/api_validate.c                        |  2 +-
 src/mesa/main/bufferobj.c                           | 14 ++++++--------
 src/mesa/main/dd.h                                  |  2 +-
 src/mesa/main/dlist.c                               |  3 +--
 src/mesa/main/pbo.c                                 |  8 +++-----
 src/mesa/main/shared.c                              |  2 +-
 src/mesa/main/texgetimage.c                         |  6 ++----
 src/mesa/state_tracker/st_cb_bufferobjects.c        |  2 +-
 src/mesa/tnl/t_draw.c                               |  4 +---
 src/mesa/vbo/vbo_exec_api.c                         |  2 +-
 src/mesa/vbo/vbo_exec_array.c                       | 12 +++++-------
 src/mesa/vbo/vbo_exec_draw.c                        |  2 +-
 src/mesa/vbo/vbo_rebase.c                           |  4 +---
 src/mesa/vbo/vbo_save_api.c                         |  2 +-
 src/mesa/vbo/vbo_save_draw.c                        |  3 +--
 src/mesa/vbo/vbo_split_copy.c                       |  4 ++--
 26 files changed, 53 insertions(+), 75 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 56a46ced6e3..2049850417b 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -699,7 +699,7 @@ static void brw_prepare_indices(struct brw_context *brw)
 			     &bo, &offset);
 	   brw->ib.start_vertex_offset = offset / ib_type_size;
 
-           ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
+           ctx->Driver.UnmapBuffer(ctx, bufferobj);
        } else {
 	  /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
 	   * the index buffer state when we're just moving the start index
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 703300b31af..57609fd5d32 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -41,8 +41,7 @@
 #include "intel_regions.h"
 
 static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
-                      GLenum target, struct gl_buffer_object *obj);
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);
 
 /** Allocates a new drm_intel_bo to store the data for the buffer object. */
 static void
@@ -122,7 +121,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
     * (though it does if you call glDeleteBuffers)
     */
    if (obj->Pointer)
-      intel_bufferobj_unmap(ctx, 0, obj);
+      intel_bufferobj_unmap(ctx, obj);
 
    free(intel_obj->sys_buffer);
    if (intel_obj->region) {
@@ -507,8 +506,7 @@ intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
  * Called via glUnmapBuffer().
  */
 static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
-                      GLenum target, struct gl_buffer_object *obj)
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
@@ -766,7 +764,7 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
 	 char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
 					 GL_READ_WRITE, dst);
 	 memmove(ptr + write_offset, ptr + read_offset, size);
-	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+	 intel_bufferobj_unmap(ctx, dst);
       } else {
 	 const char *src_ptr;
 	 char *dst_ptr;
@@ -778,8 +776,8 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
 
 	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
 
-	 intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src);
-	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+	 intel_bufferobj_unmap(ctx, src);
+	 intel_bufferobj_unmap(ctx, dst);
       }
       return;
    }
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 86d0ef2d748..1727d4c1a91 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -292,8 +292,7 @@ out:
 
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
       /* done with PBO so unmap it now */
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
    }
 
    intel_check_front_buffer_rendering(intel);
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index e60b91f64be..c0ab31b0b11 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -169,7 +169,7 @@ nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offs
 }
 
 static GLboolean
-nouveau_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+nouveau_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
 {
 	assert(obj->Pointer);
 
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 0c4d8537c61..c47e15534ca 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -138,7 +138,7 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 	r300->ind_buf.count = mesa_ind_buf->count;
 
 	if (mapped_named_bo) {
-		ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+		ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
 	}
 }
 
@@ -184,7 +184,7 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 		r300->ind_buf.count = mesa_ind_buf->count;
 
 		if (mapped_named_bo) {
-			ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+			ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
 		}
 	} else {
 		r300FixupIndexBuffer(ctx, mesa_ind_buf);
@@ -286,7 +286,7 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
 
 	radeon_bo_unmap(attr->bo);
 	if (mapped_named_bo) {
-		ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+		ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
 	}
 }
 
@@ -321,7 +321,7 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
 	}
 
 	if (mapped_named_bo) {
-		ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+		ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
 	}
 
 	radeon_bo_unmap(attr->bo);
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index 4507be29d86..9a310eec6bc 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -456,7 +456,7 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
 
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 }
 
@@ -531,7 +531,7 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
     if (mapped_named_bo)
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
     }
 }
 
@@ -629,7 +629,7 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
         if (mapped_named_bo)
         {
-	        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+	        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
         }
     }
     else
@@ -675,7 +675,7 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
     radeon_bo_unmap(attr->bo);
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 
     attr->stride = dst_stride;
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 0f7a7a46b71..2300fe6d33f 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -543,7 +543,7 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
 
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 }
 
@@ -584,7 +584,7 @@ static void r700AlignDataToDword(struct gl_context *ctx,
     radeon_bo_unmap(attr->bo);
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 
     attr->stride = dst_stride;
@@ -788,7 +788,7 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
     if (mapped_named_bo)
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
     }
 }
 
@@ -836,7 +836,7 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
         if (mapped_named_bo)
         {
-	        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+	        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
         }
     }
     else
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index 0d1af726c07..eac1277a719 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -205,7 +205,6 @@ radeonMapBuffer(struct gl_context * ctx,
  */
 static GLboolean
 radeonUnmapBuffer(struct gl_context * ctx,
-                  GLenum target,
                   struct gl_buffer_object *obj)
 {
     struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 81f48f9d95a..3319d118142 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -508,8 +508,7 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
       }
 
       if (_mesa_is_bufferobj(unpack->BufferObj)) {
-         ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                 unpack->BufferObj);
+         ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
       }
    }
    else {
@@ -642,8 +641,7 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx,
       }
 
       if (unpack->BufferObj->Name) {
-         ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                 unpack->BufferObj);
+         ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
       }
    }
    else {
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index f88da845853..385bef1c53e 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1622,9 +1622,7 @@ void _ae_unmap_vbos( struct gl_context *ctx )
    assert (!actx->NewState);
 
    for (i = 0; i < actx->nr_vbos; i++)
-      ctx->Driver.UnmapBuffer(ctx,
-			      GL_ARRAY_BUFFER_ARB,
-			      actx->vbo[i]);
+      ctx->Driver.UnmapBuffer(ctx, actx->vbo[i]);
 
    actx->mapped_vbos = GL_FALSE;
 }
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 2981d42297a..08faf9e08b4 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -89,7 +89,7 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type,
    }
 
    if (map) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuf);
+      ctx->Driver.UnmapBuffer(ctx, elementBuf);
    }
 
    return max;
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index c52358ecb04..e52e59eb5c2 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -512,11 +512,9 @@ _mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target,
  * \sa glUnmapBufferARB, dd_function_table::UnmapBuffer
  */
 static GLboolean
-_mesa_buffer_unmap( struct gl_context *ctx, GLenum target,
-                    struct gl_buffer_object *bufObj )
+_mesa_buffer_unmap( struct gl_context *ctx, struct gl_buffer_object *bufObj )
 {
    (void) ctx;
-   (void) target;
    /* XXX we might assert here that bufObj->Pointer is non-null */
    bufObj->Pointer = NULL;
    bufObj->Length = 0;
@@ -551,8 +549,8 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx,
    if (srcPtr && dstPtr)
       memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
 
-   ctx->Driver.UnmapBuffer(ctx, GL_COPY_READ_BUFFER, src);
-   ctx->Driver.UnmapBuffer(ctx, GL_COPY_WRITE_BUFFER, dst);
+   ctx->Driver.UnmapBuffer(ctx, src);
+   ctx->Driver.UnmapBuffer(ctx, dst);
 }
 
 
@@ -774,7 +772,7 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids)
 
          if (_mesa_bufferobj_mapped(bufObj)) {
             /* if mapped, unmap it now */
-            ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
+            ctx->Driver.UnmapBuffer(ctx, bufObj);
             bufObj->AccessFlags = DEFAULT_ACCESS;
             bufObj->Pointer = NULL;
          }
@@ -934,7 +932,7 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size,
    
    if (_mesa_bufferobj_mapped(bufObj)) {
       /* Unmap the existing buffer.  We'll replace it now.  Not an error. */
-      ctx->Driver.UnmapBuffer(ctx, target, bufObj);
+      ctx->Driver.UnmapBuffer(ctx, bufObj);
       bufObj->AccessFlags = DEFAULT_ACCESS;
       ASSERT(bufObj->Pointer == NULL);
    }  
@@ -1147,7 +1145,7 @@ _mesa_UnmapBufferARB(GLenum target)
    }
 #endif
 
-   status = ctx->Driver.UnmapBuffer( ctx, target, bufObj );
+   status = ctx->Driver.UnmapBuffer( ctx, bufObj );
    bufObj->AccessFlags = DEFAULT_ACCESS;
    ASSERT(bufObj->Pointer == NULL);
    ASSERT(bufObj->Offset == 0);
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index b5ed9a40c70..318ea1f25aa 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -725,7 +725,7 @@ struct dd_function_table {
                                   GLintptr offset, GLsizeiptr length,
                                   struct gl_buffer_object *obj);
 
-   GLboolean (*UnmapBuffer)( struct gl_context *ctx, GLenum target,
+   GLboolean (*UnmapBuffer)( struct gl_context *ctx,
 			     struct gl_buffer_object *obj );
    /*@}*/
 
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index f9282398c21..3e54af25d00 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -906,8 +906,7 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
       image = _mesa_unpack_image(dimensions, width, height, depth,
                                  format, type, src, unpack);
 
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
 
       if (!image) {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "display list construction");
diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
index 15e0480e9f1..f93cdf1e392 100644
--- a/src/mesa/main/pbo.c
+++ b/src/mesa/main/pbo.c
@@ -201,8 +201,7 @@ _mesa_unmap_pbo_source(struct gl_context *ctx,
 {
    ASSERT(unpack != &ctx->Pack); /* catch pack/unpack mismatch */
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
    }
 }
 
@@ -297,7 +296,7 @@ _mesa_unmap_pbo_dest(struct gl_context *ctx,
 {
    ASSERT(pack != &ctx->Unpack); /* catch pack/unpack mismatch */
    if (_mesa_is_bufferobj(pack->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, pack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, pack->BufferObj);
    }
 }
 
@@ -384,8 +383,7 @@ _mesa_unmap_teximage_pbo(struct gl_context *ctx,
                          const struct gl_pixelstore_attrib *unpack)
 {
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
    }
 }
 
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index d84f59690c5..8b7159db09c 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -200,7 +200,7 @@ delete_bufferobj_cb(GLuint id, void *data, void *userData)
    struct gl_buffer_object *bufObj = (struct gl_buffer_object *) data;
    struct gl_context *ctx = (struct gl_context *) userData;
    if (_mesa_bufferobj_mapped(bufObj)) {
-      ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
+      ctx->Driver.UnmapBuffer(ctx, bufObj);
       bufObj->Pointer = NULL;
    }
    _mesa_reference_buffer_object(ctx, &bufObj, NULL);
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 26c2ff98ba1..20595ef3b56 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -474,8 +474,7 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level,
    }
 
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                              ctx->Pack.BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj);
    }
 }
 
@@ -531,8 +530,7 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level
    }
 
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                              ctx->Pack.BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj);
    }
 }
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 7374bb0acc5..732bbaabd4a 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -378,7 +378,7 @@ st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
  * Called via glUnmapBufferARB().
  */
 static GLboolean
-st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+st_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
 {
    struct pipe_context *pipe = st_context(ctx)->pipe;
    struct st_buffer_object *st_obj = st_buffer_object(obj);
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index b1967e65417..19d4f203c35 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -402,9 +402,7 @@ static void unmap_vbos( struct gl_context *ctx,
 {
    GLuint i;
    for (i = 0; i < nr_bo; i++) { 
-      ctx->Driver.UnmapBuffer(ctx, 
-			      0, /* target -- I don't see why this would be needed */
-			      bo[i]);
+      ctx->Driver.UnmapBuffer(ctx, bo[i]);
    }
 }
 
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index af66dbd44d4..8474c787a46 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -947,7 +947,7 @@ void vbo_exec_vtx_destroy( struct vbo_exec_context *exec )
    /* Free the vertex buffer.  Unmap first if needed.
     */
    if (_mesa_bufferobj_mapped(exec->vtx.bufferobj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, exec->vtx.bufferobj);
+      ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj);
    }
    _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL);
 }
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 32ce0e4a8ff..2692b7539a4 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -176,7 +176,7 @@ vbo_get_minmax_index(struct gl_context *ctx,
    }
 
    if (_mesa_is_bufferobj(ib->obj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj);
+      ctx->Driver.UnmapBuffer(ctx, ib->obj);
    }
 }
 
@@ -238,7 +238,7 @@ unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array)
    if (array->Enabled &&
        _mesa_is_bufferobj(array->BufferObj) &&
        _mesa_bufferobj_mapped(array->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, array->BufferObj);
    }
 }
 
@@ -296,8 +296,7 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
    }
 
    if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
-			      ctx->Array.ElementArrayBufferObj);
+      ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj);
    }
 
    unmap_array_buffer(ctx, &arrayObj->Vertex);
@@ -364,7 +363,7 @@ print_draw_arrays(struct gl_context *ctx,
          for (i = 0; i < n; i++) {
             printf("    float[%d] = 0x%08x %f\n", i, k[i], f[i]);
          }
-         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, bufObj);
+         ctx->Driver.UnmapBuffer(ctx, bufObj);
       }
    }
 }
@@ -760,8 +759,7 @@ dump_element_buffer(struct gl_context *ctx, GLenum type)
       ;
    }
 
-   ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
-                           ctx->Array.ElementArrayBufferObj);
+   ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj);
 }
 
 
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 7e8d8602093..5366b989df2 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -281,7 +281,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
       assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE);
       assert(exec->vtx.buffer_ptr != NULL);
       
-      ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
+      ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj);
       exec->vtx.buffer_map = NULL;
       exec->vtx.buffer_ptr = NULL;
       exec->vtx.max_vert = 0;
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index 1de290ff602..09ba0a6e9ac 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -183,9 +183,7 @@ void vbo_rebase_prims( struct gl_context *ctx,
       }      
 
       if (map_ib) 
-	 ctx->Driver.UnmapBuffer(ctx, 
-				 GL_ELEMENT_ARRAY_BUFFER,
-				 ib->obj);
+	 ctx->Driver.UnmapBuffer(ctx, ib->obj);
 
       tmp_ib.obj = ctx->Shared->NullBufferObj;
       tmp_ib.ptr = tmp_indices;
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 9041f791edd..00f34ad1ec0 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -247,7 +247,7 @@ static void
 unmap_vertex_store(struct gl_context *ctx,
                    struct vbo_save_vertex_store *vertex_store)
 {
-   ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vertex_store->bufferobj);
+   ctx->Driver.UnmapBuffer(ctx, vertex_store->bufferobj);
    vertex_store->buffer = NULL;
 }
 
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index a37af73e0db..0d505555789 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -230,8 +230,7 @@ vbo_save_loopback_vertex_list(struct gl_context *ctx,
                             list->wrap_count,
                             list->vertex_size);
 
-   ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, 
-			   list->vertex_store->bufferobj);
+   ctx->Driver.UnmapBuffer(ctx, list->vertex_store->bufferobj);
 }
 
 
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index ecca1171673..49c7435ab5e 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -564,14 +564,14 @@ replay_finish( struct copy_context *copy )
    for (i = 0; i < copy->nr_varying; i++) {
       struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
       if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo)) 
-	 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, vbo);
+	 ctx->Driver.UnmapBuffer(ctx, vbo);
    }
 
    /* Unmap index buffer:
     */
    if (_mesa_is_bufferobj(copy->ib->obj) &&
        _mesa_bufferobj_mapped(copy->ib->obj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, copy->ib->obj);
+      ctx->Driver.UnmapBuffer(ctx, copy->ib->obj);
    }
 }
 
-- 
cgit v1.2.3


From 12d924c5ae14a1c6a05a3dcae29b77e7668e227d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:07:56 -0700
Subject: mesa: Remove target parameter from dd_function_table::MapBuffer

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c         |  1 -
 src/mesa/drivers/dri/intel/intel_buffer_objects.c   | 10 +++-------
 src/mesa/drivers/dri/intel/intel_pixel_bitmap.c     |  2 +-
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c    |  4 ++--
 src/mesa/drivers/dri/r300/r300_draw.c               |  8 ++++----
 src/mesa/drivers/dri/r600/evergreen_render.c        |  8 ++++----
 src/mesa/drivers/dri/r600/r700_render.c             |  8 ++++----
 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c |  1 -
 src/mesa/drivers/x11/xm_dd.c                        |  1 -
 src/mesa/main/api_arrayelt.c                        |  1 -
 src/mesa/main/api_validate.c                        |  3 +--
 src/mesa/main/bufferobj.c                           | 11 ++++-------
 src/mesa/main/dd.h                                  |  2 +-
 src/mesa/main/dlist.c                               |  3 +--
 src/mesa/main/pbo.c                                 | 11 +++++------
 src/mesa/main/texgetimage.c                         |  6 ++----
 src/mesa/state_tracker/st_cb_bufferobjects.c        |  2 +-
 src/mesa/tnl/t_draw.c                               |  2 --
 src/mesa/vbo/vbo_exec_array.c                       | 11 +++--------
 src/mesa/vbo/vbo_exec_draw.c                        |  2 +-
 src/mesa/vbo/vbo_rebase.c                           |  5 +----
 src/mesa/vbo/vbo_save_api.c                         |  1 -
 src/mesa/vbo/vbo_save_draw.c                        |  1 -
 src/mesa/vbo/vbo_split_copy.c                       |  5 ++---
 24 files changed, 40 insertions(+), 69 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 2049850417b..66c42aa0779 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -690,7 +690,6 @@ static void brw_prepare_indices(struct brw_context *brw)
        */
        if ((get_size(index_buffer->type) - 1) & offset) {
            GLubyte *map = ctx->Driver.MapBuffer(ctx,
-                                                GL_ELEMENT_ARRAY_BUFFER_ARB,
                                                 GL_DYNAMIC_DRAW_ARB,
                                                 bufferobj);
            map += offset;
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 57609fd5d32..6f3a90942f6 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -301,7 +301,6 @@ intel_bufferobj_get_subdata(struct gl_context * ctx,
  */
 static void *
 intel_bufferobj_map(struct gl_context * ctx,
-                    GLenum target,
                     GLenum access, struct gl_buffer_object *obj)
 {
    struct intel_context *intel = intel_context(ctx);
@@ -761,18 +760,15 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
        * not overlap.
        */
       if (src == dst) {
-	 char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
-					 GL_READ_WRITE, dst);
+	 char *ptr = intel_bufferobj_map(ctx, GL_READ_WRITE, dst);
 	 memmove(ptr + write_offset, ptr + read_offset, size);
 	 intel_bufferobj_unmap(ctx, dst);
       } else {
 	 const char *src_ptr;
 	 char *dst_ptr;
 
-	 src_ptr =  intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER,
-					GL_READ_ONLY, src);
-	 dst_ptr =  intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
-					GL_WRITE_ONLY, dst);
+	 src_ptr =  intel_bufferobj_map(ctx, GL_READ_ONLY, src);
+	 dst_ptr =  intel_bufferobj_map(ctx, GL_WRITE_ONLY, dst);
 
 	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
 
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 1727d4c1a91..44d79534ffd 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -74,7 +74,7 @@ static const GLubyte *map_pbo( struct gl_context *ctx,
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
 					   GL_READ_ONLY_ARB,
 					   unpack->BufferObj);
    if (!buf) {
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index c0ab31b0b11..9db39491515 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -123,7 +123,7 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB
 }
 
 static void *
-nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
+nouveau_bufferobj_map(struct gl_context *ctx, GLenum access,
 		   struct gl_buffer_object *obj)
 {
 	unsigned flags = 0;
@@ -135,7 +135,7 @@ nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
 	    access == GL_READ_WRITE_ARB)
 		flags |= GL_MAP_WRITE_BIT;
 
-	return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, flags,
+	return ctx->Driver.MapBufferRange(ctx, 0, 0, obj->Size, flags,
 					  obj);
 }
 
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index c47e15534ca..ba37923736c 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -84,7 +84,7 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 	GLboolean mapped_named_bo = GL_FALSE;
 
 	if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+		ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 		mapped_named_bo = GL_TRUE;
 		assert(mesa_ind_buf->obj->Pointer != NULL);
 	}
@@ -163,7 +163,7 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 		GLboolean mapped_named_bo = GL_FALSE;
 
 		if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+			ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 			assert(mesa_ind_buf->obj->Pointer != NULL);
 			mapped_named_bo = GL_TRUE;
 		}
@@ -235,7 +235,7 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
 
 	if (input->BufferObj->Name) {
 		if (!input->BufferObj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+			ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
 			mapped_named_bo = GL_TRUE;
 		}
 
@@ -302,7 +302,7 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
 	radeon_bo_map(attr->bo, 1);
 
 	if (!input->BufferObj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+		ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
 		mapped_named_bo = GL_TRUE;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index 9a310eec6bc..e82f4d445f5 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -403,7 +403,7 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+            ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -470,7 +470,7 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -606,7 +606,7 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
@@ -655,7 +655,7 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
         mapped_named_bo = GL_TRUE;
     }
 
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 2300fe6d33f..52a6f7cc45e 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -490,7 +490,7 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+            ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -564,7 +564,7 @@ static void r700AlignDataToDword(struct gl_context *ctx,
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
         mapped_named_bo = GL_TRUE;
     }
 
@@ -727,7 +727,7 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -813,7 +813,7 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index eac1277a719..e645723299f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -175,7 +175,6 @@ radeonGetBufferSubData(struct gl_context * ctx,
  */
 static void *
 radeonMapBuffer(struct gl_context * ctx,
-                GLenum target,
                 GLenum access,
                 struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 3319d118142..fe00bdd520d 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -455,7 +455,6 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
             return;
          }
          buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                                 GL_PIXEL_UNPACK_BUFFER_EXT,
                                                  GL_READ_ONLY_ARB,
                                                  unpack->BufferObj);
          if (!buf) {
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index 385bef1c53e..6400c8f59d7 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1603,7 +1603,6 @@ void _ae_map_vbos( struct gl_context *ctx )
 
    for (i = 0; i < actx->nr_vbos; i++)
       ctx->Driver.MapBuffer(ctx,
-			    GL_ARRAY_BUFFER_ARB,
 			    GL_DYNAMIC_DRAW_ARB,
 			    actx->vbo[i]);
 
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 08faf9e08b4..507d0ce6883 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -65,8 +65,7 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type,
 
    if (_mesa_is_bufferobj(elementBuf)) {
       /* elements are in a user-defined buffer object.  need to map it */
-      map = ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER,
-                                  GL_READ_ONLY, elementBuf);
+      map = ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, elementBuf);
       /* Actual address is the sum of pointers */
       indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices);
    }
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index e52e59eb5c2..fc1ca2a3680 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -447,11 +447,10 @@ _mesa_buffer_get_subdata( struct gl_context *ctx,
  * \sa glMapBufferARB, dd_function_table::MapBuffer
  */
 static void *
-_mesa_buffer_map( struct gl_context *ctx, GLenum target, GLenum access,
+_mesa_buffer_map( struct gl_context *ctx, GLenum access,
 		  struct gl_buffer_object *bufObj )
 {
    (void) ctx;
-   (void) target;
    (void) access;
    /* Just return a direct pointer to the data */
    if (_mesa_bufferobj_mapped(bufObj)) {
@@ -541,10 +540,8 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx,
    assert(!_mesa_bufferobj_mapped(src));
    assert(!_mesa_bufferobj_mapped(dst));
 
-   srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_READ_BUFFER,
-                                              GL_READ_ONLY, src);
-   dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_WRITE_BUFFER,
-                                              GL_WRITE_ONLY, dst);
+   srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, src);
+   dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY, dst);
 
    if (srcPtr && dstPtr)
       memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
@@ -1042,7 +1039,7 @@ _mesa_MapBufferARB(GLenum target, GLenum access)
    }
 
    ASSERT(ctx->Driver.MapBuffer);
-   map = ctx->Driver.MapBuffer( ctx, target, access, bufObj );
+   map = ctx->Driver.MapBuffer( ctx, access, bufObj );
    if (!map) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
       return NULL;
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 318ea1f25aa..cfccdb0d828 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -706,7 +706,7 @@ struct dd_function_table {
 			     GLintptrARB offset, GLsizeiptrARB size,
 			     GLvoid *data, struct gl_buffer_object *obj );
 
-   void * (*MapBuffer)( struct gl_context *ctx, GLenum target, GLenum access,
+   void * (*MapBuffer)( struct gl_context *ctx, GLenum access,
 			struct gl_buffer_object *obj );
 
    void (*CopyBufferSubData)( struct gl_context *ctx,
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 3e54af25d00..a135810ed27 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -894,8 +894,7 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
       GLvoid *image;
 
       map = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                               GL_READ_ONLY_ARB, unpack->BufferObj);
+         ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, unpack->BufferObj);
       if (!map) {
          /* unable to map src buffer! */
          _mesa_error(ctx, GL_INVALID_OPERATION, "unable to map PBO");
diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
index f93cdf1e392..ce362b9e444 100644
--- a/src/mesa/main/pbo.c
+++ b/src/mesa/main/pbo.c
@@ -128,7 +128,7 @@ _mesa_map_pbo_source(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
       /* unpack from PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
                                               GL_READ_ONLY_ARB,
                                               unpack->BufferObj);
       if (!buf)
@@ -223,7 +223,7 @@ _mesa_map_pbo_dest(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       /* pack into PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
+      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
                                               GL_WRITE_ONLY_ARB,
                                               pack->BufferObj);
       if (!buf)
@@ -326,8 +326,8 @@ _mesa_validate_pbo_teximage(struct gl_context *ctx, GLuint dimensions,
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                          GL_READ_ONLY_ARB, unpack->BufferObj);
+   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB,
+					   unpack->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped)");
       return NULL;
@@ -363,8 +363,7 @@ _mesa_validate_pbo_compressed_teximage(struct gl_context *ctx,
       return NULL;
    }
 
-   buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                         GL_READ_ONLY_ARB, packing->BufferObj);
+   buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, packing->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped");
       return NULL;
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 20595ef3b56..a54da7160c7 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -441,8 +441,7 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level,
        * texture data to the PBO if the PBO is in VRAM along with the texture.
        */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                               GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage(map PBO failed)");
@@ -499,8 +498,7 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
       /* pack texture image into a PBO */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                               GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY,
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 732bbaabd4a..a1df11806eb 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -241,7 +241,7 @@ static long st_bufferobj_zero_length = 0;
  * Called via glMapBufferARB().
  */
 static void *
-st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
+st_bufferobj_map(struct gl_context *ctx, GLenum access,
                  struct gl_buffer_object *obj)
 {
    struct st_buffer_object *st_obj = st_buffer_object(obj);
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index 19d4f203c35..7351f6f3be6 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -281,7 +281,6 @@ static void bind_inputs( struct gl_context *ctx,
 	    bo[*nr_bo] = inputs[i]->BufferObj;
 	    (*nr_bo)++;
 	    ctx->Driver.MapBuffer(ctx, 
-				  GL_ARRAY_BUFFER,
 				  GL_READ_ONLY_ARB,
 				  inputs[i]->BufferObj);
 	    
@@ -351,7 +350,6 @@ static void bind_indices( struct gl_context *ctx,
       bo[*nr_bo] = ib->obj;
       (*nr_bo)++;
       ctx->Driver.MapBuffer(ctx, 
-			    GL_ELEMENT_ARRAY_BUFFER,
 			    GL_READ_ONLY_ARB,
 			    ib->obj);
 
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 2692b7539a4..8359a7f1529 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -96,8 +96,7 @@ vbo_get_minmax_index(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(ib->obj)) {
       const GLvoid *map =
-         ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
-                               GL_READ_ONLY, ib->obj);
+         ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, ib->obj);
       indices = ADD_POINTERS(map, ib->ptr);
    } else {
       indices = ib->ptr;
@@ -196,8 +195,7 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array,
          if (!array->BufferObj->Pointer) {
             /* need to map now */
             array->BufferObj->Pointer =
-               ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
-                                     GL_READ_ONLY, array->BufferObj);
+               ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, array->BufferObj);
          }
          data = ADD_POINTERS(data, array->BufferObj->Pointer);
       }
@@ -257,7 +255,6 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
 
    if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
       elemMap = ctx->Driver.MapBuffer(ctx,
-                                      GL_ELEMENT_ARRAY_BUFFER_ARB,
                                       GL_READ_ONLY,
                                       ctx->Array.ElementArrayBufferObj);
       elements = ADD_POINTERS(elements, elemMap);
@@ -350,8 +347,7 @@ print_draw_arrays(struct gl_context *ctx,
 	     bufName);
 
       if (bufName) {
-         GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
-                                            GL_READ_ONLY_ARB, bufObj);
+         GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, bufObj);
          int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr;
          float *f = (float *) (p + offset);
          int *k = (int *) f;
@@ -715,7 +711,6 @@ static void
 dump_element_buffer(struct gl_context *ctx, GLenum type)
 {
    const GLvoid *map = ctx->Driver.MapBuffer(ctx,
-                                             GL_ELEMENT_ARRAY_BUFFER_ARB,
                                              GL_READ_ONLY,
                                              ctx->Array.ElementArrayBufferObj);
    switch (type) {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 5366b989df2..07c5c969453 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -342,7 +342,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
                                                   exec->vtx.bufferobj);
       if (!exec->vtx.buffer_map)
          exec->vtx.buffer_map =
-            (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj);
+            (GLfloat *)ctx->Driver.MapBuffer(ctx, access, exec->vtx.bufferobj);
       assert(exec->vtx.buffer_map);
       exec->vtx.buffer_ptr = exec->vtx.buffer_map;
    }
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index 09ba0a6e9ac..e10908d5ece 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -159,10 +159,7 @@ void vbo_rebase_prims( struct gl_context *ctx,
       void *ptr;
 
       if (map_ib) 
-	 ctx->Driver.MapBuffer(ctx, 
-			       GL_ELEMENT_ARRAY_BUFFER,
-			       GL_READ_ONLY_ARB,
-			       ib->obj);
+	 ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, ib->obj);
 
 
       ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 00f34ad1ec0..f90f00c5aae 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -233,7 +233,6 @@ map_vertex_store(struct gl_context *ctx,
    assert(!vertex_store->buffer);
    vertex_store->buffer =
       (GLfloat *) ctx->Driver.MapBuffer(ctx,
-                                        GL_ARRAY_BUFFER_ARB,   /* not used */
                                         GL_WRITE_ONLY,      /* not used */
                                         vertex_store->
                                         bufferobj);
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index 0d505555789..52952a57ec8 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -218,7 +218,6 @@ vbo_save_loopback_vertex_list(struct gl_context *ctx,
                               const struct vbo_save_vertex_list *list)
 {
    const char *buffer = ctx->Driver.MapBuffer(ctx, 
-					      GL_ARRAY_BUFFER_ARB, 
 					      GL_READ_ONLY, /* ? */
                                               list->vertex_store->bufferobj);
 
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index 49c7435ab5e..8dc5aa0ed76 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -444,7 +444,7 @@ replay_init( struct copy_context *copy )
 	 copy->vertex_size += attr_size(copy->array[i]);
       
 	 if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo)) 
-	    ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY, vbo);
+	    ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, vbo);
 
 	 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
 						 copy->array[i]->Ptr);
@@ -459,8 +459,7 @@ replay_init( struct copy_context *copy )
     */
    if (_mesa_is_bufferobj(copy->ib->obj) &&
        !_mesa_bufferobj_mapped(copy->ib->obj)) 
-      ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY,
-			    copy->ib->obj);
+      ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, copy->ib->obj);
 
    srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer,
                                            copy->ib->ptr);
-- 
cgit v1.2.3


From 92f3fca0ea429dcf07123e63447449db53308266 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:23:58 -0700
Subject: mesa: Remove target parameter from dd_function_table::BufferSubData

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c   | 1 -
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c    | 2 +-
 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 1 -
 src/mesa/main/bufferobj.c                           | 6 +++---
 src/mesa/main/dd.h                                  | 2 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c        | 1 -
 6 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 6f3a90942f6..307b5542798 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -202,7 +202,6 @@ intel_bufferobj_data(struct gl_context * ctx,
  */
 static void
 intel_bufferobj_subdata(struct gl_context * ctx,
-                        GLenum target,
                         GLintptrARB offset,
                         GLsizeiptrARB size,
                         const GLvoid * data, struct gl_buffer_object *obj)
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index 9db39491515..6b10d28c3cf 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -107,7 +107,7 @@ nouveau_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size
 }
 
 static void
-nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset,
 			  GLsizeiptrARB size, const GLvoid *data,
 			  struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index e645723299f..319890c48ac 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -130,7 +130,6 @@ radeonBufferData(struct gl_context * ctx,
  */
 static void
 radeonBufferSubData(struct gl_context * ctx,
-                    GLenum target,
                     GLintptrARB offset,
                     GLsizeiptrARB size,
                     const GLvoid * data,
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index fc1ca2a3680..b0721ea600b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -386,11 +386,11 @@ _mesa_buffer_data( struct gl_context *ctx, GLenum target, GLsizeiptrARB size,
  * \sa glBufferSubDataARB, dd_function_table::BufferSubData.
  */
 static void
-_mesa_buffer_subdata( struct gl_context *ctx, GLenum target, GLintptrARB offset,
+_mesa_buffer_subdata( struct gl_context *ctx, GLintptrARB offset,
 		      GLsizeiptrARB size, const GLvoid * data,
 		      struct gl_buffer_object * bufObj )
 {
-   (void) ctx; (void) target;
+   (void) ctx;
 
    /* this should have been caught in _mesa_BufferSubData() */
    ASSERT(size + offset <= bufObj->Size);
@@ -975,7 +975,7 @@ _mesa_BufferSubDataARB(GLenum target, GLintptrARB offset,
    bufObj->Written = GL_TRUE;
 
    ASSERT(ctx->Driver.BufferSubData);
-   ctx->Driver.BufferSubData( ctx, target, offset, size, data, bufObj );
+   ctx->Driver.BufferSubData( ctx, offset, size, data, bufObj );
 }
 
 
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index cfccdb0d828..54fcb88f69a 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -698,7 +698,7 @@ struct dd_function_table {
                             const GLvoid *data, GLenum usage,
                             struct gl_buffer_object *obj );
 
-   void (*BufferSubData)( struct gl_context *ctx, GLenum target, GLintptrARB offset,
+   void (*BufferSubData)( struct gl_context *ctx, GLintptrARB offset,
 			  GLsizeiptrARB size, const GLvoid *data,
 			  struct gl_buffer_object *obj );
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index a1df11806eb..d71b38bbddf 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -93,7 +93,6 @@ st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj)
  */
 static void
 st_bufferobj_subdata(struct gl_context *ctx,
-		     GLenum target,
 		     GLintptrARB offset,
 		     GLsizeiptrARB size,
 		     const GLvoid * data, struct gl_buffer_object *obj)
-- 
cgit v1.2.3


From 6c8aa3491a19535e8c39a47a3766bf8524e80582 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:30:35 -0700
Subject: mesa: Remove target parameter from
 dd_function_table::GetBufferSubData

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c   | 1 -
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c    | 2 +-
 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 1 -
 src/mesa/main/bufferobj.c                           | 7 +++----
 src/mesa/main/dd.h                                  | 2 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c        | 1 -
 src/mesa/vbo/vbo_save_draw.c                        | 2 +-
 7 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 307b5542798..d9c70dec85b 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -274,7 +274,6 @@ intel_bufferobj_subdata(struct gl_context * ctx,
  */
 static void
 intel_bufferobj_get_subdata(struct gl_context * ctx,
-                            GLenum target,
                             GLintptrARB offset,
                             GLsizeiptrARB size,
                             GLvoid * data, struct gl_buffer_object *obj)
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index 6b10d28c3cf..87a2bfee093 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -115,7 +115,7 @@ nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset,
 }
 
 static void
-nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset,
 			   GLsizeiptrARB size, GLvoid *data,
 			   struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index 319890c48ac..ee634363dca 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -154,7 +154,6 @@ radeonBufferSubData(struct gl_context * ctx,
  */
 static void
 radeonGetBufferSubData(struct gl_context * ctx,
-                       GLenum target,
                        GLintptrARB offset,
                        GLsizeiptrARB size,
                        GLvoid * data,
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index b0721ea600b..41e83b0d689 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -419,12 +419,11 @@ _mesa_buffer_subdata( struct gl_context *ctx, GLintptrARB offset,
  * \sa glBufferGetSubDataARB, dd_function_table::GetBufferSubData.
  */
 static void
-_mesa_buffer_get_subdata( struct gl_context *ctx,
-                          GLenum target, GLintptrARB offset,
+_mesa_buffer_get_subdata( struct gl_context *ctx, GLintptrARB offset,
 			  GLsizeiptrARB size, GLvoid * data,
 			  struct gl_buffer_object * bufObj )
 {
-   (void) ctx; (void) target;
+   (void) ctx;
 
    if (bufObj->Data && ((GLsizeiptrARB) (size + offset) <= bufObj->Size)) {
       memcpy( data, (GLubyte *) bufObj->Data + offset, size );
@@ -995,7 +994,7 @@ _mesa_GetBufferSubDataARB(GLenum target, GLintptrARB offset,
    }
 
    ASSERT(ctx->Driver.GetBufferSubData);
-   ctx->Driver.GetBufferSubData( ctx, target, offset, size, data, bufObj );
+   ctx->Driver.GetBufferSubData( ctx, offset, size, data, bufObj );
 }
 
 
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 54fcb88f69a..5560d96931a 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -702,7 +702,7 @@ struct dd_function_table {
 			  GLsizeiptrARB size, const GLvoid *data,
 			  struct gl_buffer_object *obj );
 
-   void (*GetBufferSubData)( struct gl_context *ctx, GLenum target,
+   void (*GetBufferSubData)( struct gl_context *ctx,
 			     GLintptrARB offset, GLsizeiptrARB size,
 			     GLvoid *data, struct gl_buffer_object *obj );
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index d71b38bbddf..2de56bdb54b 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -132,7 +132,6 @@ st_bufferobj_subdata(struct gl_context *ctx,
  */
 static void
 st_bufferobj_get_subdata(struct gl_context *ctx,
-                         GLenum target,
                          GLintptrARB offset,
                          GLsizeiptrARB size,
                          GLvoid * data, struct gl_buffer_object *obj)
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index 52952a57ec8..e7996f29307 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -70,7 +70,7 @@ _playback_copy_to_current(struct gl_context *ctx,
       else
          offset = node->buffer_offset;
 
-      ctx->Driver.GetBufferSubData( ctx, 0, offset, 
+      ctx->Driver.GetBufferSubData( ctx, offset,
                                     node->vertex_size * sizeof(GLfloat), 
                                     data, node->vertex_store->bufferobj );
 
-- 
cgit v1.2.3


From 4ddae2fb666c86e3267ef6e3d2699f9bfb40d206 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:37:56 -0700
Subject: mesa: Remove target parameter from dd_function_table::MapBufferRange

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c | 2 +-
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c  | 3 +--
 src/mesa/main/bufferobj.c                         | 6 ++----
 src/mesa/main/dd.h                                | 2 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c      | 2 +-
 src/mesa/vbo/vbo_exec_draw.c                      | 3 +--
 6 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index d9c70dec85b..7a0102b7226 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -369,7 +369,7 @@ intel_bufferobj_map(struct gl_context * ctx,
  */
 static void *
 intel_bufferobj_map_range(struct gl_context * ctx,
-			  GLenum target, GLintptr offset, GLsizeiptr length,
+			  GLintptr offset, GLsizeiptr length,
 			  GLbitfield access, struct gl_buffer_object *obj)
 {
    struct intel_context *intel = intel_context(ctx);
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index 87a2bfee093..cf892a893f8 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -135,8 +135,7 @@ nouveau_bufferobj_map(struct gl_context *ctx, GLenum access,
 	    access == GL_READ_WRITE_ARB)
 		flags |= GL_MAP_WRITE_BIT;
 
-	return ctx->Driver.MapBufferRange(ctx, 0, 0, obj->Size, flags,
-					  obj);
+	return ctx->Driver.MapBufferRange(ctx, 0, obj->Size, flags, obj);
 }
 
 static void *
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 41e83b0d689..ba3811d315b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -468,12 +468,11 @@ _mesa_buffer_map( struct gl_context *ctx, GLenum access,
  * Called via glMapBufferRange().
  */
 static void *
-_mesa_buffer_map_range( struct gl_context *ctx, GLenum target, GLintptr offset,
+_mesa_buffer_map_range( struct gl_context *ctx, GLintptr offset,
                         GLsizeiptr length, GLbitfield access,
                         struct gl_buffer_object *bufObj )
 {
    (void) ctx;
-   (void) target;
    assert(!_mesa_bufferobj_mapped(bufObj));
    /* Just return a direct pointer to the data */
    bufObj->Pointer = bufObj->Data + offset;
@@ -1445,8 +1444,7 @@ _mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
    }
       
    ASSERT(ctx->Driver.MapBufferRange);
-   map = ctx->Driver.MapBufferRange(ctx, target, offset, length,
-                                    access, bufObj);
+   map = ctx->Driver.MapBufferRange(ctx, offset, length, access, bufObj);
    if (!map) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
    }
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 5560d96931a..9876d5a53e0 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -717,7 +717,7 @@ struct dd_function_table {
 
    /* May return NULL if MESA_MAP_NOWAIT_BIT is set in access:
     */
-   void * (*MapBufferRange)( struct gl_context *ctx, GLenum target, GLintptr offset,
+   void * (*MapBufferRange)( struct gl_context *ctx, GLintptr offset,
                              GLsizeiptr length, GLbitfield access,
                              struct gl_buffer_object *obj);
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 2de56bdb54b..6857c00b08d 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -281,7 +281,7 @@ st_bufferobj_map(struct gl_context *ctx, GLenum access,
  * Called via glMapBufferRange().
  */
 static void *
-st_bufferobj_map_range(struct gl_context *ctx, GLenum target, 
+st_bufferobj_map_range(struct gl_context *ctx,
                        GLintptr offset, GLsizeiptr length, GLbitfield access,
                        struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 07c5c969453..2c8340ca5ca 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -316,7 +316,6 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
       /* The VBO exists and there's room for more */
       exec->vtx.buffer_map = 
          (GLfloat *)ctx->Driver.MapBufferRange(ctx, 
-                                               target, 
                                                exec->vtx.buffer_used,
                                                (VBO_VERT_BUFFER_SIZE - 
                                                 exec->vtx.buffer_used),
@@ -336,7 +335,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
 
       if (ctx->Driver.MapBufferRange)
          exec->vtx.buffer_map = 
-            (GLfloat *)ctx->Driver.MapBufferRange(ctx, target,
+            (GLfloat *)ctx->Driver.MapBufferRange(ctx,
                                                   0, VBO_VERT_BUFFER_SIZE,
                                                   accessRange,
                                                   exec->vtx.bufferobj);
-- 
cgit v1.2.3


From f973be59fa293ea75f05cdbac2372360deb5e186 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:55:33 -0700
Subject: intel: Correctly check for read-only mappings in
 intel_bufferobj_map_range

The old code was an obvious cut-and-paste error from intel_bufferobj_map.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 7a0102b7226..16a56b03a2e 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -374,7 +374,6 @@ intel_bufferobj_map_range(struct gl_context * ctx,
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-   GLboolean read_only = (access == GL_READ_ONLY_ARB);
 
    assert(intel_obj);
 
@@ -386,6 +385,9 @@ intel_bufferobj_map_range(struct gl_context * ctx,
    obj->AccessFlags = access;
 
    if (intel_obj->sys_buffer) {
+      const bool read_only =
+	 (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;
+
       if (!read_only && intel_obj->source)
 	 release_buffer(intel_obj);
 
-- 
cgit v1.2.3


From 6183edc070e2d3dce36ab5ee7aee72b0c38775a7 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:56:39 -0700
Subject: mesa: Remove target parameter from
 dd_function_table::FlushMappedBufferRange

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c | 2 +-
 src/mesa/main/bufferobj.c                         | 5 ++---
 src/mesa/main/dd.h                                | 2 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c      | 2 +-
 src/mesa/vbo/vbo_exec_draw.c                      | 3 +--
 5 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 16a56b03a2e..1a0f5486c4f 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -471,7 +471,7 @@ intel_bufferobj_map_range(struct gl_context * ctx,
  * would defeat the point.
  */
 static void
-intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
+intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
 				   GLintptr offset, GLsizeiptr length,
 				   struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index ba3811d315b..47af8b59587 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -488,12 +488,11 @@ _mesa_buffer_map_range( struct gl_context *ctx, GLintptr offset,
  * Called via glFlushMappedBufferRange().
  */
 static void
-_mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target, 
+_mesa_buffer_flush_mapped_range( struct gl_context *ctx,
                                  GLintptr offset, GLsizeiptr length,
                                  struct gl_buffer_object *obj )
 {
    (void) ctx;
-   (void) target;
    (void) offset;
    (void) length;
    (void) obj;
@@ -1527,7 +1526,7 @@ _mesa_FlushMappedBufferRange(GLenum target, GLintptr offset, GLsizeiptr length)
    ASSERT(bufObj->AccessFlags & GL_MAP_WRITE_BIT);
 
    if (ctx->Driver.FlushMappedBufferRange)
-      ctx->Driver.FlushMappedBufferRange(ctx, target, offset, length, bufObj);
+      ctx->Driver.FlushMappedBufferRange(ctx, offset, length, bufObj);
 }
 
 
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 9876d5a53e0..8dfea937f37 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -721,7 +721,7 @@ struct dd_function_table {
                              GLsizeiptr length, GLbitfield access,
                              struct gl_buffer_object *obj);
 
-   void (*FlushMappedBufferRange)(struct gl_context *ctx, GLenum target, 
+   void (*FlushMappedBufferRange)(struct gl_context *ctx,
                                   GLintptr offset, GLsizeiptr length,
                                   struct gl_buffer_object *obj);
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 6857c00b08d..dd2eca7bc96 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -351,7 +351,7 @@ st_bufferobj_map_range(struct gl_context *ctx,
 
 
 static void
-st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, 
+st_bufferobj_flush_mapped_range(struct gl_context *ctx,
                                 GLintptr offset, GLsizeiptr length,
                                 struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 2c8340ca5ca..2dc60661796 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -270,8 +270,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
          GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float);
 
          if (length)
-            ctx->Driver.FlushMappedBufferRange(ctx, target,
-                                               offset, length,
+            ctx->Driver.FlushMappedBufferRange(ctx, offset, length,
                                                exec->vtx.bufferobj);
       }
 
-- 
cgit v1.2.3


From b2184da684fc20849b5e6e554f0a0f92d2872ce9 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 18:45:06 -0700
Subject: mesa: Fix incorrect access parameter passed to MapBuffer

The code previously passed GL_DYNAMIC_DRAW for the access parameter.
By inspection, I believe that all drivers would treat this as
GL_READ_WRITE because it's not GL_READ_ONLY and it's not
GL_WRITE_ONLY.

It appears the i965 code wants GL_WRITE_ONLY (it's about to write a
bunch of data in, never read data), while the arrayelt code is
GL_READ_ONLY (just dereferenced as arguments to CALL_Whatever*v).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Keith Whitwell <keithw@vmware.com>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 4 +---
 src/mesa/main/api_arrayelt.c                | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 66c42aa0779..9acec45799a 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -689,9 +689,7 @@ static void brw_prepare_indices(struct brw_context *brw)
        * rebase it into a temporary.
        */
        if ((get_size(index_buffer->type) - 1) & offset) {
-           GLubyte *map = ctx->Driver.MapBuffer(ctx,
-                                                GL_DYNAMIC_DRAW_ARB,
-                                                bufferobj);
+           GLubyte *map = ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY, bufferobj);
            map += offset;
 
 	   intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index 6400c8f59d7..8e1e3ff8dd5 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1602,9 +1602,7 @@ void _ae_map_vbos( struct gl_context *ctx )
       _ae_update_state(ctx);
 
    for (i = 0; i < actx->nr_vbos; i++)
-      ctx->Driver.MapBuffer(ctx,
-			    GL_DYNAMIC_DRAW_ARB,
-			    actx->vbo[i]);
+      ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, actx->vbo[i]);
 
    if (actx->nr_vbos)
       actx->mapped_vbos = GL_TRUE;
-- 
cgit v1.2.3


From cccc7412c22a704d85203d7bb9c8e73d45cccf49 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 18:32:09 -0700
Subject: radeon: Hack up an implementation of MapBufferRange

This doesn't implement any of the "cool" features of MapBufferRange.
Adding this function is necessary for the next commit in the series.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Alex Deucher <alexdeucher@gmail.com>
Cc: Maciej Cencora <m.cencora@gmail.com>
---
 .../drivers/dri/radeon/radeon_buffer_objects.c     | 33 ++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index ee634363dca..e106d98c38e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -197,6 +197,38 @@ radeonMapBuffer(struct gl_context * ctx,
 }
 
 
+/**
+ * Called via glMapBufferRange()
+ */
+static void *
+radeonMapBufferRange(struct gl_context * ctx,
+		     GLintptr offset, GLsizeiptr length,
+		     GLbitfield access, struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+    const GLboolean write_only =
+       (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT;
+
+    if (write_only) {
+        ctx->Driver.Flush(ctx);
+    }
+
+    if (radeon_obj->bo == NULL) {
+        obj->Pointer = NULL;
+        return NULL;
+    }
+
+    obj->Offset = offset;
+    obj->Length = length;
+    obj->AccessFlags = access;
+
+    radeon_bo_map(radeon_obj->bo, write_only);
+
+    obj->Pointer = radeon_obj->bo->ptr + offset;
+    return obj->Pointer;
+}
+
+
 /**
  * Called via glUnmapBufferARB()
  */
@@ -226,5 +258,6 @@ radeonInitBufferObjectFuncs(struct dd_function_table *functions)
     functions->BufferSubData = radeonBufferSubData;
     functions->GetBufferSubData = radeonGetBufferSubData;
     functions->MapBuffer = radeonMapBuffer;
+    functions->MapBufferRange = radeonMapBufferRange;
     functions->UnmapBuffer = radeonUnmapBuffer;
 }
-- 
cgit v1.2.3


From 28249bd260f4c52badf3eb61ade2744604b21bca Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 18:34:27 -0700
Subject: mesa: Eliminate dd_function_table::MapBuffer

Replace all calls to dd_function_table::MapBuffer with appropriate
calls to dd_function_table::MapBufferRange, then remove all the cruft.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c        |  6 +-
 src/mesa/drivers/dri/intel/intel_buffer_objects.c  | 69 +++-------------------
 src/mesa/drivers/dri/intel/intel_pixel_bitmap.c    |  6 +-
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c   | 17 ------
 src/mesa/drivers/dri/r300/r300_draw.c              | 14 +++--
 src/mesa/drivers/dri/r600/evergreen_render.c       | 12 ++--
 src/mesa/drivers/dri/r600/r700_render.c            | 12 ++--
 .../drivers/dri/radeon/radeon_buffer_objects.c     | 32 +---------
 src/mesa/drivers/x11/xm_dd.c                       | 15 ++---
 src/mesa/main/api_arrayelt.c                       |  5 +-
 src/mesa/main/api_validate.c                       |  3 +-
 src/mesa/main/bufferobj.c                          | 43 ++------------
 src/mesa/main/dd.h                                 |  3 -
 src/mesa/main/dlist.c                              |  3 +-
 src/mesa/main/pbo.c                                | 24 +++++---
 src/mesa/main/texgetimage.c                        |  6 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c       | 43 --------------
 src/mesa/tnl/t_draw.c                              | 11 ++--
 src/mesa/vbo/vbo_exec_array.c                      | 24 +++++---
 src/mesa/vbo/vbo_exec_draw.c                       | 18 ++----
 src/mesa/vbo/vbo_rebase.c                          |  3 +-
 src/mesa/vbo/vbo_save_api.c                        |  8 +--
 src/mesa/vbo/vbo_save_draw.c                       |  8 ++-
 src/mesa/vbo/vbo_split_copy.c                      |  5 +-
 24 files changed, 123 insertions(+), 267 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 9acec45799a..ed1964f5a6f 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -689,7 +689,11 @@ static void brw_prepare_indices(struct brw_context *brw)
        * rebase it into a temporary.
        */
        if ((get_size(index_buffer->type) - 1) & offset) {
-           GLubyte *map = ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY, bufferobj);
+           GLubyte *map = ctx->Driver.MapBufferRange(ctx,
+						     0,
+						     bufferobj->Size,
+						     GL_MAP_WRITE_BIT,
+						     bufferobj);
            map += offset;
 
 	   intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 1a0f5486c4f..d908975fc87 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -295,64 +295,7 @@ intel_bufferobj_get_subdata(struct gl_context * ctx,
 
 
 /**
- * Called via glMapBufferARB().
- */
-static void *
-intel_bufferobj_map(struct gl_context * ctx,
-                    GLenum access, struct gl_buffer_object *obj)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-   GLboolean read_only = (access == GL_READ_ONLY_ARB);
-   GLboolean write_only = (access == GL_WRITE_ONLY_ARB);
-
-   assert(intel_obj);
-
-   if (intel_obj->sys_buffer) {
-      if (!read_only && intel_obj->source) {
-	 release_buffer(intel_obj);
-      }
-
-      if (!intel_obj->buffer || intel_obj->source) {
-	 obj->Pointer = intel_obj->sys_buffer;
-	 obj->Length = obj->Size;
-	 obj->Offset = 0;
-	 return obj->Pointer;
-      }
-
-      free(intel_obj->sys_buffer);
-      intel_obj->sys_buffer = NULL;
-   }
-
-   /* Flush any existing batchbuffer that might reference this data. */
-   if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer))
-      intel_flush(ctx);
-
-   if (intel_obj->region)
-      intel_bufferobj_cow(intel, intel_obj);
-
-   if (intel_obj->buffer == NULL) {
-      obj->Pointer = NULL;
-      return NULL;
-   }
-
-   if (write_only) {
-      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
-      intel_obj->mapped_gtt = GL_TRUE;
-   } else {
-      drm_intel_bo_map(intel_obj->buffer, !read_only);
-      intel_obj->mapped_gtt = GL_FALSE;
-   }
-
-   obj->Pointer = intel_obj->buffer->virtual;
-   obj->Length = obj->Size;
-   obj->Offset = 0;
-
-   return obj->Pointer;
-}
-
-/**
- * Called via glMapBufferRange().
+ * Called via glMapBufferRange and glMapBuffer
  *
  * The goal of this extension is to allow apps to accumulate their rendering
  * at the same time as they accumulate their buffer object.  Without it,
@@ -760,15 +703,18 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
        * not overlap.
        */
       if (src == dst) {
-	 char *ptr = intel_bufferobj_map(ctx, GL_READ_WRITE, dst);
+	 char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
+					       GL_MAP_READ_BIT | GL_MAP_WRITE_BIT, dst);
 	 memmove(ptr + write_offset, ptr + read_offset, size);
 	 intel_bufferobj_unmap(ctx, dst);
       } else {
 	 const char *src_ptr;
 	 char *dst_ptr;
 
-	 src_ptr =  intel_bufferobj_map(ctx, GL_READ_ONLY, src);
-	 dst_ptr =  intel_bufferobj_map(ctx, GL_WRITE_ONLY, dst);
+	 src_ptr =  intel_bufferobj_map_range(ctx, 0, src->Size,
+					      GL_MAP_READ_BIT, src);
+	 dst_ptr =  intel_bufferobj_map_range(ctx, 0, dst->Size,
+					      GL_MAP_WRITE_BIT, dst);
 
 	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
 
@@ -923,7 +869,6 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions)
    functions->BufferData = intel_bufferobj_data;
    functions->BufferSubData = intel_bufferobj_subdata;
    functions->GetBufferSubData = intel_bufferobj_get_subdata;
-   functions->MapBuffer = intel_bufferobj_map;
    functions->MapBufferRange = intel_bufferobj_map_range;
    functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
    functions->UnmapBuffer = intel_bufferobj_unmap;
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 44d79534ffd..d9873a303ee 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -74,9 +74,9 @@ static const GLubyte *map_pbo( struct gl_context *ctx,
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-					   GL_READ_ONLY_ARB,
-					   unpack->BufferObj);
+   buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+						GL_MAP_READ_BIT,
+						unpack->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
       return NULL;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index cf892a893f8..433590c4181 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -122,22 +122,6 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset,
 	memcpy(data, get_bufferobj_map(obj, NOUVEAU_BO_RD) + offset, size);
 }
 
-static void *
-nouveau_bufferobj_map(struct gl_context *ctx, GLenum access,
-		   struct gl_buffer_object *obj)
-{
-	unsigned flags = 0;
-
-	if (access == GL_READ_ONLY_ARB ||
-	    access == GL_READ_WRITE_ARB)
-		flags |= GL_MAP_READ_BIT;
-	if (access == GL_WRITE_ONLY_ARB ||
-	    access == GL_READ_WRITE_ARB)
-		flags |= GL_MAP_WRITE_BIT;
-
-	return ctx->Driver.MapBufferRange(ctx, 0, obj->Size, flags, obj);
-}
-
 static void *
 nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offset,
 			    GLsizeiptr length, GLbitfield access,
@@ -188,7 +172,6 @@ nouveau_bufferobj_functions_init(struct dd_function_table *functions)
 	functions->BufferData = nouveau_bufferobj_data;
 	functions->BufferSubData = nouveau_bufferobj_subdata;
 	functions->GetBufferSubData = nouveau_bufferobj_get_subdata;
-	functions->MapBuffer = nouveau_bufferobj_map;
 	functions->MapBufferRange = nouveau_bufferobj_map_range;
 	functions->UnmapBuffer = nouveau_bufferobj_unmap;
 }
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index ba37923736c..5587c16dd44 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -84,7 +84,8 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 	GLboolean mapped_named_bo = GL_FALSE;
 
 	if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+		ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+					   GL_MAP_READ_BIT, mesa_ind_buf->obj);
 		mapped_named_bo = GL_TRUE;
 		assert(mesa_ind_buf->obj->Pointer != NULL);
 	}
@@ -163,7 +164,10 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 		GLboolean mapped_named_bo = GL_FALSE;
 
 		if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+			ctx->Driver.MapBufferRange(ctx, 0,
+						   mesa_ind_buf->obj->Size,
+						   GL_MAP_READ_BIT,
+						   mesa_ind_buf->obj);
 			assert(mesa_ind_buf->obj->Pointer != NULL);
 			mapped_named_bo = GL_TRUE;
 		}
@@ -235,7 +239,8 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
 
 	if (input->BufferObj->Name) {
 		if (!input->BufferObj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+			ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+					      GL_MAP_READ_BIT, input->BufferObj);
 			mapped_named_bo = GL_TRUE;
 		}
 
@@ -302,7 +307,8 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
 	radeon_bo_map(attr->bo, 1);
 
 	if (!input->BufferObj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+		ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+					   GL_MAP_READ_BIT, input->BufferObj);
 		mapped_named_bo = GL_TRUE;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index e82f4d445f5..74563caf47c 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -403,7 +403,8 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+	    ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				       GL_MAP_READ_BIT, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -470,7 +471,8 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+        ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+				   GL_MAP_READ_BIT, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -606,7 +608,8 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	        ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+					   GL_MAP_READ_BIT, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
@@ -655,7 +658,8 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+	ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				   GL_MAP_READ_BIT, input->BufferObj);
         mapped_named_bo = GL_TRUE;
     }
 
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 52a6f7cc45e..a565c9f2087 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -490,7 +490,8 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+	   ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				      GL_MAP_READ_BIT, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -564,7 +565,8 @@ static void r700AlignDataToDword(struct gl_context *ctx,
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+        ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				   GL_MAP_READ_BIT, input->BufferObj);
         mapped_named_bo = GL_TRUE;
     }
 
@@ -727,7 +729,8 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+				   GL_MAP_READ_BIT, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -813,7 +816,8 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+		ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+					   GL_MAP_READ_BIT, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index e106d98c38e..7b59c0377f8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -169,36 +169,7 @@ radeonGetBufferSubData(struct gl_context * ctx,
 }
 
 /**
- * Called via glMapBufferARB()
- */
-static void *
-radeonMapBuffer(struct gl_context * ctx,
-                GLenum access,
-                struct gl_buffer_object *obj)
-{
-    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
-
-    if (access == GL_WRITE_ONLY_ARB) {
-        ctx->Driver.Flush(ctx);
-    }
-
-    if (radeon_obj->bo == NULL) {
-        obj->Pointer = NULL;
-        return NULL;
-    }
-
-    radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB);
-
-    obj->Pointer = radeon_obj->bo->ptr;
-    obj->Length = obj->Size;
-    obj->Offset = 0;
-
-    return obj->Pointer;
-}
-
-
-/**
- * Called via glMapBufferRange()
+ * Called via glMapBuffer() and glMapBufferRange()
  */
 static void *
 radeonMapBufferRange(struct gl_context * ctx,
@@ -257,7 +228,6 @@ radeonInitBufferObjectFuncs(struct dd_function_table *functions)
     functions->BufferData = radeonBufferData;
     functions->BufferSubData = radeonBufferSubData;
     functions->GetBufferSubData = radeonGetBufferSubData;
-    functions->MapBuffer = radeonMapBuffer;
     functions->MapBufferRange = radeonMapBufferRange;
     functions->UnmapBuffer = radeonUnmapBuffer;
 }
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index fe00bdd520d..3a5d0ae04fc 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -454,9 +454,10 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
                         "glDrawPixels(invalid PBO access)");
             return;
          }
-         buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                                 GL_READ_ONLY_ARB,
-                                                 unpack->BufferObj);
+         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						      unpack->BufferObj->Size,
+						      GL_MAP_READ_BIT,
+						      unpack->BufferObj);
          if (!buf) {
             /* buffer is already mapped - that's an error */
             _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -587,10 +588,10 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx,
                         "glDrawPixels(invalid PBO access)");
             return;
          }
-         buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                                 GL_PIXEL_UNPACK_BUFFER_EXT,
-                                                 GL_READ_ONLY_ARB,
-                                                 unpack->BufferObj);
+         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						      unpack->BufferObj->Size,
+						      GL_MAP_READ_BIT,
+						      unpack->BufferObj);
          if (!buf) {
             /* buffer is already mapped - that's an error */
             _mesa_error(ctx, GL_INVALID_OPERATION,
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index 8e1e3ff8dd5..b93a057e68b 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1602,7 +1602,10 @@ void _ae_map_vbos( struct gl_context *ctx )
       _ae_update_state(ctx);
 
    for (i = 0; i < actx->nr_vbos; i++)
-      ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, actx->vbo[i]);
+      ctx->Driver.MapBufferRange(ctx, 0,
+				 actx->vbo[i]->Size,
+				 GL_MAP_READ_BIT,
+				 actx->vbo[i]);
 
    if (actx->nr_vbos)
       actx->mapped_vbos = GL_TRUE;
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 507d0ce6883..699b414f502 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -65,7 +65,8 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type,
 
    if (_mesa_is_bufferobj(elementBuf)) {
       /* elements are in a user-defined buffer object.  need to map it */
-      map = ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, elementBuf);
+      map = ctx->Driver.MapBufferRange(ctx, 0, elementBuf->Size,
+				       GL_MAP_READ_BIT, elementBuf);
       /* Actual address is the sum of pointers */
       indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices);
    }
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 47af8b59587..c453f9c8554 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -431,38 +431,6 @@ _mesa_buffer_get_subdata( struct gl_context *ctx, GLintptrARB offset,
 }
 
 
-/**
- * Default callback for \c dd_function_tabel::MapBuffer().
- *
- * The function parameters will have been already tested for errors.
- *
- * \param ctx     GL context.
- * \param target  Buffer object target on which to operate.
- * \param access  Information about how the buffer will be accessed.
- * \param bufObj  Object to be mapped.
- * \return  A pointer to the object's internal data store that can be accessed
- *          by the processor
- *
- * \sa glMapBufferARB, dd_function_table::MapBuffer
- */
-static void *
-_mesa_buffer_map( struct gl_context *ctx, GLenum access,
-		  struct gl_buffer_object *bufObj )
-{
-   (void) ctx;
-   (void) access;
-   /* Just return a direct pointer to the data */
-   if (_mesa_bufferobj_mapped(bufObj)) {
-      /* already mapped! */
-      return NULL;
-   }
-   bufObj->Pointer = bufObj->Data;
-   bufObj->Length = bufObj->Size;
-   bufObj->Offset = 0;
-   return bufObj->Pointer;
-}
-
-
 /**
  * Default fallback for \c dd_function_table::MapBufferRange().
  * Called via glMapBufferRange().
@@ -537,8 +505,10 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx,
    assert(!_mesa_bufferobj_mapped(src));
    assert(!_mesa_bufferobj_mapped(dst));
 
-   srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, src);
-   dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY, dst);
+   srcPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, src->Size,
+						   GL_MAP_READ_BIT, src);
+   dstPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, dst->Size,
+						   GL_MAP_WRITE_BIT, dst);
 
    if (srcPtr && dstPtr)
       memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
@@ -704,7 +674,6 @@ _mesa_init_buffer_object_functions(struct dd_function_table *driver)
    driver->BufferData = _mesa_buffer_data;
    driver->BufferSubData = _mesa_buffer_subdata;
    driver->GetBufferSubData = _mesa_buffer_get_subdata;
-   driver->MapBuffer = _mesa_buffer_map;
    driver->UnmapBuffer = _mesa_buffer_unmap;
 
    /* GL_ARB_map_buffer_range */
@@ -1035,8 +1004,8 @@ _mesa_MapBufferARB(GLenum target, GLenum access)
       return NULL;
    }
 
-   ASSERT(ctx->Driver.MapBuffer);
-   map = ctx->Driver.MapBuffer( ctx, access, bufObj );
+   ASSERT(ctx->Driver.MapBufferRange);
+   map = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, accessFlags, bufObj);
    if (!map) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
       return NULL;
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 8dfea937f37..fcf40ecf102 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -706,9 +706,6 @@ struct dd_function_table {
 			     GLintptrARB offset, GLsizeiptrARB size,
 			     GLvoid *data, struct gl_buffer_object *obj );
 
-   void * (*MapBuffer)( struct gl_context *ctx, GLenum access,
-			struct gl_buffer_object *obj );
-
    void (*CopyBufferSubData)( struct gl_context *ctx,
                               struct gl_buffer_object *src,
                               struct gl_buffer_object *dst,
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index a135810ed27..6e075b4e54b 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -894,7 +894,8 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
       GLvoid *image;
 
       map = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, unpack->BufferObj);
+         ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+				    GL_MAP_READ_BIT, unpack->BufferObj);
       if (!map) {
          /* unable to map src buffer! */
          _mesa_error(ctx, GL_INVALID_OPERATION, "unable to map PBO");
diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
index ce362b9e444..4e7e6f925cc 100644
--- a/src/mesa/main/pbo.c
+++ b/src/mesa/main/pbo.c
@@ -128,9 +128,10 @@ _mesa_map_pbo_source(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
       /* unpack from PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                              GL_READ_ONLY_ARB,
-                                              unpack->BufferObj);
+      buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						   unpack->BufferObj->Size,
+						   GL_MAP_READ_BIT,
+						   unpack->BufferObj);
       if (!buf)
          return NULL;
 
@@ -223,9 +224,10 @@ _mesa_map_pbo_dest(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       /* pack into PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                              GL_WRITE_ONLY_ARB,
-                                              pack->BufferObj);
+      buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						   pack->BufferObj->Size,
+						   GL_MAP_WRITE_BIT,
+						   pack->BufferObj);
       if (!buf)
          return NULL;
 
@@ -326,8 +328,9 @@ _mesa_validate_pbo_teximage(struct gl_context *ctx, GLuint dimensions,
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB,
-					   unpack->BufferObj);
+   buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+						GL_MAP_READ_BIT,
+						unpack->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped)");
       return NULL;
@@ -363,7 +366,10 @@ _mesa_validate_pbo_compressed_teximage(struct gl_context *ctx,
       return NULL;
    }
 
-   buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, packing->BufferObj);
+   buf = (GLubyte*) ctx->Driver.MapBufferRange(ctx, 0,
+					       packing->BufferObj->Size,
+					       GL_MAP_READ_BIT,
+					       packing->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped");
       return NULL;
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index a54da7160c7..b2ebb0de475 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -441,7 +441,8 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level,
        * texture data to the PBO if the PBO is in VRAM along with the texture.
        */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size,
+				    GL_MAP_WRITE_BIT, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage(map PBO failed)");
@@ -498,7 +499,8 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
       /* pack texture image into a PBO */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size,
+				    GL_MAP_WRITE_BIT, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY,
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index dd2eca7bc96..a451b44049e 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -235,48 +235,6 @@ static long st_bufferobj_zero_length = 0;
 
 
-/**
- * Called via glMapBufferARB().
- */
-static void *
-st_bufferobj_map(struct gl_context *ctx, GLenum access,
-                 struct gl_buffer_object *obj)
-{
-   struct st_buffer_object *st_obj = st_buffer_object(obj);
-   uint flags;
-
-   switch (access) {
-   case GL_WRITE_ONLY:
-      flags = PIPE_TRANSFER_WRITE;
-      break;
-   case GL_READ_ONLY:
-      flags = PIPE_TRANSFER_READ;
-      break;
-   case GL_READ_WRITE:
-   default:
-      flags = PIPE_TRANSFER_READ_WRITE;
-      break;      
-   }
-
-   /* Handle zero-size buffers here rather than in drivers */
-   if (obj->Size == 0) {
-      obj->Pointer = &st_bufferobj_zero_length;
-   }
-   else {
-      obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe,
-                                     st_obj->buffer,
-                                     flags,
-                                     &st_obj->transfer);
-   }
-
-   if (obj->Pointer) {
-      obj->Offset = 0;
-      obj->Length = obj->Size;
-   }
-   return obj->Pointer;
-}
-
-
 /**
  * Called via glMapBufferRange().
  */
@@ -442,7 +400,6 @@ st_init_bufferobject_functions(struct dd_function_table *functions)
    functions->BufferData = st_bufferobj_data;
    functions->BufferSubData = st_bufferobj_subdata;
    functions->GetBufferSubData = st_bufferobj_get_subdata;
-   functions->MapBuffer = st_bufferobj_map;
    functions->MapBufferRange = st_bufferobj_map_range;
    functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range;
    functions->UnmapBuffer = st_bufferobj_unmap;
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index 7351f6f3be6..a23d1754391 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -280,9 +280,9 @@ static void bind_inputs( struct gl_context *ctx,
 	 if (!inputs[i]->BufferObj->Pointer) {
 	    bo[*nr_bo] = inputs[i]->BufferObj;
 	    (*nr_bo)++;
-	    ctx->Driver.MapBuffer(ctx, 
-				  GL_READ_ONLY_ARB,
-				  inputs[i]->BufferObj);
+	    ctx->Driver.MapBufferRange(ctx, 0, inputs[i]->BufferObj->Size,
+				       GL_MAP_READ_BIT,
+				       inputs[i]->BufferObj);
 	    
 	    assert(inputs[i]->BufferObj->Pointer);
 	 }
@@ -349,9 +349,8 @@ static void bind_indices( struct gl_context *ctx,
    if (ib->obj->Name && !ib->obj->Pointer) {
       bo[*nr_bo] = ib->obj;
       (*nr_bo)++;
-      ctx->Driver.MapBuffer(ctx, 
-			    GL_READ_ONLY_ARB,
-			    ib->obj);
+      ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
+				 ib->obj);
 
       assert(ib->obj->Pointer);
    }
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 8359a7f1529..484e1a85e41 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -96,7 +96,8 @@ vbo_get_minmax_index(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(ib->obj)) {
       const GLvoid *map =
-         ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, ib->obj);
+         ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
+				    ib->obj);
       indices = ADD_POINTERS(map, ib->ptr);
    } else {
       indices = ib->ptr;
@@ -195,7 +196,8 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array,
          if (!array->BufferObj->Pointer) {
             /* need to map now */
             array->BufferObj->Pointer =
-               ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, array->BufferObj);
+               ctx->Driver.MapBufferRange(ctx, 0, array->BufferObj->Size,
+					  GL_MAP_READ_BIT, array->BufferObj);
          }
          data = ADD_POINTERS(data, array->BufferObj->Pointer);
       }
@@ -254,9 +256,10 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
    GLint i, k;
 
    if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
-      elemMap = ctx->Driver.MapBuffer(ctx,
-                                      GL_READ_ONLY,
-                                      ctx->Array.ElementArrayBufferObj);
+      elemMap = ctx->Driver.MapBufferRange(ctx, 0,
+					   ctx->Array.ElementArrayBufferObj->Size,
+					   GL_MAP_READ_BIT,
+					   ctx->Array.ElementArrayBufferObj);
       elements = ADD_POINTERS(elements, elemMap);
    }
 
@@ -347,7 +350,8 @@ print_draw_arrays(struct gl_context *ctx,
 	     bufName);
 
       if (bufName) {
-         GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, bufObj);
+         GLubyte *p = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size,
+						 GL_MAP_READ_BIT, bufObj);
          int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr;
          float *f = (float *) (p + offset);
          int *k = (int *) f;
@@ -710,9 +714,11 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
 static void
 dump_element_buffer(struct gl_context *ctx, GLenum type)
 {
-   const GLvoid *map = ctx->Driver.MapBuffer(ctx,
-                                             GL_READ_ONLY,
-                                             ctx->Array.ElementArrayBufferObj);
+   const GLvoid *map =
+      ctx->Driver.MapBufferRange(ctx, 0,
+				 ctx->Array.ElementArrayBufferObj->Size,
+				 GL_MAP_READ_BIT,
+				 ctx->Array.ElementArrayBufferObj);
    switch (type) {
    case GL_UNSIGNED_BYTE:
       {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 2dc60661796..25436c6d6d2 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -296,7 +296,6 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
 {
    struct gl_context *ctx = exec->ctx;
    const GLenum target = GL_ARRAY_BUFFER_ARB;
-   const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */
    const GLenum accessRange = GL_MAP_WRITE_BIT |  /* for MapBufferRange */
                               GL_MAP_INVALIDATE_RANGE_BIT |
                               GL_MAP_UNSYNCHRONIZED_BIT |
@@ -310,8 +309,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
    assert(!exec->vtx.buffer_map);
    assert(!exec->vtx.buffer_ptr);
 
-   if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 &&
-       ctx->Driver.MapBufferRange) {
+   if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024) {
       /* The VBO exists and there's room for more */
       exec->vtx.buffer_map = 
          (GLfloat *)ctx->Driver.MapBufferRange(ctx, 
@@ -332,15 +330,11 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
                              NULL, usage, exec->vtx.bufferobj);
 
 
-      if (ctx->Driver.MapBufferRange)
-         exec->vtx.buffer_map = 
-            (GLfloat *)ctx->Driver.MapBufferRange(ctx,
-                                                  0, VBO_VERT_BUFFER_SIZE,
-                                                  accessRange,
-                                                  exec->vtx.bufferobj);
-      if (!exec->vtx.buffer_map)
-         exec->vtx.buffer_map =
-            (GLfloat *)ctx->Driver.MapBuffer(ctx, access, exec->vtx.bufferobj);
+      exec->vtx.buffer_map =
+	 (GLfloat *)ctx->Driver.MapBufferRange(ctx,
+					       0, VBO_VERT_BUFFER_SIZE,
+					       accessRange,
+					       exec->vtx.bufferobj);
       assert(exec->vtx.buffer_map);
       exec->vtx.buffer_ptr = exec->vtx.buffer_map;
    }
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index e10908d5ece..a1eab752ad6 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -159,7 +159,8 @@ void vbo_rebase_prims( struct gl_context *ctx,
       void *ptr;
 
       if (map_ib) 
-	 ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, ib->obj);
+	 ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
+				    ib->obj);
 
 
       ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index f90f00c5aae..ad36e93329c 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -232,10 +232,10 @@ map_vertex_store(struct gl_context *ctx,
    assert(vertex_store->bufferobj);
    assert(!vertex_store->buffer);
    vertex_store->buffer =
-      (GLfloat *) ctx->Driver.MapBuffer(ctx,
-                                        GL_WRITE_ONLY,      /* not used */
-                                        vertex_store->
-                                        bufferobj);
+      (GLfloat *) ctx->Driver.MapBufferRange(ctx, 0,
+					     vertex_store->bufferobj->Size,
+					     GL_MAP_WRITE_BIT,    /* not used */
+					     vertex_store->bufferobj);
 
    assert(vertex_store->buffer);
    return vertex_store->buffer + vertex_store->used;
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index e7996f29307..6cda831aa85 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -217,9 +217,11 @@ static void
 vbo_save_loopback_vertex_list(struct gl_context *ctx,
                               const struct vbo_save_vertex_list *list)
 {
-   const char *buffer = ctx->Driver.MapBuffer(ctx, 
-					      GL_READ_ONLY, /* ? */
-                                              list->vertex_store->bufferobj);
+   const char *buffer =
+      ctx->Driver.MapBufferRange(ctx, 0,
+				 list->vertex_store->bufferobj->Size,
+				 GL_MAP_READ_BIT, /* ? */
+				 list->vertex_store->bufferobj);
 
    vbo_loopback_vertex_list(ctx,
                             (const GLfloat *)(buffer + list->buffer_offset),
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index 8dc5aa0ed76..40906e38917 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -444,7 +444,7 @@ replay_init( struct copy_context *copy )
 	 copy->vertex_size += attr_size(copy->array[i]);
       
 	 if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo)) 
-	    ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, vbo);
+	    ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo);
 
 	 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
 						 copy->array[i]->Ptr);
@@ -459,7 +459,8 @@ replay_init( struct copy_context *copy )
     */
    if (_mesa_is_bufferobj(copy->ib->obj) &&
        !_mesa_bufferobj_mapped(copy->ib->obj)) 
-      ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, copy->ib->obj);
+      ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
+				 copy->ib->obj);
 
    srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer,
                                            copy->ib->ptr);
-- 
cgit v1.2.3


From 655c7d7498390ab69623e308abe5db4a8489a25c Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 22 Aug 2011 00:14:51 -0700
Subject: mesa: Only map the necessary buffer range in vbo_get_minmax_index

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/vbo/vbo_exec_array.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 484e1a85e41..18719d5f537 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -95,10 +95,25 @@ vbo_get_minmax_index(struct gl_context *ctx,
    GLuint i;
 
    if (_mesa_is_bufferobj(ib->obj)) {
-      const GLvoid *map =
-         ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
-				    ib->obj);
-      indices = ADD_POINTERS(map, ib->ptr);
+      unsigned map_size;
+
+      switch (ib->type) {
+      case GL_UNSIGNED_INT:
+	 map_size = count * sizeof(GLuint);
+	 break;
+      case GL_UNSIGNED_SHORT:
+	 map_size = count * sizeof(GLushort);
+	 break;
+      case GL_UNSIGNED_BYTE:
+	 map_size = count * sizeof(GLubyte);
+	 break;
+      default:
+	 assert(0);
+	 map_size = 0;
+      }
+
+      indices = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size,
+					   GL_MAP_READ_BIT, ib->obj);
    } else {
       indices = ib->ptr;
    }
-- 
cgit v1.2.3


From 2ea1ff38164d95f8291ef2e5dfe2cb13936a60f2 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 22 Aug 2011 00:31:19 -0700
Subject: tnl: Only map the necessary buffer range in bind_indices

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/tnl/t_draw.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index a23d1754391..86af4b7cfe2 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -347,16 +347,32 @@ static void bind_indices( struct gl_context *ctx,
    }
 
    if (ib->obj->Name && !ib->obj->Pointer) {
+      unsigned map_size;
+
+      switch (ib->type) {
+      case GL_UNSIGNED_BYTE:
+	 map_size = ib->count * sizeof(GLubyte);
+	 break;
+      case GL_UNSIGNED_SHORT:
+	 map_size = ib->count * sizeof(GLushort);
+	 break;
+      case GL_UNSIGNED_INT:
+	 map_size = ib->count * sizeof(GLuint);
+	 break;
+      default:
+	 assert(0);
+	 map_size = 0;
+      }
+
       bo[*nr_bo] = ib->obj;
       (*nr_bo)++;
-      ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
-				 ib->obj);
-
+      ptr = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size,
+				       GL_MAP_READ_BIT, ib->obj);
       assert(ib->obj->Pointer);
+   } else {
+      ptr = ib->ptr;
    }
 
-   ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
-
    if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) {
       VB->Elts = (GLuint *) ptr;
    }
-- 
cgit v1.2.3


From 0d636213d491f88726155e12c3b445a88e0f1cd8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 22 Aug 2011 10:52:47 -0700
Subject: i965: Only map the necessary buffer range in brw_prepare_indices

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index ed1964f5a6f..7bc69c612e3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -690,11 +690,10 @@ static void brw_prepare_indices(struct brw_context *brw)
        */
        if ((get_size(index_buffer->type) - 1) & offset) {
            GLubyte *map = ctx->Driver.MapBufferRange(ctx,
-						     0,
-						     bufferobj->Size,
+						     offset,
+						     ib_size,
 						     GL_MAP_WRITE_BIT,
 						     bufferobj);
-           map += offset;
 
 	   intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
 			     &bo, &offset);
-- 
cgit v1.2.3


From c3ad95ed40fca72dbc6c157de2948cb6d074aaac Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 24 Aug 2011 07:55:04 -0600
Subject: x11: add missing comma to fix compilation

---
 src/mesa/drivers/x11/xm_dd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 3a5d0ae04fc..81d000b3952 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -588,7 +588,7 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx,
                         "glDrawPixels(invalid PBO access)");
             return;
          }
-         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0
+         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
 						      unpack->BufferObj->Size,
 						      GL_MAP_READ_BIT,
 						      unpack->BufferObj);
-- 
cgit v1.2.3


From a231d245514b43cf547ccb996b68efc0186e6821 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 14 Jul 2011 20:57:34 -0600
Subject: mesa: add gl_texture_image::Face, Level fields

Several drivers have these fields in their subclasses of gl_texture_image.
They'll be useful for core Mesa too...

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/mtypes.h   | 3 +++
 src/mesa/main/teximage.c | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'src/mesa')

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8b3650321db..f2eb889feb4 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1279,6 +1279,9 @@ struct gl_texture_image
    GLboolean _IsPowerOfTwo;	/**< Are all dimensions powers of two? */
 
    struct gl_texture_object *TexObject;  /**< Pointer back to parent object */
+   GLuint Level;                /**< Which mipmap level am I? */
+   /** Cube map face: index into gl_texture_object::Image[] array */
+   GLuint Face;
 
    FetchTexelFuncC FetchTexelc;	/**< GLchan texel fetch function pointer */
    FetchTexelFuncF FetchTexelf;	/**< Float texel fetch function pointer */
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 27717cfb0f5..a005d2935fa 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -571,6 +571,8 @@ _mesa_set_tex_image(struct gl_texture_object *tObj,
 
    /* Set the 'back' pointer */
    texImage->TexObject = tObj;
+   texImage->Level = level;
+   texImage->Face = face;
 }
 
 
-- 
cgit v1.2.3


From 7dae1aaf142999e3cfeafb13d30abda667d66d87 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 14 Jul 2011 20:57:34 -0600
Subject: intel: use new gl_texture_image::Face, Level fields

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/intel/intel_blit.c         |  4 +--
 src/mesa/drivers/dri/intel/intel_fbo.c          | 12 ++++-----
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c  |  2 +-
 src/mesa/drivers/dri/intel/intel_tex.c          |  5 ----
 src/mesa/drivers/dri/intel/intel_tex_copy.c     |  4 +--
 src/mesa/drivers/dri/intel/intel_tex_image.c    | 35 ++++++++++---------------
 src/mesa/drivers/dri/intel/intel_tex_obj.h      |  5 ----
 src/mesa/drivers/dri/intel/intel_tex_subimage.c |  6 ++---
 src/mesa/drivers/dri/intel/intel_tex_validate.c | 12 ++++-----
 9 files changed, 34 insertions(+), 51 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 30be1b9382f..b18dd2922d9 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -541,8 +541,8 @@ intel_set_teximage_alpha_to_one(struct gl_context *ctx,
 
    /* get dest x/y in destination texture */
    intel_miptree_get_image_offset(intel_image->mt,
-				  intel_image->level,
-				  intel_image->face,
+				  intel_image->base.Level,
+				  intel_image->base.Face,
 				  0,
 				  &image_x, &image_y);
 
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 65ad621e770..754f9f202d1 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -600,8 +600,8 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb,
 
    /* compute offset of the particular 2D image within the texture region */
    intel_miptree_get_image_offset(intel_image->mt,
-				  intel_image->level,
-				  intel_image->face,
+				  intel_image->base.Level,
+				  intel_image->base.Face,
 				  zoffset,
 				  &dst_x, &dst_y);
 
@@ -728,8 +728,8 @@ intel_render_texture(struct gl_context * ctx,
 
       new_mt = intel_miptree_create(intel, image->TexObject->Target,
 				    intel_image->base.TexFormat,
-				    intel_image->level,
-				    intel_image->level,
+				    intel_image->base.Level,
+				    intel_image->base.Level,
 				    intel_image->base.Width,
 				    intel_image->base.Height,
 				    intel_image->base.Depth,
@@ -737,8 +737,8 @@ intel_render_texture(struct gl_context * ctx,
 
       intel_miptree_image_copy(intel,
                                new_mt,
-                               intel_image->face,
-			       intel_image->level,
+			       intel_image->base.Face,
+			       intel_image->base.Level,
 			       old_mt);
 
       intel_miptree_release(intel, &intel_image->mt);
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 4e711de1ce1..f36240d7f1d 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -227,7 +227,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
                           struct gl_texture_image *image)
 {
    struct intel_texture_image *intelImage = intel_texture_image(image);
-   GLuint level = intelImage->level;
+   GLuint level = intelImage->base.Level;
 
    /* Images with borders are never pulled into mipmap trees. */
    if (image->Border)
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index 21c4a1dddba..ee0cd252375 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -95,17 +95,12 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target,
       if (!_mesa_is_format_compressed(first_image->TexFormat)) {
          GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
          GLuint face, i;
-         /* Update the level information in our private data in the new images,
-          * since it didn't get set as part of a normal TexImage path.
-          */
          for (face = 0; face < nr_faces; face++) {
             for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
                struct intel_texture_image *intelImage =
                   intel_texture_image(texObj->Image[face][i]);
                if (!intelImage)
                   break;
-               intelImage->level = i;
-               intelImage->face = face;
                /* Unreference the miptree to signal that the new Data is a
                 * bare pointer from mesa.
                 */
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index e89e91dee3e..600bd1251e0 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -118,8 +118,8 @@ intel_copy_texsubimage(struct intel_context *intel,
 
       /* get dest x/y in destination texture */
       intel_miptree_get_image_offset(intelImage->mt,
-				     intelImage->level,
-				     intelImage->face,
+				     intelImage->base.Level,
+				     intelImage->base.Face,
 				     0,
 				     &image_x, &image_y);
 
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 1f8b885bbec..4ee66847255 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -63,7 +63,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
    if (intelImage->base.Border)
       return NULL;
 
-   if (intelImage->level > intelObj->base.BaseLevel &&
+   if (intelImage->base.Level > intelObj->base.BaseLevel &&
        (intelImage->base.Width == 1 ||
         (intelObj->base.Target != GL_TEXTURE_1D &&
          intelImage->base.Height == 1) ||
@@ -74,19 +74,19 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
        * likely base level width/height/depth for a full mipmap stack
        * from this info, so just allocate this one level.
        */
-      firstLevel = intelImage->level;
-      lastLevel = intelImage->level;
+      firstLevel = intelImage->base.Level;
+      lastLevel = intelImage->base.Level;
    } else {
       /* If this image disrespects BaseLevel, allocate from level zero.
        * Usually BaseLevel == 0, so it's unlikely to happen.
        */
-      if (intelImage->level < intelObj->base.BaseLevel)
+      if (intelImage->base.Level < intelObj->base.BaseLevel)
 	 firstLevel = 0;
       else
 	 firstLevel = intelObj->base.BaseLevel;
 
       /* Figure out image dimensions at start level. */
-      for (i = intelImage->level; i > firstLevel; i--) {
+      for (i = intelImage->base.Level; i > firstLevel; i--) {
 	 width <<= 1;
 	 if (height != 1)
 	    height <<= 1;
@@ -101,7 +101,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
        */
       if ((intelObj->base.Sampler.MinFilter == GL_NEAREST ||
 	   intelObj->base.Sampler.MinFilter == GL_LINEAR) &&
-	  intelImage->level == firstLevel &&
+	  intelImage->base.Level == firstLevel &&
 	  (intel->gen < 4 || firstLevel == 0)) {
 	 lastLevel = firstLevel;
       } else {
@@ -186,8 +186,8 @@ try_pbo_upload(struct intel_context *intel,
    else
       src_stride = width;
 
-   intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
-				  intelImage->face, 0,
+   intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+				  intelImage->base.Face, 0,
 				  &dst_x, &dst_y);
 
    dst_stride = intelImage->mt->region->pitch;
@@ -243,8 +243,8 @@ try_pbo_zcopy(struct intel_context *intel,
    else
       src_stride = width;
 
-   intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
-				  intelImage->face, 0,
+   intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+				  intelImage->base.Face, 0,
 				  &dst_x, &dst_y);
 
    dst_stride = intelImage->mt->region->pitch;
@@ -407,9 +407,6 @@ intelTexImage(struct gl_context * ctx,
    DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
        _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
 
-   intelImage->face = _mesa_tex_target_to_face(target);
-   intelImage->level = level;
-
    if (_mesa_is_format_compressed(texImage->TexFormat)) {
       texelBytes = 0;
    }
@@ -514,8 +511,8 @@ intelTexImage(struct gl_context * ctx,
 	 }
          texImage->Data = intel_miptree_image_map(intel,
                                                   intelImage->mt,
-                                                  intelImage->face,
-                                                  intelImage->level,
+                                                  intelImage->base.Face,
+                                                  intelImage->base.Level,
                                                   &dstRowStride,
                                                   intelImage->base.ImageOffsets);
       }
@@ -684,8 +681,8 @@ intel_get_tex_image(struct gl_context * ctx, GLenum target, GLint level,
       intelImage->base.Data =
          intel_miptree_image_map(intel,
                                  intelImage->mt,
-                                 intelImage->face,
-                                 intelImage->level,
+                                 intelImage->base.Face,
+                                 intelImage->base.Level,
                                  &intelImage->base.RowStride,
                                  intelImage->base.ImageOffsets);
       intelImage->base.RowStride /= intelImage->mt->cpp;
@@ -816,8 +813,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
 			      rb->region->width, rb->region->height, 1,
 			      0, internalFormat, texFormat);
 
-   intelImage->face = _mesa_tex_target_to_face(target);
-   intelImage->level = level;
    texImage->RowStride = rb->region->pitch;
    intel_miptree_reference(&intelImage->mt, intelObj->mt);
 
@@ -874,8 +869,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
 			      image->region->width, image->region->height, 1,
 			      0, image->internal_format, image->format);
 
-   intelImage->face = _mesa_tex_target_to_face(target);
-   intelImage->level = 0;
    texImage->RowStride = image->region->pitch;
    intel_miptree_reference(&intelImage->mt, intelObj->mt);
 
diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h
index a9ae2ec5429..e7a4318b8d8 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h
@@ -52,11 +52,6 @@ struct intel_texture_image
 {
    struct gl_texture_image base;
 
-   /* These aren't stored in gl_texture_image 
-    */
-   GLuint level;
-   GLuint face;
-
    /* If intelImage->mt != NULL, image data is stored here.
     * Else if intelImage->base.Data != NULL, image is stored there.
     * Else there is no image data.
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 8b43c406cf9..5fd2cc36234 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -113,7 +113,7 @@ intelTexSubimage(struct gl_context * ctx,
 	 dstRowStride = pitch;
 
 	 intel_miptree_get_image_offset(intelImage->mt, level,
-					intelImage->face, 0,
+					intelImage->base.Face, 0,
 					&blit_x, &blit_y);
 	 blit_x += xoffset;
 	 blit_y += yoffset;
@@ -122,8 +122,8 @@ intelTexSubimage(struct gl_context * ctx,
       } else {
 	 texImage->Data = intel_miptree_image_map(intel,
 						  intelImage->mt,
-						  intelImage->face,
-						  intelImage->level,
+						  intelImage->base.Face,
+						  intelImage->base.Level,
 						  &dstRowStride,
 						  texImage->ImageOffsets);
       }
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index 7135a6276fe..31ac689ad77 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -42,8 +42,8 @@ copy_image_data_to_tree(struct intel_context *intel,
        */
       intel_miptree_image_copy(intel,
                                intelObj->mt,
-                               intelImage->face,
-                               intelImage->level, intelImage->mt);
+                               intelImage->base.Face,
+                               intelImage->base.Level, intelImage->mt);
 
       intel_miptree_release(intel, &intelImage->mt);
    }
@@ -54,8 +54,8 @@ copy_image_data_to_tree(struct intel_context *intel,
        */
       intel_miptree_image_data(intel,
                                intelObj->mt,
-                               intelImage->face,
-                               intelImage->level,
+                               intelImage->base.Face,
+                               intelImage->base.Level,
                                intelImage->base.Data,
                                intelImage->base.RowStride,
                                intelImage->base.RowStride *
@@ -177,8 +177,8 @@ intel_tex_map_level_images(struct intel_context *intel,
 	 intelImage->base.Data =
 	    intel_miptree_image_map(intel,
 				    intelImage->mt,
-				    intelImage->face,
-				    intelImage->level,
+				    intelImage->base.Face,
+				    intelImage->base.Level,
 				    &intelImage->base.RowStride,
 				    intelImage->base.ImageOffsets);
 	 /* convert stride to texels, not bytes */
-- 
cgit v1.2.3


From 6dbad425bc423eb7db7c99aab161955c7b4cdc4c Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 14 Jul 2011 20:57:34 -0600
Subject: st/mesa: remove st_texture_image::face,level fields

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/state_tracker/st_cb_texture.c | 30 +++++++++++++++---------------
 src/mesa/state_tracker/st_gen_mipmap.c |  1 -
 src/mesa/state_tracker/st_texture.c    |  4 ++--
 src/mesa/state_tracker/st_texture.h    |  5 -----
 4 files changed, 17 insertions(+), 23 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 25f08aa4d09..a3b2ba9e78d 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -344,7 +344,7 @@ guess_and_alloc_texture(struct st_context *st,
                               stImage->base.Width2,
                               stImage->base.Height2,
                               stImage->base.Depth2,
-                              stImage->level,
+                              stImage->base.Level,
                               &width, &height, &depth)) {
       /* we can't determine the image size at level=0 */
       stObj->width0 = stObj->height0 = stObj->depth0 = 0;
@@ -367,7 +367,7 @@ guess_and_alloc_texture(struct st_context *st,
         stImage->base._BaseFormat == GL_DEPTH_COMPONENT ||
         stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) &&
        !stObj->base.GenerateMipmap &&
-       stImage->level == 0) {
+       stImage->base.Level == 0) {
       /* only alloc space for a single mipmap level */
       lastLevel = 0;
    }
@@ -506,8 +506,8 @@ st_TexImage(struct gl_context * ctx,
       assert(texImage->Depth == depth);
    }
 
-   stImage->face = _mesa_tex_target_to_face(target);
-   stImage->level = level;
+   stImage->base.Face = _mesa_tex_target_to_face(target);
+   stImage->base.Level = level;
 
    _mesa_set_fetch_functions(texImage, dims);
 
@@ -529,7 +529,7 @@ st_TexImage(struct gl_context * ctx,
    if (stObj->pt) {
       if (level > (GLint) stObj->pt->last_level ||
           !st_texture_match_image(stObj->pt, &stImage->base,
-                                  stImage->face, stImage->level)) {
+                                  stImage->base.Face, stImage->base.Level)) {
          DBG("release it\n");
          pipe_resource_reference(&stObj->pt, NULL);
          assert(!stObj->pt);
@@ -563,7 +563,7 @@ st_TexImage(struct gl_context * ctx,
     */
    if (stObj->pt &&
        st_texture_match_image(stObj->pt, &stImage->base,
-                              stImage->face, stImage->level)) {
+                              stImage->base.Face, stImage->base.Level)) {
 
       pipe_resource_reference(&stImage->pt, stObj->pt);
       assert(stImage->pt);
@@ -1501,8 +1501,8 @@ st_copy_texsubimage(struct gl_context *ctx,
          pipe->resource_copy_region(pipe,
                                     /* dest */
                                     stImage->pt,
-                                    stImage->level,
-                                    destX, destY, destZ + stImage->face,
+                                    stImage->base.Level,
+                                    destX, destY, destZ + stImage->base.Face,
                                     /* src */
                                     strb->texture,
                                     strb->surface->u.tex.level,
@@ -1524,9 +1524,9 @@ st_copy_texsubimage(struct gl_context *ctx,
          memset(&surf_tmpl, 0, sizeof(surf_tmpl));
          surf_tmpl.format = util_format_linear(stImage->pt->format);
          surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
-         surf_tmpl.u.tex.level = stImage->level;
-         surf_tmpl.u.tex.first_layer = stImage->face + destZ;
-         surf_tmpl.u.tex.last_layer = stImage->face + destZ;
+         surf_tmpl.u.tex.level = stImage->base.Level;
+         surf_tmpl.u.tex.first_layer = stImage->base.Face + destZ;
+         surf_tmpl.u.tex.last_layer = stImage->base.Face + destZ;
 
          dest_surface = pipe->create_surface(pipe, stImage->pt,
                                              &surf_tmpl);
@@ -1629,7 +1629,7 @@ copy_image_data_to_texture(struct st_context *st,
    /* debug checks */
    {
       const struct gl_texture_image *dstImage =
-         stObj->base.Image[stImage->face][dstLevel];
+         stObj->base.Image[stImage->base.Face][dstLevel];
       assert(dstImage);
       assert(dstImage->Width == stImage->base.Width);
       assert(dstImage->Height == stImage->base.Height);
@@ -1641,15 +1641,15 @@ copy_image_data_to_texture(struct st_context *st,
        */
       st_texture_image_copy(st->pipe,
                             stObj->pt, dstLevel,  /* dest texture, level */
-                            stImage->pt, stImage->level, /* src texture, level */
-                            stImage->face);
+                            stImage->pt, stImage->base.Level, /* src texture, level */
+                            stImage->base.Face);
 
       pipe_resource_reference(&stImage->pt, NULL);
    }
    else if (stImage->base.Data) {
       st_texture_image_data(st,
                             stObj->pt,
-                            stImage->face,
+                            stImage->base.Face,
                             dstLevel,
                             stImage->base.Data,
                             stImage->base.RowStride * 
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index b0911294a7c..82ca4af7fe4 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -453,7 +453,6 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
                                  srcImage->TexFormat);
 
       stImage = st_texture_image(dstImage);
-      stImage->level = dstLevel;
 
       pipe_resource_reference(&stImage->pt, pt);
    }
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index 0e857fddcdd..232c286c1d1 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -221,8 +221,8 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage,
 
    DBG("%s \n", __FUNCTION__);
 
-   stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->level,
-                                         stImage->face + zoffset,
+   stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->base.Level,
+                                         stImage->base.Face + zoffset,
                                          usage, x, y, w, h);
 
    if (stImage->transfer)
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index b822f47cf9e..50b7284e760 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -45,11 +45,6 @@ struct st_texture_image
 {
    struct gl_texture_image base;
 
-   /* These aren't stored in gl_texture_image 
-    */
-   GLuint level;
-   GLuint face;
-
    /* If stImage->pt != NULL, image data is stored here.
     * Else if stImage->base.Data != NULL, image is stored there.
     * Else there is no image data.
-- 
cgit v1.2.3


From 4990e7e9fb6d64f5e4c23915767e5cea32c02965 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 3 Aug 2011 18:29:36 -0700
Subject: radeon: Fix flushing before writing a teximage's BO when !t->bo.

Before, if we ended up here without a BO for our image, but did choose
a miptree that had active rendering in the command buffer, our
teximage data would jump ahead of the rendering using the old texture
contents.

This showed up as breakage in gen-teximage and friends in the
following commit.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/radeon/radeon_texture.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index ce0df32bfe4..ad7e4c146a4 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -787,18 +787,6 @@ static void radeon_teximage(
 	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
 			"%s %dd: texObj %p, texImage %p, face %d, level %d\n",
 			__func__, dims, texObj, texImage, face, level);
-	{
-		struct radeon_bo *bo;
-		bo = !image->mt ? image->bo : image->mt->bo;
-		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
-			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
-				"%s Calling teximage for texture that is "
-				"queued for GPU processing.\n",
-				__func__);
-			radeon_firevertices(rmesa);
-		}
-	}
-
 
 	t->validated = GL_FALSE;
 
@@ -820,6 +808,18 @@ static void radeon_teximage(
 		}
 	}
 
+	{
+		struct radeon_bo *bo;
+		bo = !image->mt ? image->bo : image->mt->bo;
+		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+				"%s Calling teximage for texture that is "
+				"queued for GPU processing.\n",
+				__func__);
+			radeon_firevertices(rmesa);
+		}
+	}
+
 	/* Upload texture image; note that the spec allows pixels to be NULL */
 	if (compressed) {
 		pixels = _mesa_validate_pbo_compressed_teximage(
-- 
cgit v1.2.3


From ddd6e5b8bce96cfcb72c7e7412296c9f39bdd5d7 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 29 Jul 2011 15:19:43 -0700
Subject: swrast: Remove swrast eject/validate texture image code.

No driver used the eject function, or set the validate hook that made
that function do anything.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/swrast/s_context.c | 79 ---------------------------------------------
 1 file changed, 79 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index 4434f11b990..792b528ee34 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -417,84 +417,6 @@ _swrast_validate_blend_func(struct gl_context *ctx, GLuint n, const GLubyte mask
    swrast->BlendFunc( ctx, n, mask, src, dst, chanType );
 }
 
-
-/**
- * Make sure we have texture image data for all the textures we may need
- * for subsequent rendering.
- */
-static void
-_swrast_validate_texture_images(struct gl_context *ctx)
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   GLuint u;
-
-   if (!swrast->ValidateTextureImage || !ctx->Texture._EnabledUnits) {
-      /* no textures enabled, or no way to validate images! */
-      return;
-   }
-
-   for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
-      if (ctx->Texture.Unit[u]._ReallyEnabled) {
-         struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
-         ASSERT(texObj);
-         if (texObj) {
-            GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-            GLuint face;
-            for (face = 0; face < numFaces; face++) {
-               GLint lvl;
-               for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
-                  struct gl_texture_image *texImg = texObj->Image[face][lvl];
-                  if (texImg && !texImg->Data) {
-                     swrast->ValidateTextureImage(ctx, texObj, face, lvl);
-                     ASSERT(texObj->Image[face][lvl]->Data);
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-
-/**
- * Free the texture image data attached to all currently enabled
- * textures.  Meant to be called by device drivers when transitioning
- * from software to hardware rendering.
- */
-void
-_swrast_eject_texture_images(struct gl_context *ctx)
-{
-   GLuint u;
-
-   if (!ctx->Texture._EnabledUnits) {
-      /* no textures enabled */
-      return;
-   }
-
-   for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
-      if (ctx->Texture.Unit[u]._ReallyEnabled) {
-         struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
-         ASSERT(texObj);
-         if (texObj) {
-            GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-            GLuint face;
-            for (face = 0; face < numFaces; face++) {
-               GLint lvl;
-               for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
-                  struct gl_texture_image *texImg = texObj->Image[face][lvl];
-                  if (texImg && texImg->Data) {
-                     _mesa_free_texmemory(texImg->Data);
-                     texImg->Data = NULL;
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-
-
 static void
 _swrast_sleep( struct gl_context *ctx, GLbitfield new_state )
 {
@@ -640,7 +562,6 @@ _swrast_validate_derived( struct gl_context *ctx )
 
       if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM)) {
          _swrast_update_texture_samplers( ctx );
-         _swrast_validate_texture_images(ctx);
       }
 
       if (swrast->NewState & (_NEW_COLOR | _NEW_PROGRAM))
-- 
cgit v1.2.3


From 038d654bcb4e9d88eab420cefc3169d4845db4c9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 24 Aug 2011 14:50:28 -0600
Subject: vbo: remove unused var, remove unneeded local var

---
 src/mesa/vbo/vbo_exec_draw.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 25436c6d6d2..8ffaaaa4876 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -260,8 +260,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
 static void
 vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
 {
-   GLenum target = GL_ARRAY_BUFFER_ARB;
-
    if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
       struct gl_context *ctx = exec->ctx;
       
@@ -295,7 +293,6 @@ void
 vbo_exec_vtx_map( struct vbo_exec_context *exec )
 {
    struct gl_context *ctx = exec->ctx;
-   const GLenum target = GL_ARRAY_BUFFER_ARB;
    const GLenum accessRange = GL_MAP_WRITE_BIT |  /* for MapBufferRange */
                               GL_MAP_INVALIDATE_RANGE_BIT |
                               GL_MAP_UNSYNCHRONIZED_BIT |
@@ -325,7 +322,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
       /* Need to allocate a new VBO */
       exec->vtx.buffer_used = 0;
 
-      ctx->Driver.BufferData(ctx, target, 
+      ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB,
                              VBO_VERT_BUFFER_SIZE, 
                              NULL, usage, exec->vtx.bufferobj);
 
-- 
cgit v1.2.3


From 79a486ead92e4493b2de1fedf0c8cb5de47003cd Mon Sep 17 00:00:00 2001
From: Kai Wasserbäch <kai@dev.carbon-project.org>
Date: Tue, 23 Aug 2011 10:48:57 +0200
Subject: Change return type of try_emit_* methods to bool.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ian Romanick explained (Message-Id: <4E528973.6080902@freedesktop.org>)
that non-API methods shouldn't return GLboolean but a standard C++
bool.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Bryan Cain <bryancain3@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org>
---
 src/mesa/program/ir_to_mesa.cpp            |  8 ++++----
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index e7609df19ee..6820e4c6ba7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -312,11 +312,11 @@ public:
    void emit_scs(ir_instruction *ir, enum prog_opcode op,
 		 dst_reg dst, const src_reg &src);
 
-   GLboolean try_emit_mad(ir_expression *ir,
+   bool try_emit_mad(ir_expression *ir,
 			  int mul_operand);
    bool try_emit_mad_for_and_not(ir_expression *ir,
 				 int mul_operand);
-   GLboolean try_emit_sat(ir_expression *ir);
+   bool try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
 
@@ -871,7 +871,7 @@ ir_to_mesa_visitor::visit(ir_function *ir)
    }
 }
 
-GLboolean
+bool
 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 {
    int nonmul_operand = 1 - mul_operand;
@@ -934,7 +934,7 @@ ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
    return true;
 }
 
-GLboolean
+bool
 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
 {
    /* Saturates were only introduced to vertex programs in
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6f0d9fa3f8f..fff848cbdf6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -390,11 +390,11 @@ public:
    void emit_scs(ir_instruction *ir, unsigned op,
         	 st_dst_reg dst, const st_src_reg &src);
 
-   GLboolean try_emit_mad(ir_expression *ir,
-        		  int mul_operand);
+   bool try_emit_mad(ir_expression *ir,
+              int mul_operand);
    bool try_emit_mad_for_and_not(ir_expression *ir,
               int mul_operand);
-   GLboolean try_emit_sat(ir_expression *ir);
+   bool try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
 
@@ -1186,7 +1186,7 @@ glsl_to_tgsi_visitor::visit(ir_function *ir)
    }
 }
 
-GLboolean
+bool
 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 {
    int nonmul_operand = 1 - mul_operand;
@@ -1252,7 +1252,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
    return true;
 }
 
-GLboolean
+bool
 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
 {
    /* Saturates were only introduced to vertex programs in
-- 
cgit v1.2.3


From b9eb4d8a59699e233255113acafae220c3d8fe3c Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 9 Aug 2011 10:53:29 -0700
Subject: glsl: Implement the GL_ARB_conservative_depth extension.

It's the same as GL_AMD_conservative_depth.  The specs have slight
differences in wording, but don't differ in content or behavior.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 docs/GL3.txt                    | 2 +-
 src/glsl/glsl_parser.yy         | 4 ++--
 src/glsl/glsl_parser_extras.cpp | 1 +
 src/glsl/glsl_parser_extras.h   | 2 ++
 src/glsl/ir.h                   | 2 +-
 src/glsl/linker.cpp             | 2 +-
 src/mesa/main/extensions.c      | 1 +
 7 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'src/mesa')

diff --git a/docs/GL3.txt b/docs/GL3.txt
index c0cc4d172e0..ff1f5020a5b 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -123,7 +123,7 @@ GL_ARB_texture_storage                               not started
 GL_ARB_transform_feedback_instanced                  not started
 GL_ARB_base_instance                                 not started
 GL_ARB_shader_image_load_store                       not started
-GL_ARB_conservative_depth                            not started (may be close to AMD_conservative_depth though)
+GL_ARB_conservative_depth                            DONE (compiler)
 GL_ARB_shading_language_420pack                      not started
 GL_ARB_internalformat_query                          not started
 GL_ARB_map_buffer_alignment                          not started
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 1851f1e202e..25d02fb1eaf 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1111,7 +1111,7 @@ layout_qualifier_id:
 	      }
 	   }
 
-	   /* Layout qualifiers for AMD_conservative_depth. */
+	   /* Layout qualifiers for AMD/ARB_conservative_depth. */
 	   if (!got_one && state->AMD_conservative_depth_enable) {
 	      if (strcmp($1, "depth_any") == 0) {
 	         got_one = true;
@@ -1129,7 +1129,7 @@ layout_qualifier_id:
 	
 	      if (got_one && state->AMD_conservative_depth_warn) {
 	         _mesa_glsl_warning(& @1, state,
-	                            "GL_AMD_conservative_depth "
+	                            "GL_ARB_conservative_depth "
 	                            "layout qualifier `%s' is used\n", $1);
 	      }
 	   }
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index cc781378d76..8f740e6a8e9 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -253,6 +253,7 @@ struct _mesa_glsl_extension {
 static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    /*                                  target availability  API availability */
    /* name                             VS     GS     FS     GL     ES         supported flag */
+   EXT(ARB_conservative_depth,         true,  false, true,  true,  false,     AMD_conservative_depth),
    EXT(ARB_draw_buffers,               false, false, true,  true,  false,     dummy_true),
    EXT(ARB_draw_instanced,             true,  false, false, true,  false,     ARB_draw_instanced),
    EXT(ARB_explicit_attrib_location,   true,  false, true,  true,  false,     ARB_explicit_attrib_location),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index fc392da5b21..dc6911d1c9a 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -180,6 +180,8 @@ struct _mesa_glsl_parse_state {
    bool ARB_shader_stencil_export_warn;
    bool AMD_conservative_depth_enable;
    bool AMD_conservative_depth_warn;
+   bool ARB_conservative_depth_enable;
+   bool ARB_conservative_depth_warn;
    bool AMD_shader_stencil_export_enable;
    bool AMD_shader_stencil_export_warn;
    bool OES_texture_3D_enable;
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 990aaa16af3..2e899f3ed6f 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -236,7 +236,7 @@ enum ir_variable_interpolation {
 /**
  * \brief Layout qualifiers for gl_FragDepth.
  *
- * The AMD_conservative_depth extension allows gl_FragDepth to be redeclared
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
  * with a layout qualifier.
  */
 enum ir_depth_layout {
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index b54ef41080a..ba81c59ff2c 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -395,7 +395,7 @@ cross_validate_globals(struct gl_shader_program *prog,
 
         /* Validate layout qualifiers for gl_FragDepth.
          *
-         * From the AMD_conservative_depth spec:
+         * From the AMD/ARB_conservative_depth specs:
          *    "If gl_FragDepth is redeclared in any fragment shader in
          *    a program, it must be redeclared in all fragment shaders in that
          *    program that have static assignments to gl_FragDepth. All
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index bc61c50a90f..14b0cf9acbd 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -81,6 +81,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_blend_func_extended",                 o(ARB_blend_func_extended),                 GL,             2009 },
    { "GL_ARB_color_buffer_float",                  o(ARB_color_buffer_float),                  GL,             2004 },
    { "GL_ARB_copy_buffer",                         o(ARB_copy_buffer),                         GL,             2008 },
+   { "GL_ARB_conservative_depth",                  o(AMD_conservative_depth),                  GL,             2011 },
    { "GL_ARB_depth_buffer_float",                  o(ARB_depth_buffer_float),                  GL,             2008 },
    { "GL_ARB_depth_clamp",                         o(ARB_depth_clamp),                         GL,             2003 },
    { "GL_ARB_depth_texture",                       o(ARB_depth_texture),                       GL,             2001 },
-- 
cgit v1.2.3


From 515d9e88801e2e1e2a7ac74ccd43f8fedfb80a96 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 24 Aug 2011 13:24:25 +0100
Subject: glsl_to_tgsi: implement TXS/TXQ. (v2)

GLSL uses TXS; translate it to the gallium TXQ opcode.

v2: fix indent from 4->3.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Bryan Cain <bryancain3@gmail.com>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index fff848cbdf6..85e4c662fea 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2426,16 +2426,18 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    glsl_to_tgsi_instruction *inst = NULL;
    unsigned opcode = TGSI_OPCODE_NOP;
 
-   ir->coordinate->accept(this);
+   if (ir->coordinate) {
+      ir->coordinate->accept(this);
 
-   /* Put our coords in a temp.  We'll need to modify them for shadow,
-    * projection, or LOD, so the only case we'd use it as is is if
-    * we're doing plain old texturing.  The optimization passes on
-    * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
-    */
-   coord = get_temp(glsl_type::vec4_type);
-   coord_dst = st_dst_reg(coord);
-   emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+      /* Put our coords in a temp.  We'll need to modify them for shadow,
+       * projection, or LOD, so the only case we'd use it as is is if
+       * we're doing plain old texturing.  The optimization passes on
+       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
+       */
+      coord = get_temp(glsl_type::vec4_type);
+      coord_dst = st_dst_reg(coord);
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+   }
 
    if (ir->projector) {
       ir->projector->accept(this);
@@ -2470,6 +2472,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       dy = this->result;
       break;
    case ir_txs:
+      opcode = TGSI_OPCODE_TXQ;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
    case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
       assert(!"GLSL 1.30 features unsupported");
       break;
@@ -2544,6 +2550,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
 
    if (opcode == TGSI_OPCODE_TXD)
       inst = emit(ir, opcode, result_dst, coord, dx, dy);
+   else if (opcode == TGSI_OPCODE_TXQ)
+      inst = emit(ir, opcode, result_dst, lod_info);
    else
       inst = emit(ir, opcode, result_dst, coord);
 
@@ -4276,6 +4284,7 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_TXD:
    case TGSI_OPCODE_TXL:
    case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXQ:
       src[num_src++] = t->samplers[inst->sampler];
       ureg_tex_insn(ureg,
                     inst->op,
-- 
cgit v1.2.3


From 5f3de17ef0f8b6280a6bf331ea6686a260f0d0d4 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 25 Aug 2011 13:38:43 +0100
Subject: glsl_to_tgsi: add TXF support. (v2)

This adds texelFetch support to translate from GLSL to TGSI TXF opcode.

I've tested this works with an r600g and softpipe backend.

v2: drop comments, fix title.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Bryan Cain <bryancain3@gmail.com>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 85e4c662fea..9cac30995af 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2476,8 +2476,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       ir->lod_info.lod->accept(this);
       lod_info = this->result;
       break;
-   case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
-      assert(!"GLSL 1.30 features unsupported");
+   case ir_txf:
+      opcode = TGSI_OPCODE_TXF;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
       break;
    }
 
@@ -2541,7 +2543,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       coord_dst.writemask = WRITEMASK_XYZW;
    }
 
-   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
+   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
+       opcode == TGSI_OPCODE_TXF) {
       /* TGSI stores LOD or LOD bias in the last channel of the coords. */
       coord_dst.writemask = WRITEMASK_W;
       emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
@@ -4285,6 +4288,7 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_TXL:
    case TGSI_OPCODE_TXP:
    case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXF:
       src[num_src++] = t->samplers[inst->sampler];
       ureg_tex_insn(ureg,
                     inst->op,
-- 
cgit v1.2.3


From 116680ddc28c2c3b04fd78acdaa3ef2108c43872 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 2 Aug 2011 19:29:52 -0700
Subject: i965: Remove all bits of NRM3 and NRM4 code

Nothing in Mesa generates these opcodes, and i965 hardware cannot
support them natively.  If support were ever added for these opcodes in
Mesa, there had better be a lowering pass for hardware that doesn't
support them natively.
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 31 -------------------------------
 1 file changed, 31 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index a06a2bbec52..bfee811e13d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1096,31 +1096,6 @@ static void emit_lrp_noalias(struct brw_vs_compile *c,
    brw_MAC(p, dst, arg0, arg1);
 }
 
-/** 3 or 4-component vector normalization */
-static void emit_nrm( struct brw_vs_compile *c, 
-                      struct brw_reg dst,
-                      struct brw_reg arg0,
-                      int num_comps)
-{
-   struct brw_compile *p = &c->func;
-   struct brw_reg tmp = get_tmp(c);
-
-   /* tmp = dot(arg0, arg0) */
-   if (num_comps == 3)
-      brw_DP3(p, tmp, arg0, arg0);
-   else
-      brw_DP4(p, tmp, arg0, arg0);
-
-   /* tmp = 1 / sqrt(tmp) */
-   emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
-
-   /* dst = arg0 * tmp */
-   brw_MUL(p, dst, arg0, tmp);
-
-   release_tmp(c, tmp);
-}
-
-
 static struct brw_reg
 get_constant(struct brw_vs_compile *c,
              const struct prog_instruction *inst,
@@ -2045,12 +2020,6 @@ void brw_old_vs_emit(struct brw_vs_compile *c )
       case OPCODE_DPH:
 	 brw_DPH(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_NRM3:
-	 emit_nrm(c, dst, args[0], 3);
-	 break;
-      case OPCODE_NRM4:
-	 emit_nrm(c, dst, args[0], 4);
-	 break;
       case OPCODE_DST:
 	 unalias2(c, dst, args[0], args[1], emit_dst_noalias); 
 	 break;
-- 
cgit v1.2.3