From e6e5c1f46d374015d924522ed0b2bf2443c3e6d4 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 24 Feb 2011 11:17:02 -0800
Subject: i965: Increase Sandybridge point size clamp in the clip state.

255.875 matches the hardware documentation.  Presumably this was a typo.

NOTE: This is a candidate for the 7.10 branch, along with
      commit 2bfc23fb86964e4153f57f2a56248760f6066033.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/gen6_clip_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 38c98f30efb..d6c1f1c893d 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -68,7 +68,7 @@ upload_clip_state(struct brw_context *brw)
 	     depth_clamp |
 	     provoking);
    OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
-             U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
+             U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
              GEN6_CLIP_FORCE_ZERO_RTAINDEX);
    ADVANCE_BATCH();
 }
-- 
cgit v1.2.3


From 79ad6f5375253faff89bbc7eb6dc5949ba63e0ef Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 25 Feb 2011 09:14:53 +1000
Subject: r300g: explicit sign bits on RGTC textures

---
 src/gallium/drivers/r300/r300_texture.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 354144cac79..2db5ab9a287 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -214,11 +214,13 @@ uint32_t r300_translate_texformat(enum pipe_format format,
     /* RGTC formats. */
     if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
         switch (format) {
-            case PIPE_FORMAT_RGTC1_UNORM:
             case PIPE_FORMAT_RGTC1_SNORM:
+                result |= sign_bit[0];
+            case PIPE_FORMAT_RGTC1_UNORM:
                 return R500_TX_FORMAT_ATI1N | result;
-            case PIPE_FORMAT_RGTC2_UNORM:
             case PIPE_FORMAT_RGTC2_SNORM:
+                result |= sign_bit[0] | sign_bit[1];
+            case PIPE_FORMAT_RGTC2_UNORM:
                 return R400_TX_FORMAT_ATI2N | result;
             default:
                 return ~0; /* Unsupported/unknown. */
-- 
cgit v1.2.3


From c9bca01819ef0fa3c64a28e26bd63953cf7f1364 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 24 Feb 2011 13:55:25 +1000
Subject: r600g: bc 4/5 or rgtc textures need to be tiled as well.

Make the s3tc upload code more generic.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_blit.c    | 16 ++++++++--------
 src/gallium/drivers/r600/r600_texture.c |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 9865ea17ae5..04408a5cc8e 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -225,7 +225,7 @@ struct texture_orig_info {
 	unsigned height0;
 };
 
-static void r600_s3tc_to_blittable(struct pipe_resource *tex,
+static void r600_compressed_to_blittable(struct pipe_resource *tex,
 				   unsigned level,
 				   struct texture_orig_info *orig)
 {
@@ -253,7 +253,7 @@ static void r600_s3tc_to_blittable(struct pipe_resource *tex,
 
 }
 
-static void r600_reset_blittable_to_s3tc(struct pipe_resource *tex,
+static void r600_reset_blittable_to_compressed(struct pipe_resource *tex,
 					 unsigned level,
 					 struct texture_orig_info *orig)
 {
@@ -282,13 +282,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 
 	restore_orig[0] = restore_orig[1] = FALSE;
 
-	if (util_format_is_s3tc(src->format)) {
-		r600_s3tc_to_blittable(src, src_level, &orig_info[0]);
+	if (util_format_is_compressed(src->format)) {
+		r600_compressed_to_blittable(src, src_level, &orig_info[0]);
 		restore_orig[0] = TRUE;
 	}
 
-	if (util_format_is_s3tc(dst->format)) {
-		r600_s3tc_to_blittable(dst, dst_level, &orig_info[1]);
+	if (util_format_is_compressed(dst->format)) {
+		r600_compressed_to_blittable(dst, dst_level, &orig_info[1]);
 		restore_orig[1] = TRUE;
 		/* translate the dst box as well */
 		dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
@@ -299,10 +299,10 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 			    src, src_level, src_box);
 
 	if (restore_orig[0])
-		r600_reset_blittable_to_s3tc(src, src_level, &orig_info[0]);
+		r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]);
 
 	if (restore_orig[1])
-		r600_reset_blittable_to_s3tc(dst, dst_level, &orig_info[1]);
+		r600_reset_blittable_to_compressed(dst, dst_level, &orig_info[1]);
 }
 
 void r600_init_blit_functions(struct r600_pipe_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 095558d0337..87143ee54cc 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -292,7 +292,7 @@ static boolean permit_hardware_blit(struct pipe_screen *screen,
 		bind = PIPE_BIND_RENDER_TARGET;
 
 	/* hackaround for S3TC */
-	if (util_format_is_s3tc(res->format))
+	if (util_format_is_compressed(res->format))
 		return TRUE;
 	    
 	if (!screen->is_format_supported(screen,
@@ -424,7 +424,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
 	}
 
 	if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
-	    util_format_is_s3tc(templ->format))
+	    util_format_is_compressed(templ->format))
 		array_mode = V_038000_ARRAY_1D_TILED_THIN1;
 
 	return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
-- 
cgit v1.2.3


From b2413de91682e3908d8ab1635956a290f603681c Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 25 Feb 2011 09:13:14 +1000
Subject: r600g: explicitly set sign bits for RGTC

---
 src/gallium/drivers/r600/r600_texture.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 87143ee54cc..3a85a25065a 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -878,12 +878,14 @@ uint32_t r600_translate_texformat(enum pipe_format format,
 			goto out_unknown;
 
 		switch (format) {
-		case PIPE_FORMAT_RGTC1_UNORM:
 		case PIPE_FORMAT_RGTC1_SNORM:
+			word4 |= sign_bit[0];
+		case PIPE_FORMAT_RGTC1_UNORM:
 			result = FMT_BC4;
 			goto out_word4;
-		case PIPE_FORMAT_RGTC2_UNORM:
 		case PIPE_FORMAT_RGTC2_SNORM:
+			word4 |= sign_bit[0] | sign_bit[1];
+		case PIPE_FORMAT_RGTC2_UNORM:
 			result = FMT_BC5;
 			goto out_word4;
 		default:
-- 
cgit v1.2.3


From eb1780238621c7be0342d9129eb639e4892c004d Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Thu, 24 Feb 2011 19:49:37 -0800
Subject: scons: Reduce all Cygwin platform names to 'cygwin'.

platform.system in SCons on Cygwin includes the OS version number.
Windows XP - CYGWIN_NT-5.1
Windows Vista - CYGWIN_NT-6.0
Windows 7 - CYGWIN_NT-6.1

Reduce all Cygwin platform variants to just 'cygwin' so anything
downstream can simply use 'cygwin' instead of the different full
platform names.
---
 common.py                             | 4 +++-
 scons/gallium.py                      | 2 ++
 src/gallium/winsys/sw/xlib/SConscript | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/common.py b/common.py
index b44f20e8216..1d0c6a71fa5 100644
--- a/common.py
+++ b/common.py
@@ -15,6 +15,8 @@ import SCons.Script.SConscript
 # Defaults
 
 host_platform = _platform.system().lower()
+if host_platform.startswith('cygwin'):
+    host_platform = 'cygwin'
 
 # Search sys.argv[] for a "platform=foo" argument since we don't have
 # an 'env' variable at this point.
@@ -81,7 +83,7 @@ def AddOptions(opts):
 	opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine,
 											 allowed_values=('generic', 'ppc', 'x86', 'x86_64')))
 	opts.Add(EnumOption('platform', 'target platform', host_platform,
-											 allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince', 'darwin', 'embedded', 'cygwin_nt-5.1', 'cygwin_nt-6.1', 'sunos5', 'freebsd8')))
+											 allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince', 'darwin', 'embedded', 'cygwin', 'sunos5', 'freebsd8')))
 	opts.Add('toolchain', 'compiler toolchain', default_toolchain)
 	opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no'))
 	opts.Add(BoolOption('llvm', 'use LLVM', default_llvm))
diff --git a/scons/gallium.py b/scons/gallium.py
index 9118257ac05..112f6c89dca 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -195,6 +195,8 @@ def generate(env):
     # Determine whether we are cross compiling; in particular, whether we need
     # to compile code generators with a different compiler as the target code.
     host_platform = _platform.system().lower()
+    if host_platform.startswith('cygwin'):
+        host_platform = 'cygwin'
     host_machine = os.environ.get('PROCESSOR_ARCHITEW6432', os.environ.get('PROCESSOR_ARCHITECTURE', _platform.machine()))
     host_machine = {
         'x86': 'x86',
diff --git a/src/gallium/winsys/sw/xlib/SConscript b/src/gallium/winsys/sw/xlib/SConscript
index f6c47411831..df01a9ec2bf 100644
--- a/src/gallium/winsys/sw/xlib/SConscript
+++ b/src/gallium/winsys/sw/xlib/SConscript
@@ -4,7 +4,7 @@
 
 Import('*')
 
-if env['platform'] in ('cygwin_nt-5.1', 'cygwin_nt-6.1', 'linux'):
+if env['platform'] in ('cygwin', 'linux'):
 
     env = env.Clone()
 
-- 
cgit v1.2.3


From 179ff0551c4938e59f4b57fec0a10d63f012d7c6 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 25 Feb 2011 14:37:06 +1000
Subject: gallium/util: add 1d/2d mipmap generation support

So far only hw mipmap generation has been tested (on softpipe);
it passes the test added to piglit.

This requires another patch to mesa before mipmap generation for
array textures can even start to happen.
---
 src/gallium/auxiliary/util/u_gen_mipmap.c | 43 ++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 3b6342ad8d1..3698be77b2e 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -67,7 +67,7 @@ struct gen_mipmap_state
    struct pipe_vertex_element velem[2];
 
    void *vs;
-   void *fs1d, *fs2d, *fs3d, *fsCube;
+   void *fs1d, *fs2d, *fs3d, *fsCube, *fs1da, *fs2da;
 
    struct pipe_resource *vbuf;  /**< quad vertices */
    unsigned vbuf_slot;
@@ -1321,6 +1321,11 @@ util_create_gen_mipmap(struct pipe_context *pipe,
                                                TGSI_INTERPOLATE_LINEAR);
    ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE,
                                                TGSI_INTERPOLATE_LINEAR);
+   ctx->fs1da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D_ARRAY,
+                                               TGSI_INTERPOLATE_LINEAR);
+   ctx->fs2da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D_ARRAY,
+                                               TGSI_INTERPOLATE_LINEAR);
+
 
    /* vertex data that doesn't change */
    for (i = 0; i < 4; i++) {
@@ -1390,8 +1395,25 @@ set_vertex_data(struct gen_mipmap_state *ctx,
       util_map_texcoords2d_onto_cubemap(layer, &st[0][0], 2,
                                         &ctx->vertices[0][1][0], 8);
    }
-   else {
-      /* 1D/2D/3D */
+   else if (tex_target == PIPE_TEXTURE_1D_ARRAY) {
+      /* 1D texture array  */
+      ctx->vertices[0][1][0] = 0.0f; /*s*/
+      ctx->vertices[0][1][1] = r; /*t*/
+      ctx->vertices[0][1][2] = 0.0f;    /*r*/
+
+      ctx->vertices[1][1][0] = 1.0f;
+      ctx->vertices[1][1][1] = r;
+      ctx->vertices[1][1][2] = 0.0f;
+
+      ctx->vertices[2][1][0] = 1.0f;
+      ctx->vertices[2][1][1] = r;
+      ctx->vertices[2][1][2] = 0.0f;
+
+      ctx->vertices[3][1][0] = 0.0f;
+      ctx->vertices[3][1][1] = r;
+      ctx->vertices[3][1][2] = 0.0f;
+   } else {
+      /* 1D/2D/3D/2D array */
       ctx->vertices[0][1][0] = 0.0f; /*s*/
       ctx->vertices[0][1][1] = 0.0f; /*t*/
       ctx->vertices[0][1][2] = r;    /*r*/
@@ -1427,6 +1449,8 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
 {
    struct pipe_context *pipe = ctx->pipe;
 
+   pipe->delete_fs_state(pipe, ctx->fs2da);
+   pipe->delete_fs_state(pipe, ctx->fs1da);
    pipe->delete_fs_state(pipe, ctx->fsCube);
    pipe->delete_fs_state(pipe, ctx->fs3d);
    pipe->delete_fs_state(pipe, ctx->fs2d);
@@ -1499,7 +1523,11 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
       fs = ctx->fsCube;
       break;
    case PIPE_TEXTURE_1D_ARRAY:
+      fs = ctx->fs1da;
+      break;
    case PIPE_TEXTURE_2D_ARRAY:
+      fs = ctx->fs2da;
+      break;
    default:
       assert(0);
       fs = ctx->fs2d;
@@ -1555,6 +1583,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
 
       if (pt->target == PIPE_TEXTURE_3D)
          nr_layers = u_minify(pt->depth0, dstLevel);
+      else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY)
+	 nr_layers = pt->array_size;
       else
          nr_layers = 1;
 
@@ -1564,11 +1594,12 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
             /* in theory with geom shaders and driver with full layer support
                could do that in one go. */
             layer = i;
-            offset = 1.0f / (float)(nr_layers * 2);
             /* XXX hmm really? */
             rcoord = (float)layer / (float)nr_layers + 1.0f / (float)(nr_layers * 2);
-         }
-         else
+         } else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY) {
+	    layer = i;
+	    rcoord = (float)layer;
+	 } else
             layer = face;
 
          memset(&surf_templ, 0, sizeof(surf_templ));
-- 
cgit v1.2.3


From 8e17adfdbd96ba1a11cda329ddfd2b997255ea20 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Fri, 25 Feb 2011 11:49:23 -0500
Subject: gallium/st: place value check before value is used

7.9 & 7.10 candidate

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 5c68fd78c30..c07739f9d53 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -224,9 +224,9 @@ src_register( struct st_translate *t,
 
    case PROGRAM_TEMPORARY:
       assert(index >= 0);
+      assert(index < Elements(t->temps));
       if (ureg_dst_is_undef(t->temps[index]))
          t->temps[index] = ureg_DECL_temporary( t->ureg );
-      assert(index < Elements(t->temps));
       return ureg_src(t->temps[index]);
 
    case PROGRAM_NAMED_PARAM:
-- 
cgit v1.2.3


From b0e8aec5ab7f0e81dc0ea6c79ac7db2cca4788ed Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Fri, 25 Feb 2011 11:56:29 -0500
Subject: gallium/tgsi: shuffle ureg_src structure to work around gcc4.6.0
 issue

There is an issue with gcc 4.6.0 that leads to a segfault/assert in mesa
due to the ureg_src size; reshuffling the structure members for better
alignment works around the issue.

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47893

7.9 + 7.10 candidate

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 src/gallium/auxiliary/tgsi/tgsi_ureg.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index b8d193f3f89..9d5553f0ea0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -43,24 +43,24 @@ struct ureg_program;
  */
 struct ureg_src
 {
-   unsigned File        : 4;  /* TGSI_FILE_ */
-   unsigned SwizzleX    : 2;  /* TGSI_SWIZZLE_ */
-   unsigned SwizzleY    : 2;  /* TGSI_SWIZZLE_ */
-   unsigned SwizzleZ    : 2;  /* TGSI_SWIZZLE_ */
-   unsigned SwizzleW    : 2;  /* TGSI_SWIZZLE_ */
-   unsigned Indirect    : 1;  /* BOOL */
-   unsigned DimIndirect : 1;  /* BOOL */
-   unsigned Dimension   : 1;  /* BOOL */
-   unsigned Absolute    : 1;  /* BOOL */
-   unsigned Negate      : 1;  /* BOOL */
-   int      Index       : 16; /* SINT */
+   unsigned File             : 4;  /* TGSI_FILE_ */
+   unsigned SwizzleX         : 2;  /* TGSI_SWIZZLE_ */
+   unsigned SwizzleY         : 2;  /* TGSI_SWIZZLE_ */
+   unsigned SwizzleZ         : 2;  /* TGSI_SWIZZLE_ */
+   unsigned SwizzleW         : 2;  /* TGSI_SWIZZLE_ */
+   unsigned Indirect         : 1;  /* BOOL */
+   unsigned DimIndirect      : 1;  /* BOOL */
+   unsigned Dimension        : 1;  /* BOOL */
+   unsigned Absolute         : 1;  /* BOOL */
+   unsigned Negate           : 1;  /* BOOL */
    unsigned IndirectFile     : 4;  /* TGSI_FILE_ */
-   int      IndirectIndex    : 16; /* SINT */
    unsigned IndirectSwizzle  : 2;  /* TGSI_SWIZZLE_ */
-   int      DimensionIndex   : 16; /* SINT */
    unsigned DimIndFile       : 4;  /* TGSI_FILE_ */
-   int      DimIndIndex      : 16; /* SINT */
    unsigned DimIndSwizzle    : 2;  /* TGSI_SWIZZLE_ */
+   int      Index            : 16; /* SINT */
+   int      IndirectIndex    : 16; /* SINT */
+   int      DimensionIndex   : 16; /* SINT */
+   int      DimIndIndex      : 16; /* SINT */
 };
 
 /* Very similar to a tgsi_dst_register, removing unsupported fields
-- 
cgit v1.2.3


From a3cd542894606e3f2c892a4218cc6f7370c827a5 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 28 Oct 2010 13:04:41 -0700
Subject: i965: Add #defines for gen4 SIMD8 TXB/TXL with shadow comparison.

From volume 4, page 161 of the public i965 documentation.
---
 src/mesa/drivers/dri/i965/brw_defines.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index a413c02b573..5496b4fdd3b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -679,6 +679,8 @@
 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
 #define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
 #define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2
 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
-- 
cgit v1.2.3


From e54d62b89677624b5806442cc5053c0ceedd79b0 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 28 Oct 2010 13:07:11 -0700
Subject: i965/fs: Use a properly named constant in TXB handling.

The old value, BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE makes it sound like we're
doing a non-bias texture lookup.  It has the same value as the new constant
BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE, so there should be no
functional changes.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 30e3bd54469..d67b449b0c0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2325,7 +2325,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
       case FS_OPCODE_TXB:
 	 if (inst->shadow_compare) {
 	    assert(inst->mlen == 6);
-	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
 	 } else {
 	    assert(inst->mlen == 9);
 	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
-- 
cgit v1.2.3


From 4ddd11aad6a396e98ae30e3e78f6736804eae541 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 28 Oct 2010 13:12:20 -0700
Subject: i965/fs: Complete TXL support on gen4.

Initial plumbing existed to turn the ir_txl into OPCODE_TXL, but it was
never handled.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d67b449b0c0..87c06aa22bf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2332,6 +2332,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
 	 }
 	 break;
+      case FS_OPCODE_TXL:
+	 if (inst->shadow_compare) {
+	    assert(inst->mlen == 6);
+	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
+	 } else {
+	    assert(inst->mlen == 9);
+	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
+	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+	 }
+	 break;
       }
    }
    assert(msg_type != -1);
-- 
cgit v1.2.3


From 2830b1ae9032666e62460de5aece8db843c51c14 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 28 Oct 2010 12:53:21 -0700
Subject: i965/fs: Complete TXL support on gen5+.

Initial plumbing existed to turn the ir_txl into OPCODE_TXL, but it was
never handled.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 87c06aa22bf..552641b623d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2308,6 +2308,13 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
 	 }
 	 break;
+      case FS_OPCODE_TXL:
+	 if (inst->shadow_compare) {
+	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5;
+	 } else {
+	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5;
+	 }
+	 break;
       }
    } else {
       switch (inst->opcode) {
-- 
cgit v1.2.3


From 58f7c9c72ee52527610b26ca8a137dd88c082c89 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 24 Feb 2011 17:49:07 -0800
Subject: i965/fs: Initial plumbing to support TXD.

This adds the opcode and the code to convert ir_txd to OPCODE_TXD;
it doesn't actually add support yet.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 12 ++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h   |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 552641b623d..9bdcda780ef 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -213,6 +213,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
       return 2;
    case FS_OPCODE_TEX:
    case FS_OPCODE_TXB:
+   case FS_OPCODE_TXD:
    case FS_OPCODE_TXL:
       return 1;
    case FS_OPCODE_FB_WRITE:
@@ -1200,6 +1201,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
       }
       /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
       mlen += 3;
+   } else if (ir->op == ir_txd) {
+      assert(!"TXD isn't supported on gen4 yet.");
    } else {
       /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
        * instructions.  We'll need to do SIMD16 here.
@@ -1253,6 +1256,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
       inst = emit(fs_inst(FS_OPCODE_TXL, dst));
       break;
    case ir_txd:
+      inst = emit(fs_inst(FS_OPCODE_TXD, dst));
+      break;
    case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
@@ -2315,6 +2320,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5;
 	 }
 	 break;
+      case FS_OPCODE_TXD:
+	 assert(!"TXD isn't supported on gen5+ yet.");
+	 break;
       }
    } else {
       switch (inst->opcode) {
@@ -2349,6 +2357,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
 	 }
 	 break;
+      case FS_OPCODE_TXD:
+	 assert(!"TXD isn't supported on gen4 yet.");
+	 break;
       }
    }
    assert(msg_type != -1);
@@ -3624,6 +3635,7 @@ fs_visitor::generate_code()
 	 break;
       case FS_OPCODE_TEX:
       case FS_OPCODE_TXB:
+      case FS_OPCODE_TXD:
       case FS_OPCODE_TXL:
 	 generate_tex(inst, dst, src[0]);
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8352760acf7..dc030ae5b50 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -71,6 +71,7 @@ enum fs_opcodes {
    FS_OPCODE_LINTERP,
    FS_OPCODE_TEX,
    FS_OPCODE_TXB,
+   FS_OPCODE_TXD,
    FS_OPCODE_TXL,
    FS_OPCODE_DISCARD_NOT,
    FS_OPCODE_DISCARD_AND,
@@ -309,6 +310,7 @@ public:
    {
       return (opcode == FS_OPCODE_TEX ||
 	      opcode == FS_OPCODE_TXB ||
+	      opcode == FS_OPCODE_TXD ||
 	      opcode == FS_OPCODE_TXL);
    }
 
-- 
cgit v1.2.3


From a08e612fd8e7ca2ac2fef8961e56e5b094033717 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Sat, 26 Feb 2011 00:50:52 +0100
Subject: util: Don't create array texture shaders if the driver doesn't
 support it

---
 src/gallium/auxiliary/util/u_gen_mipmap.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 3698be77b2e..6fba6000ba8 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -1321,10 +1321,12 @@ util_create_gen_mipmap(struct pipe_context *pipe,
                                                TGSI_INTERPOLATE_LINEAR);
    ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE,
                                                TGSI_INTERPOLATE_LINEAR);
-   ctx->fs1da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D_ARRAY,
-                                               TGSI_INTERPOLATE_LINEAR);
-   ctx->fs2da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D_ARRAY,
-                                               TGSI_INTERPOLATE_LINEAR);
+   if (pipe->screen->get_param(pipe->screen, PIPE_CAP_ARRAY_TEXTURES)) {
+      ctx->fs1da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D_ARRAY,
+                                                 TGSI_INTERPOLATE_LINEAR);
+      ctx->fs2da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D_ARRAY,
+                                                 TGSI_INTERPOLATE_LINEAR);
+   }
 
 
    /* vertex data that doesn't change */
-- 
cgit v1.2.3


From ca8a91ff7eb7e3fb4595763ea71f427b97e426c6 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Sat, 26 Feb 2011 02:32:22 +0100
Subject: util: Don't destroy null shaders

Fixes regression from a08e612fd8e7ca2ac2fef8961e56e5b094033717
---
 src/gallium/auxiliary/util/u_gen_mipmap.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 6fba6000ba8..4f1b0e71934 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -1451,8 +1451,10 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
 {
    struct pipe_context *pipe = ctx->pipe;
 
-   pipe->delete_fs_state(pipe, ctx->fs2da);
-   pipe->delete_fs_state(pipe, ctx->fs1da);
+   if (ctx->fs2da)
+      pipe->delete_fs_state(pipe, ctx->fs2da);
+   if (ctx->fs1da)
+      pipe->delete_fs_state(pipe, ctx->fs1da);
    pipe->delete_fs_state(pipe, ctx->fsCube);
    pipe->delete_fs_state(pipe, ctx->fs3d);
    pipe->delete_fs_state(pipe, ctx->fs2d);
-- 
cgit v1.2.3


From 53fe5b334ee2d373cec861580121f3ece06bdd07 Mon Sep 17 00:00:00 2001
From: Alan Hourihane <alanh@vmware.com>
Date: Sat, 26 Feb 2011 10:30:19 +0000
Subject: Check for out of memory when creating fence

---
 src/gallium/drivers/llvmpipe/lp_fence.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
index 3a55e76bc35..a21a3c74484 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -47,6 +47,9 @@ lp_fence_create(unsigned rank)
    static int fence_id;
    struct lp_fence *fence = CALLOC_STRUCT(lp_fence);
 
+   if (!fence)
+      return NULL;
+
    pipe_reference_init(&fence->reference, 1);
 
    pipe_mutex_init(fence->mutex);
-- 
cgit v1.2.3


From 9a371b938c16d4fcb43e4e5ce2fbc2756202752a Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <wallbraker@gmail.com>
Date: Sat, 26 Feb 2011 01:42:19 +0100
Subject: i915g: Use unchecked writes in sw winsys batchbuffer

---
 src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
index 8085591c8eb..d246bf64138 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
@@ -107,16 +107,16 @@ i915_sw_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch,
 
 #ifdef INTEL_ALWAYS_FLUSH
    /* MI_FLUSH | FLUSH_MAP_CACHE */
-   i915_winsys_batchbuffer_dword(ibatch, (0x4<<23)|(1<<0));
+   i915_winsys_batchbuffer_dword_unchecked(ibatch, (0x4<<23)|(1<<0));
    used += 4;
 #endif
 
    if ((used & 4) == 0) {
       /* MI_NOOP */
-      i915_winsys_batchbuffer_dword(ibatch, 0);
+      i915_winsys_batchbuffer_dword_unchecked(ibatch, 0);
    }
    /* MI_BATCH_BUFFER_END */
-   i915_winsys_batchbuffer_dword(ibatch, (0xA<<23));
+   i915_winsys_batchbuffer_dword_unchecked(ibatch, (0xA<<23));
 
    used = batch->base.ptr - batch->base.map;
    assert((used & 4) == 0);
-- 
cgit v1.2.3


From acc290aff0944c3b5d1a5f40b67ddade6d2a6894 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <wallbraker@gmail.com>
Date: Sat, 26 Feb 2011 15:24:13 +0100
Subject: i915g: Use the same debug env vars in drm and sw winsys

---
 src/gallium/winsys/i915/sw/i915_sw_winsys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/winsys/i915/sw/i915_sw_winsys.c b/src/gallium/winsys/i915/sw/i915_sw_winsys.c
index 058ddc44aaf..fc48da6fb92 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_winsys.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_winsys.c
@@ -50,7 +50,7 @@ i915_sw_winsys_create()
    isws->base.pci_id = deviceID;
    isws->max_batch_size = 16 * 4096;
 
-   isws->dump_cmd = debug_get_bool_option("INTEL_DUMP_CMD", FALSE);
+   isws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE);
 
    return &isws->base;
 }
-- 
cgit v1.2.3


From 1df1e0841de7da9587e8b1f5d43304627305d22d Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 25 Feb 2011 18:03:16 +0100
Subject: i915g: simplify math in constants emission

The old code even falls apart for nr == 0 (although that case is caught earlier)!

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/drivers/i915/i915_state_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 509d487b498..510fb1ccc63 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -362,7 +362,7 @@ i915_emit_hardware_state(struct i915_context *i915 )
          uint i;
 
          OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
-         OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) );
+	 OUT_BATCH((1 << nr) - 1);
 
          for (i = 0; i < nr; i++) {
             const uint *c;
-- 
cgit v1.2.3


From b8e44f648eac07d9a8c113b19a9097626c24a61f Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 25 Feb 2011 23:32:48 +0100
Subject: i915g: fix null deref in draw_rect emission

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/drivers/i915/i915_state_emit.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 510fb1ccc63..a9a9d7fcb4e 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -413,7 +413,6 @@ i915_emit_hardware_state(struct i915_context *i915 )
    {
       uint w, h;
       struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
-      struct i915_texture *tex = i915_texture(cbuf_surface->texture);
       unsigned x, y;
       int layer;
       uint32_t draw_offset;
@@ -422,10 +421,15 @@ i915_emit_hardware_state(struct i915_context *i915 )
       ret = framebuffer_size(&i915->framebuffer, &w, &h);
       assert(ret);
 
-      layer = cbuf_surface->u.tex.first_layer;
+      if (cbuf_surface) {
+	 struct i915_texture *tex = i915_texture(cbuf_surface->texture);
+	 layer = cbuf_surface->u.tex.first_layer;
+
+	 x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
+	 y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
 
-      x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
-      y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
+      } else
+	 x = y = 0;
 
       draw_offset = x | (y << 16);
 
-- 
cgit v1.2.3


From 052122a8cd759e8749fee3412bae5d905ee0d965 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <wallbraker@gmail.com>
Date: Sat, 26 Feb 2011 15:45:13 +0100
Subject: i915g: Handle null constants properly

---
 src/gallium/drivers/i915/i915_context.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 707b2e9f956..f970f8a7009 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -73,10 +73,13 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    draw_set_mapped_index_buffer(draw, mapped_indices);
 
    if (cbuf_dirty) {
-      draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
-                                      i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data,
-                                      (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 
+      if (i915->constants[PIPE_SHADER_VERTEX])
+         draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
+                                         i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data,
+                                         (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 
                                          4 * sizeof(float)));
+      else
+         draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0);
    }
 
    /*
-- 
cgit v1.2.3


From 99b9019716d0a5cfc7438677d2e11090d676c054 Mon Sep 17 00:00:00 2001
From: Arkadiusz Miskiewicz <arekm@maven.pl>
Date: Sat, 26 Feb 2011 10:26:09 -0800
Subject: glsl/Makefile: Remove builtin_function.cpp if generation fails.

Fixes bug #34346.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 876f0dfc2a5..df031d2d548 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -208,6 +208,6 @@ builtin_compiler: $(GLSL2_OBJECTS) $(OBJECTS) builtin_stubs.o
 
 builtin_function.cpp: builtins/profiles/* builtins/ir/* builtins/tools/generate_builtins.py builtins/tools/texture_builtins.py builtin_compiler
 	@echo Regenerating builtin_function.cpp...
-	$(PYTHON2) $(PYTHON_FLAGS) builtins/tools/generate_builtins.py ./builtin_compiler > builtin_function.cpp
+	$(PYTHON2) $(PYTHON_FLAGS) builtins/tools/generate_builtins.py ./builtin_compiler > builtin_function.cpp || rm -f builtin_function.cpp
 
 -include depend
-- 
cgit v1.2.3


From 11f9ec5422b45f2bbcffec26f692a6a22e0aaef2 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Sat, 26 Feb 2011 20:12:27 +0100
Subject: gallivm: Initialize stack values

valgrind gives me a warning with llvmpipe with profile builds but
not debug builds, this seems to fix the issue at least.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index e685f4b73f0..1fec3adf5b1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -832,14 +832,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                        LLVMValueRef *colors_out)
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
-   LLVMValueRef size0;
-   LLVMValueRef size1;
-   LLVMValueRef row_stride0_vec;
-   LLVMValueRef row_stride1_vec;
-   LLVMValueRef img_stride0_vec;
-   LLVMValueRef img_stride1_vec;
-   LLVMValueRef data_ptr0;
-   LLVMValueRef data_ptr1;
+   LLVMValueRef size0 = NULL;
+   LLVMValueRef size1 = NULL;
+   LLVMValueRef row_stride0_vec = NULL;
+   LLVMValueRef row_stride1_vec = NULL;
+   LLVMValueRef img_stride0_vec = NULL;
+   LLVMValueRef img_stride1_vec = NULL;
+   LLVMValueRef data_ptr0 = NULL;
+   LLVMValueRef data_ptr1 = NULL;
    LLVMValueRef colors0[4], colors1[4];
    unsigned chan;
 
-- 
cgit v1.2.3


From 132dc0b6d2aef79920d750d8f4f3852f7e2d599a Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sat, 26 Feb 2011 19:11:32 +0100
Subject: i915g: make dynamic state emission actually lazy

Premature semicolon.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/drivers/i915/i915_state_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index a9a9d7fcb4e..1c77de1acce 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -223,7 +223,7 @@ i915_emit_hardware_state(struct i915_context *i915 )
    {
       int i;
       for (i = 0; i < I915_MAX_DYNAMIC; i++) {
-         if (i915->dynamic_dirty & (1 << i));
+         if (i915->dynamic_dirty & (1 << i))
             OUT_BATCH(i915->current.dynamic[i]);
       }
    }
-- 
cgit v1.2.3


From 49d7e48b33264d94e30af6129c281b6acafa9427 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 3 Feb 2011 17:26:02 -0800
Subject: mesa: Add new MESA_multithread_makecurrent extension.

This extension allows a client to bind one context in multiple threads
simultaneously.  It is then up to the client to manage synchronization of
access to the GL, just as normal multithreaded GL from multiple contexts
requires synchronization management to shared objects.
---
 docs/MESA_multithread_makecurrent.spec | 135 +++++++++++++++++++++++++++++++++
 src/glx/glxclient.h                    |   4 +-
 src/glx/glxcurrent.c                   |  55 ++++++++------
 src/glx/glxextensions.c                |   1 +
 src/glx/glxextensions.h                |   1 +
 5 files changed, 172 insertions(+), 24 deletions(-)
 create mode 100644 docs/MESA_multithread_makecurrent.spec

diff --git a/docs/MESA_multithread_makecurrent.spec b/docs/MESA_multithread_makecurrent.spec
new file mode 100644
index 00000000000..68ade762436
--- /dev/null
+++ b/docs/MESA_multithread_makecurrent.spec
@@ -0,0 +1,135 @@
+Name
+
+    MESA_multithread_makecurrent
+
+Name Strings
+
+    GLX_MESA_multithread_makecurrent
+
+Contact
+
+    Eric Anholt (eric@anholt.net)
+
+Status
+
+    Not shipping.
+
+Version
+
+    Last Modified Date:  21 February 2011
+
+Number
+
+    TBD
+
+Dependencies
+
+    OpenGL 1.0 or later is required.
+    GLX 1.3 or later is required.
+
+Overview
+
+    The GLX context setup encourages multithreaded applications to
+    create a context per thread which each operate on their own
+    objects in parallel, and leaves synchronization for write access
+    to shared objects up to the application.
+
+    For some applications, maintaining per-thread contexts and
+    ensuring that the glFlush happens in one thread before another
+    thread starts working on that object is difficult.  For them,
+    using the same context across multiple threads and protecting its
+    usage with a mutex is both higher performance and easier to
+    implement.  This extension gives those applications that option by
+    relaxing the context binding requirements.
+
+    This new behavior matches the requirements of AGL, while providing
+    a feature not specified in WGL.
+
+IP Status
+
+    Open-source; freely implementable.
+
+Issues
+
+    None.
+
+New Procedures and Functions
+
+    None.
+
+New Tokens
+
+    None.
+
+Changes to Chapter 3 of the GLX 1.3 Specification (Functions and Errors)
+
+    Remove the following sentence from section 3.3.7 Rendering Contexts:
+	If ctx is current to some other thread, then
+	glXMakeContextCurrent will generate a BadAccess error.
+
+    Remove the following sentence from section 3.5 Rendering Contexts:
+	If ctx is current to some other thread, then
+	glXMakeCurrent will generate a BadAccess error.
+
+GLX Protocol
+
+    None.  The GLX extension is client-side.
+
+Errors
+
+    None.
+
+New State
+
+    None.
+
+Issues
+
+    (1) What happens if the app binds a context/drawable in multiple
+	threads, then binds a different context/drawable in one of them?
+
+    As with binding a new context from the current thread, the old
+    context's refcount is reduced and the new context's refcount is
+    increased.
+
+    (2) What happens if the app binds a context/drawable in multiple
+	threads, then binds None/None in one of them?
+
+    The GLX context is unreferenced from that thread, and the other
+    threads retain their GLX context binding.
+
+    (3) What happens if the app binds a context/drawable in 7 threads,
+	then destroys the context in one of them?
+
+    As with GLX context destruction previously, the XID is destroyed
+    but the context remains usable by threads that have the context
+    current.
+
+    (4) What happens if the app binds a new drawable/readable with
+        glXMakeCurrent() when it is already bound to another thread?
+
+    The context becomes bound to the new drawable/readable, and
+    further rendering in either thread will use the new
+    drawable/readable.
+
+    (5) What requirements should be placed on the user managing contexts
+        from multiple threads?
+
+    The intention is to allow multithreaded access to the GL at the
+    minimal performance cost, so requiring that the GL do general
+    synchronization (beyond that already required by context sharing)
+    is not an option, and synchronizing of GL's access to the GL
+    context between multiple threads is left to the application to do
+    across GL calls.  However, it would be unfortunate for a library
+    doing multithread_makecurrent to require that other libraries
+    share in synchronization for binding of their own contexts, so the
+    refcounting of the contexts is required to be threadsafe.
+
+Revision History
+
+    20 November 2009 Eric Anholt - initial specification
+    22 November 2009 Eric Anholt - added issues from Ian Romanick.
+    3 February 2011 Eric Anholt - updated with resolution to issues 1-3
+    3 February 2011 Eric Anholt - added issue 4, 5
+    21 February 2011 Eric Anholt - Include glXMakeCurrent() sentence
+    along with glXMakeContextCurrent() for removal.
diff --git a/src/glx/glxclient.h b/src/glx/glxclient.h
index fdcef8075a8..2b6966f2e08 100644
--- a/src/glx/glxclient.h
+++ b/src/glx/glxclient.h
@@ -419,9 +419,9 @@ struct glx_context
    /*@} */
 
    /**
-    * Thread ID we're currently current in. Zero if none.
+    * Number of threads we're currently current in.
     */
-   unsigned long thread_id;
+   unsigned long thread_refcount;
 
    char gl_extension_bits[__GL_EXT_BYTES];
 };
diff --git a/src/glx/glxcurrent.c b/src/glx/glxcurrent.c
index 36317383544..9a6499037b1 100644
--- a/src/glx/glxcurrent.c
+++ b/src/glx/glxcurrent.c
@@ -216,6 +216,16 @@ MakeContextCurrent(Display * dpy, GLXDrawable draw,
    struct glx_context *oldGC = __glXGetCurrentContext();
    int ret = Success;
 
+   /* XXX: If this is left out, then libGL ends up not having this
+    * symbol, and drivers using it fail to load.  Compare the
+    * implementation of this symbol to _glapi_noop_enable_warnings(),
+    * though, which gets into the library despite no callers, the same
+    * prototypes, and the same compile flags to the files containing
+    * them.  Moving the definition to glapi_nop.c gets it into the
+    * library, though.
+    */
+   (void)_glthread_GetID();
+
    /* Make sure that the new context has a nonzero ID.  In the request,
     * a zero context ID is used only to mean that we bind to no current
     * context.
@@ -236,41 +246,42 @@ MakeContextCurrent(Display * dpy, GLXDrawable draw,
 
    _glapi_check_multithread();
 
-   if (gc != NULL && gc->thread_id != 0 && gc->thread_id != _glthread_GetID()) {
-      __glXGenerateError(dpy, gc, gc->xid,
-                         BadAccess, X_GLXMakeContextCurrent);
-      return False;
-   }
-
+   __glXLock();
    if (oldGC == gc &&
-       gc->currentDrawable == draw && gc->currentReadable == read)
+       gc->currentDrawable == draw && gc->currentReadable == read) {
+      __glXUnlock();
       return True;
+   }
 
    if (oldGC != &dummyContext) {
-      oldGC->vtable->unbind(oldGC, gc);
-      oldGC->currentDpy = 0;
-      oldGC->currentDrawable = None;
-      oldGC->currentReadable = None;
-      oldGC->thread_id = 0;
+      if (--oldGC->thread_refcount == 0) {
+	 oldGC->vtable->unbind(oldGC, gc);
+	 oldGC->currentDpy = 0;
+	 oldGC->currentDrawable = None;
+	 oldGC->currentReadable = None;
+
+	 if (oldGC->xid == None && oldGC != gc) {
+	    /* We are switching away from a context that was
+	     * previously destroyed, so we need to free the memory
+	     * for the old handle. */
+	    oldGC->vtable->destroy(oldGC);
+	 }
+      }
    }
 
    if (gc) {
-      gc->currentDpy = dpy;
-      gc->currentDrawable = draw;
-      gc->currentReadable = read;
-      gc->thread_id = _glthread_GetID();
+      if (gc->thread_refcount++ == 0) {
+	 gc->currentDpy = dpy;
+	 gc->currentDrawable = draw;
+	 gc->currentReadable = read;
+      }
       __glXSetCurrentContext(gc);
       ret = gc->vtable->bind(gc, oldGC, draw, read);
    } else {
       __glXSetCurrentContextNull();
    }
 
-   if (oldGC != &dummyContext && oldGC->xid == None && oldGC != gc) {
-      /* We are switching away from a context that was
-       * previously destroyed, so we need to free the memory
-       * for the old handle. */
-      oldGC->vtable->destroy(oldGC);
-   }
+   __glXUnlock();
 
    if (ret) {
       __glXGenerateError(dpy, gc, None, ret, X_GLXMakeContextCurrent);
diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c
index 3a0e64c46d1..ffd466479b4 100644
--- a/src/glx/glxextensions.c
+++ b/src/glx/glxextensions.c
@@ -90,6 +90,7 @@ static const struct extension_info known_glx_extensions[] = {
    { GLX(MESA_agp_offset),             VER(0,0), N, N, N, Y }, /* Deprecated */
    { GLX(MESA_copy_sub_buffer),        VER(0,0), Y, N, N, N },
 #endif
+   { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N },
    { GLX(MESA_pixmap_colormap),        VER(0,0), N, N, N, N }, /* Deprecated */
    { GLX(MESA_release_buffers),        VER(0,0), N, N, N, N }, /* Deprecated */
 #ifdef GLX_USE_APPLEGL
diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h
index 78776618338..333b3f9adbd 100644
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -43,6 +43,7 @@ enum
    MESA_agp_offset_bit,
    MESA_copy_sub_buffer_bit,
    MESA_depth_float_bit,
+   MESA_multithread_makecurrent_bit,
    MESA_pixmap_colormap_bit,
    MESA_release_buffers_bit,
    MESA_swap_control_bit,
-- 
cgit v1.2.3


From 4d01bea808592aec74be2d2a4bbb6488b9299cda Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 14 Feb 2011 18:38:33 -0800
Subject: glx: Don't do the implicit glFlush in SwapBuffers if it's the wrong
 drawable.

The GLX Spec says you only implicitly glFlush if the drawable being
swapped is the current context's drawable.
---
 src/glx/glxcmds.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index 80eaf72b7d5..22bebab26bc 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -727,11 +727,16 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
    xGLXSwapBuffersReq *req;
 #endif
 
+   gc = __glXGetCurrentContext();
+
 #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
    __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable);
 
    if (pdraw != NULL) {
-      glFlush();
+      if (gc && drawable == gc->currentDrawable) {
+	 glFlush();
+      }
+
       (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0);
       return;
    }
@@ -746,7 +751,6 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
     ** The calling thread may or may not have a current context.  If it
     ** does, send the context tag so the server can do a flush.
     */
-   gc = __glXGetCurrentContext();
    if ((gc != NULL) && (dpy == gc->currentDpy) &&
        ((drawable == gc->currentDrawable)
         || (drawable == gc->currentReadable))) {
-- 
cgit v1.2.3


From 74cde6505c233f388e902d1daa0e9f186dd012a9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 14 Feb 2011 19:03:37 -0800
Subject: dri2: Don't call the dri2 flush hook for swapbuffers unless we have a
 context.

The driver only has one reasonable place to look for its context to
flush anything, which is the current context.  Don't bother it with
having to check.
---
 src/glx/dri2_glx.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index a275ba5b9fe..2c28bc27150 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -535,8 +535,13 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
     CARD64 ret = 0;
 
 #ifdef __DRI2_FLUSH
-    if (psc->f)
-    	(*psc->f->flush)(priv->driDrawable);
+    if (psc->f) {
+       struct glx_context *gc = __glXGetCurrentContext();
+
+       if (gc) {
+	  (*psc->f->flush)(priv->driDrawable);
+       }
+    }
 #endif
 
     /* Old servers don't send invalidate events */
-- 
cgit v1.2.3


From dea5e57861ec998cb7ee913a8819752cb9fa946b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 14 Feb 2011 18:57:49 -0800
Subject: intel: Use the current context rather than last bound context for a
 drawable.

If another thread bound a context to the drawable then unbound it, the
driContextPriv would end up NULL.

With the previous two fixes, this fixes glx-multithread-makecurrent-2,
despite the issue not being about the multithreaded makecurrent.
---
 src/mesa/drivers/dri/intel/intel_screen.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 356d5f72d89..746da462ee2 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -104,7 +104,8 @@ static const __DRItexBufferExtension intelTexBufferExtension = {
 static void
 intelDRI2Flush(__DRIdrawable *drawable)
 {
-   struct intel_context *intel = drawable->driContextPriv->driverPrivate;
+   GET_CURRENT_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
 
    if (intel->gen < 4)
       INTEL_FIREVERTICES(intel);
-- 
cgit v1.2.3


From 5f889c5bf5221f2af2f34e47f20bd1b98c061fbe Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 24 Feb 2011 15:25:59 -0800
Subject: glx: Adjust the MESA_multithread_makecurrent spec to match
 implementation.

This came out of discussion at the office today, and we agreed that
solving this for indirect wasn't really interesting, though the
server-side change would be of a similar level of difficulty.
---
 docs/MESA_multithread_makecurrent.spec | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/docs/MESA_multithread_makecurrent.spec b/docs/MESA_multithread_makecurrent.spec
index 68ade762436..5065c2fc0a3 100644
--- a/docs/MESA_multithread_makecurrent.spec
+++ b/docs/MESA_multithread_makecurrent.spec
@@ -61,19 +61,37 @@ New Tokens
 
     None.
 
+Changes to Chapter 2 of the GLX 1.3 Specification (GLX Operation)
+
+    Replace the following sentence from section 2.2 Rendering Contexts:
+	In addition, a rendering context can be current for only one
+	thread at a time.
+    with:
+	In addition, an indirect rendering context can be current for
+	only one thread at a time.  A direct rendering context may be
+	current to multiple threads, with synchronization of access to
+	the context through the GL managed by the application through
+	mutexes.
+
 Changes to Chapter 3 of the GLX 1.3 Specification (Functions and Errors)
 
-    Remove the following sentence from section 3.3.7 Rendering Contexts:
+    Replace the following sentence from section 3.3.7 Rendering Contexts:
 	If ctx is current to some other thread, then
 	glXMakeContextCurrent will generate a BadAccess error.
+    with:
+	If ctx is an indirect context current to some other thread,
+	then glXMakeContextCurrent will generate a BadAccess error.
 
-    Remove the following sentence from section 3.5 Rendering Contexts:
+    Replace the following sentence from section 3.5 Rendering Contexts:
 	If ctx is current to some other thread, then
 	glXMakeCurrent will generate a BadAccess error.
+    with:
+	If ctx is an indirect context current to some other thread,
+	then glXMakeCurrent will generate a BadAccess error.
 
 GLX Protocol
 
-    None.  The GLX extension is client-side.
+    None.  The GLX extension only extends to direct rendering contexts.
 
 Errors
 
@@ -125,6 +143,11 @@ Issues
     share in synchronization for binding of their own contexts, so the
     refcounting of the contexts is required to be threadsafe.
 
+    (6) Does this apply to indirect contexts?
+
+    This was ignored in the initial revision of the spec.  Behavior
+    for indirect contexts is left as-is.
+
 Revision History
 
     20 November 2009 Eric Anholt - initial specification
-- 
cgit v1.2.3


From a385ac62070fa68052c77df7be62685bf6a58992 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 25 Feb 2011 16:33:40 -0800
Subject: glsl/builtins: Fix return type for textureSize sampler2DArray
 variants.

A copy and paste error.
---
 src/glsl/builtins/profiles/130.frag | 4 ++--
 src/glsl/builtins/profiles/130.vert | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/glsl/builtins/profiles/130.frag b/src/glsl/builtins/profiles/130.frag
index 43653a906f8..0e3c7ac4199 100644
--- a/src/glsl/builtins/profiles/130.frag
+++ b/src/glsl/builtins/profiles/130.frag
@@ -491,8 +491,8 @@ ivec2 textureSize( sampler1DArray sampler, int lod);
 ivec2 textureSize(isampler1DArray sampler, int lod);
 ivec2 textureSize(usampler1DArray sampler, int lod);
 ivec3 textureSize( sampler2DArray sampler, int lod);
-ivec2 textureSize(isampler2DArray sampler, int lod);
-ivec2 textureSize(usampler2DArray sampler, int lod);
+ivec3 textureSize(isampler2DArray sampler, int lod);
+ivec3 textureSize(usampler2DArray sampler, int lod);
 
 ivec2 textureSize(sampler1DArrayShadow sampler, int lod);
 ivec3 textureSize(sampler2DArrayShadow sampler, int lod);
diff --git a/src/glsl/builtins/profiles/130.vert b/src/glsl/builtins/profiles/130.vert
index 742dec6e6d5..f85b27f8f8c 100644
--- a/src/glsl/builtins/profiles/130.vert
+++ b/src/glsl/builtins/profiles/130.vert
@@ -493,8 +493,8 @@ ivec2 textureSize( sampler1DArray sampler, int lod);
 ivec2 textureSize(isampler1DArray sampler, int lod);
 ivec2 textureSize(usampler1DArray sampler, int lod);
 ivec3 textureSize( sampler2DArray sampler, int lod);
-ivec2 textureSize(isampler2DArray sampler, int lod);
-ivec2 textureSize(usampler2DArray sampler, int lod);
+ivec3 textureSize(isampler2DArray sampler, int lod);
+ivec3 textureSize(usampler2DArray sampler, int lod);
 
 ivec2 textureSize(sampler1DArrayShadow sampler, int lod);
 ivec3 textureSize(sampler2DArrayShadow sampler, int lod);
-- 
cgit v1.2.3


From beaf039f972490bc62ec87401441d4b754ae86b0 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sat, 26 Feb 2011 15:24:51 +0100
Subject: i915g: cleanup static state calculation, part 1

Move it to i915_state_static.c. This way i915_state_emit.c only emits
state and doesn't (re)calculate it.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/drivers/i915/i915_context.h      |  7 ++
 src/gallium/drivers/i915/i915_state_emit.c   | 95 ++--------------------------
 src/gallium/drivers/i915/i915_state_static.c | 90 +++++++++++++++++++++++++-
 3 files changed, 103 insertions(+), 89 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 2cf53424f06..46c09871ffd 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -150,6 +150,13 @@ struct i915_state
    /** Describes the current hardware vertex layout */
    struct vertex_info vertex_info;
 
+   /* static state (dst/depth buffer state) */
+   struct i915_winsys_buffer *cbuf_bo;
+   unsigned cbuf_flags;
+   struct i915_winsys_buffer *depth_bo;
+   unsigned depth_flags;
+   unsigned dst_buf_vars;
+
    unsigned id;			/* track lost context events */
 };
 
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 1c77de1acce..1b2cffc91ee 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -37,34 +37,6 @@
 
 #include "util/u_math.h"
 
-static unsigned translate_format( enum pipe_format format )
-{
-   switch (format) {
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      return COLOR_BUF_ARGB8888;
-   case PIPE_FORMAT_B5G6R5_UNORM:
-      return COLOR_BUF_RGB565;
-   default:
-      assert(0);
-      return 0;
-   }
-}
-
-static unsigned translate_depth_format( enum pipe_format zformat )
-{
-   switch (zformat) {
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
-      return DEPTH_FRMT_24_FIXED_8_OTHER;
-   case PIPE_FORMAT_Z16_UNORM:
-      return DEPTH_FRMT_16_FIXED;
-   default:
-      assert(0);
-      return 0;
-   }
-}
-
-
 /**
  * Examine framebuffer state to determine width, height.
  */
@@ -88,22 +60,6 @@ framebuffer_size(const struct pipe_framebuffer_state *fb,
    }
 }
 
-static inline uint32_t
-buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
-{
-         uint32_t tiling_bits = 0;
-
-         switch (tiling) {
-         case I915_TILE_Y:
-            tiling_bits |= BUF_3D_TILE_WALK_Y;
-         case I915_TILE_X:
-            tiling_bits |= BUF_3D_TILED_SURFACE;
-         case I915_TILE_NONE:
-            break;
-         }
-
-         return tiling_bits;
-}
 
 /* Push the state into the sarea and/or texture memory.
  */
@@ -233,64 +189,27 @@ i915_emit_hardware_state(struct i915_context *i915 )
    /* 8 dwords, 2 relocs */
    if (i915->hardware_dirty & I915_HW_STATIC)
    {
-      struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
-      struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
-
-      if (cbuf_surface) {
-         struct i915_texture *tex = i915_texture(cbuf_surface->texture);
-         assert(tex);
-
+      if (i915->current.cbuf_bo) {
          OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
-
-         OUT_BATCH(BUF_3D_ID_COLOR_BACK |
-                   BUF_3D_PITCH(tex->stride) |  /* pitch in bytes */
-                   buf_3d_tiling_bits(tex->tiling));
-
-         OUT_RELOC(tex->buffer,
+         OUT_BATCH(i915->current.cbuf_flags);
+         OUT_RELOC(i915->current.cbuf_bo,
                    I915_USAGE_RENDER,
                    0);
       }
 
       /* What happens if no zbuf??
        */
-      if (depth_surface) {
-         struct i915_texture *tex = i915_texture(depth_surface->texture);
-         unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level,
-                                               depth_surface->u.tex.first_layer);
-         assert(tex);
-         assert(offset == 0);
-
+      if (i915->current.depth_bo) {
          OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
-
-         assert(tex);
-         OUT_BATCH(BUF_3D_ID_DEPTH |
-                   BUF_3D_PITCH(tex->stride) |  /* pitch in bytes */
-                   buf_3d_tiling_bits(tex->tiling));
-
-         OUT_RELOC(tex->buffer,
+         OUT_BATCH(i915->current.depth_flags);
+         OUT_RELOC(i915->current.depth_bo,
                    I915_USAGE_RENDER,
                    0);
       }
 
       {
-         unsigned cformat, zformat = 0;
-
-         if (cbuf_surface)
-            cformat = cbuf_surface->format;
-         else
-            cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */
-         cformat = translate_format(cformat);
-
-         if (depth_surface) 
-            zformat = translate_depth_format( i915->framebuffer.zsbuf->format );
-
          OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
-         OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */
-                   DSTORG_VERT_BIAS(0x8) | /* .5 */
-                   LOD_PRECLAMP_OGL |
-                   TEX_DEFAULT_COLOR_OGL |
-                   cformat |
-                   zformat );
+         OUT_BATCH(i915->current.dst_buf_vars);
       }
    }
 #endif
diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c
index dc9a4c1e2fd..94bbf3f5fd4 100644
--- a/src/gallium/drivers/i915/i915_state_static.c
+++ b/src/gallium/drivers/i915/i915_state_static.c
@@ -27,16 +27,104 @@
 #include "i915_reg.h"
 #include "i915_context.h"
 #include "i915_state.h"
+#include "i915_resource.h"
 
 
 /***********************************************************************
  * Update framebuffer state
  */
+static unsigned translate_format(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      return COLOR_BUF_ARGB8888;
+   case PIPE_FORMAT_B5G6R5_UNORM:
+      return COLOR_BUF_RGB565;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+static unsigned translate_depth_format(enum pipe_format zformat)
+{
+   switch (zformat) {
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+      return DEPTH_FRMT_24_FIXED_8_OTHER;
+   case PIPE_FORMAT_Z16_UNORM:
+      return DEPTH_FRMT_16_FIXED;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+static inline uint32_t
+buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
+{
+   uint32_t tiling_bits = 0;
+
+   switch (tiling) {
+   case I915_TILE_Y:
+      tiling_bits |= BUF_3D_TILE_WALK_Y;
+   case I915_TILE_X:
+      tiling_bits |= BUF_3D_TILED_SURFACE;
+   case I915_TILE_NONE:
+      break;
+   }
+
+   return tiling_bits;
+}
+
 static void update_framebuffer(struct i915_context *i915)
 {
-   /* HW emit currently references framebuffer state directly:
+   struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
+   struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
+   unsigned cformat, zformat;
+
+   if (cbuf_surface) {
+      struct i915_texture *tex = i915_texture(cbuf_surface->texture);
+      assert(tex);
+
+      i915->current.cbuf_bo = tex->buffer;
+      i915->current.cbuf_flags = BUF_3D_ID_COLOR_BACK |
+                                 BUF_3D_PITCH(tex->stride) |  /* pitch in bytes */
+                                 buf_3d_tiling_bits(tex->tiling);
+      cformat = cbuf_surface->format;
+   } else {
+      i915->current.cbuf_bo = NULL;
+      cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */
+   }
+   cformat = translate_format(cformat);
+
+   /* What happens if no zbuf??
     */
+   if (depth_surface) {
+      struct i915_texture *tex = i915_texture(depth_surface->texture);
+      unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level,
+                                            depth_surface->u.tex.first_layer);
+      assert(tex);
+      assert(offset == 0);
+
+      i915->current.depth_bo = tex->buffer;
+      i915->current.depth_flags = BUF_3D_ID_DEPTH |
+                                  BUF_3D_PITCH(tex->stride) |  /* pitch in bytes */
+                                  buf_3d_tiling_bits(tex->tiling);
+      zformat = translate_depth_format(depth_surface->format);
+   } else {
+      i915->current.depth_bo = NULL;
+      zformat = 0;
+   }
+
+   i915->current.dst_buf_vars = DSTORG_HORT_BIAS(0x8) | /* .5 */
+                                DSTORG_VERT_BIAS(0x8) | /* .5 */
+                                LOD_PRECLAMP_OGL |
+                                TEX_DEFAULT_COLOR_OGL |
+                                cformat |
+                                zformat;
+
    i915->hardware_dirty |= I915_HW_STATIC;
 }
 
-- 
cgit v1.2.3


From f58c11af72303a7b8ec480b4f216e4a17882e0f4 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sat, 26 Feb 2011 15:53:01 +0100
Subject: i915g: cleanup static state calculation, part 2

Now also for the DRAW_RECT command

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/drivers/i915/i915_context.h      |  2 ++
 src/gallium/drivers/i915/i915_state_emit.c   | 51 ++--------------------------
 src/gallium/drivers/i915/i915_state_static.c | 43 +++++++++++++++++++++++
 3 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 46c09871ffd..4243dfaff89 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -156,6 +156,8 @@ struct i915_state
    struct i915_winsys_buffer *depth_bo;
    unsigned depth_flags;
    unsigned dst_buf_vars;
+   uint32_t draw_offset;
+   uint32_t draw_size;
 
    unsigned id;			/* track lost context events */
 };
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 1b2cffc91ee..f7c8ed10183 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -37,29 +37,6 @@
 
 #include "util/u_math.h"
 
-/**
- * Examine framebuffer state to determine width, height.
- */
-static boolean
-framebuffer_size(const struct pipe_framebuffer_state *fb,
-                 uint *width, uint *height)
-{
-   if (fb->cbufs[0]) {
-      *width = fb->cbufs[0]->width;
-      *height = fb->cbufs[0]->height;
-      return TRUE;
-   }
-   else if (fb->zsbuf) {
-      *width = fb->zsbuf->width;
-      *height = fb->zsbuf->height;
-      return TRUE;
-   }
-   else {
-      *width = *height = 0;
-      return FALSE;
-   }
-}
-
 
 /* Push the state into the sarea and/or texture memory.
  */
@@ -330,35 +307,13 @@ i915_emit_hardware_state(struct i915_context *i915 )
    /* 6 dwords, 0 relocs */
    if (i915->hardware_dirty & I915_HW_STATIC)
    {
-      uint w, h;
-      struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
-      unsigned x, y;
-      int layer;
-      uint32_t draw_offset;
-      boolean ret;
-
-      ret = framebuffer_size(&i915->framebuffer, &w, &h);
-      assert(ret);
-
-      if (cbuf_surface) {
-	 struct i915_texture *tex = i915_texture(cbuf_surface->texture);
-	 layer = cbuf_surface->u.tex.first_layer;
-
-	 x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
-	 y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
-
-      } else
-	 x = y = 0;
-
-      draw_offset = x | (y << 16);
-
       /* XXX flush only required when the draw_offset changes! */
       OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
       OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
       OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
-      OUT_BATCH(draw_offset);
-      OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16));
-      OUT_BATCH(draw_offset);
+      OUT_BATCH(i915->current.draw_offset);
+      OUT_BATCH(i915->current.draw_size);
+      OUT_BATCH(i915->current.draw_offset);
    }
 #endif
 
diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c
index 94bbf3f5fd4..fd10cbc3786 100644
--- a/src/gallium/drivers/i915/i915_state_static.c
+++ b/src/gallium/drivers/i915/i915_state_static.c
@@ -78,11 +78,38 @@ buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
    return tiling_bits;
 }
 
+/**
+ * Examine framebuffer state to determine width, height.
+ */
+static boolean
+framebuffer_size(const struct pipe_framebuffer_state *fb,
+                 uint *width, uint *height)
+{
+   if (fb->cbufs[0]) {
+      *width = fb->cbufs[0]->width;
+      *height = fb->cbufs[0]->height;
+      return TRUE;
+   }
+   else if (fb->zsbuf) {
+      *width = fb->zsbuf->width;
+      *height = fb->zsbuf->height;
+      return TRUE;
+   }
+   else {
+      *width = *height = 0;
+      return FALSE;
+   }
+}
+
 static void update_framebuffer(struct i915_context *i915)
 {
    struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
    struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
    unsigned cformat, zformat;
+   unsigned x, y, w, h;
+   int layer;
+   uint32_t draw_offset;
+   boolean ret;
 
    if (cbuf_surface) {
       struct i915_texture *tex = i915_texture(cbuf_surface->texture);
@@ -93,9 +120,15 @@ static void update_framebuffer(struct i915_context *i915)
                                  BUF_3D_PITCH(tex->stride) |  /* pitch in bytes */
                                  buf_3d_tiling_bits(tex->tiling);
       cformat = cbuf_surface->format;
+
+      layer = cbuf_surface->u.tex.first_layer;
+
+      x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
+      y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
    } else {
       i915->current.cbuf_bo = NULL;
       cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */
+      x = y = 0;
    }
    cformat = translate_format(cformat);
 
@@ -125,6 +158,16 @@ static void update_framebuffer(struct i915_context *i915)
                                 cformat |
                                 zformat;
 
+   /* drawing rect calculations */
+   draw_offset = x | (y << 16);
+   ret = framebuffer_size(&i915->framebuffer, &w, &h);
+   assert(ret);
+   if (i915->current.draw_offset != draw_offset) {
+      i915->current.draw_offset = draw_offset;
+      /* XXX: only emit flush on change and not always in emit */
+   }
+   i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16);
+
    i915->hardware_dirty |= I915_HW_STATIC;
 }
 
-- 
cgit v1.2.3


From e20c3255e29b9f94840d1f23439ab29060c94e3f Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 27 Feb 2011 16:32:38 +0100
Subject: i915g: add raw batchbuffer dumping in drm winsys

These files can be decoded with intel_dump_decode from the intel-gpu-tools
available at:

http://cgit.freedesktop.org/xorg/app/intel-gpu-tools/

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c | 9 +++++++++
 src/gallium/winsys/i915/drm/i915_drm_winsys.c      | 1 +
 src/gallium/winsys/i915/drm/i915_drm_winsys.h      | 1 +
 3 files changed, 11 insertions(+)

diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
index afeab5eef42..54b2d7af2eb 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
@@ -5,6 +5,7 @@
 #include "i915_drm.h"
 #include "i915/i915_debug.h"
 #include <xf86drm.h>
+#include <stdio.h>
 
 #define BATCH_RESERVED 16
 
@@ -169,6 +170,14 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch,
       assert(ret == 0);
    }
 
+   if (i915_drm_winsys(ibatch->iws)->dump_raw_file) {
+      FILE *file = fopen(i915_drm_winsys(ibatch->iws)->dump_raw_file, "a");
+      if (file) {
+	 fwrite(batch->base.map, used, 1, file);
+	 fclose(file);
+      }
+   }
+
 #ifdef INTEL_RUN_SYNC
    drm_intel_bo_wait_rendering(batch->bo);
 #endif
diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.c b/src/gallium/winsys/i915/drm/i915_drm_winsys.c
index 2288b48b2bd..2c3b508d056 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_winsys.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.c
@@ -72,6 +72,7 @@ i915_drm_winsys_create(int drmFD)
    drm_intel_bufmgr_gem_enable_fenced_relocs(idws->gem_manager);
 
    idws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE);
+   idws->dump_raw_file = debug_get_option("I915_DUMP_RAW_FILE", NULL);
    idws->send_cmd = !debug_get_bool_option("I915_NO_HW", FALSE);
 
    return &idws->base;
diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.h b/src/gallium/winsys/i915/drm/i915_drm_winsys.h
index 0d74d0270c7..dae53c3e801 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_winsys.h
+++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.h
@@ -18,6 +18,7 @@ struct i915_drm_winsys
    struct i915_winsys base;
 
    boolean dump_cmd;
+   char *dump_raw_file;
    boolean send_cmd;
 
    int fd; /**< Drm file discriptor */
-- 
cgit v1.2.3


From 3c59b3eb4b6f4f673106d738b62458e6b0b38d46 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 27 Feb 2011 17:51:48 +0100
Subject: i915g/winsys: buffer validation support

v2: Add the batch bo to the libdrm validation list, since otherwise
libdrm won't take previously used buffers into account.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/drivers/i915/i915_winsys.h             | 12 ++++++++++++
 src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c | 21 +++++++++++++++++++++
 src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c   |  9 +++++++++
 3 files changed, 42 insertions(+)

diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h
index e915a886c9b..4ac2f5b9777 100644
--- a/src/gallium/drivers/i915/i915_winsys.h
+++ b/src/gallium/drivers/i915/i915_winsys.h
@@ -94,6 +94,18 @@ struct i915_winsys {
    struct i915_winsys_batchbuffer *
       (*batchbuffer_create)(struct i915_winsys *iws);
 
+   /**
+    * Validate buffers for usage in this batchbuffer.
+    * Does space-checking and assorted other book-keeping.
+    *
+    * @batch
+    * @buffers array of buffers to validate
+    * @num_of_buffers size of the passed array
+    */
+   boolean (*validate_buffers)(struct i915_winsys_batchbuffer *batch,
+	 		       struct i915_winsys_buffer **buffers,
+			       int num_of_buffers);
+
    /**
     * Emit a relocation to a buffer.
     * Target position in batchbuffer is the same as ptr.
diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
index 54b2d7af2eb..7cc5af89639 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
@@ -72,6 +72,26 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws)
    return &batch->base;
 }
 
+static boolean
+i915_drm_batchbuffer_validate_buffers(struct i915_winsys_batchbuffer *batch,
+				      struct i915_winsys_buffer **buffer,
+				      int num_of_buffers)
+{
+   struct i915_drm_batchbuffer *drm_batch = i915_drm_batchbuffer(batch);
+   drm_intel_bo *bos[num_of_buffers + 1];
+   int i, ret;
+
+   bos[0] = drm_batch->bo;
+   for (i = 0; i < num_of_buffers; i++)
+      bos[i+1] = intel_bo(buffer[i]);
+   /* bos[] holds the batch bo plus every caller buffer, so check all of them */
+   ret = drm_intel_bufmgr_check_aperture_space(bos, num_of_buffers + 1);
+   if (ret != 0)
+      return FALSE;
+
+   return TRUE;
+}
+
 static int
 i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
                             struct i915_winsys_buffer *buffer,
@@ -211,6 +231,7 @@ i915_drm_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch)
 void i915_drm_winsys_init_batchbuffer_functions(struct i915_drm_winsys *idws)
 {
    idws->base.batchbuffer_create = i915_drm_batchbuffer_create;
+   idws->base.validate_buffers = i915_drm_batchbuffer_validate_buffers;
    idws->base.batchbuffer_reloc = i915_drm_batchbuffer_reloc;
    idws->base.batchbuffer_flush = i915_drm_batchbuffer_flush;
    idws->base.batchbuffer_destroy = i915_drm_batchbuffer_destroy;
diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
index d246bf64138..3d0c1fa6224 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
@@ -58,6 +58,14 @@ i915_sw_batchbuffer_create(struct i915_winsys *iws)
    return &batch->base;
 }
 
+static boolean
+i915_sw_batchbuffer_validate_buffers(struct i915_winsys_batchbuffer *batch,
+				     struct i915_winsys_buffer **buffer,
+				     int num_of_buffers)
+{
+   return TRUE;
+}
+
 static int
 i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
                           struct i915_winsys_buffer *buffer,
@@ -146,6 +154,7 @@ i915_sw_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch)
 void i915_sw_winsys_init_batchbuffer_functions(struct i915_sw_winsys *isws)
 {
    isws->base.batchbuffer_create = i915_sw_batchbuffer_create;
+   isws->base.validate_buffers = i915_sw_batchbuffer_validate_buffers;
    isws->base.batchbuffer_reloc = i915_sw_batchbuffer_reloc;
    isws->base.batchbuffer_flush = i915_sw_batchbuffer_flush;
    isws->base.batchbuffer_destroy = i915_sw_batchbuffer_destroy;
-- 
cgit v1.2.3


From 342016010a515ccc4492924ca260d7ff76ecb1b5 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 27 Feb 2011 21:57:31 +0100
Subject: i915g: buffer validation for render state

Also contains the first few bits for hw state atoms.

v2: Implement suggestion by Jakob Bornecrantz.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/drivers/i915/i915_batchbuffer.h |  8 +++
 src/gallium/drivers/i915/i915_context.h     |  3 ++
 src/gallium/drivers/i915/i915_state_emit.c  | 76 +++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+)

diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index d92b2ccb31e..b4a91dabb37 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -75,6 +75,14 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
    batch->ptr += size;
 }
 
+static INLINE boolean
+i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch,
+			     struct i915_winsys_buffer **buffers,
+			     int num_of_buffers)
+{
+   return batch->iws->validate_buffers(batch, buffers, num_of_buffers);
+}
+
 static INLINE int
 i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
                               struct i915_winsys_buffer *buffer,
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 4243dfaff89..f264b0611e6 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -247,6 +247,9 @@ struct i915_context {
    unsigned immediate_dirty;
    unsigned dynamic_dirty;
 
+   struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS];
+   int num_validation_buffers;
+
    struct util_slab_mempool transfer_pool;
 };
 
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index f7c8ed10183..610d20a0c3f 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -36,7 +36,77 @@
 #include "pipe/p_defines.h"
 
 #include "util/u_math.h"
+#include "util/u_memory.h"
 
+struct i915_tracked_hw_state {
+   const char *name;
+   void (*validate)(struct i915_context *);
+   void (*emit)(struct i915_context *);
+   unsigned dirty;
+};
+
+
+static void
+validate_immediate(struct i915_context *i915)
+{
+   if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0))
+      i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
+}
+
+static void
+validate_static(struct i915_context *i915)
+{
+   if (i915->current.cbuf_bo)
+      i915->validation_buffers[i915->num_validation_buffers++]
+         = i915->current.cbuf_bo;
+
+   if (i915->current.depth_bo)
+      i915->validation_buffers[i915->num_validation_buffers++]
+         = i915->current.depth_bo;
+}
+
+static void
+validate_map(struct i915_context *i915)
+{
+   const uint enabled = i915->current.sampler_enable_flags;
+   uint unit;
+   struct i915_texture *tex;
+
+
+   for (unit = 0; unit < I915_TEX_UNITS; unit++) {
+      if (enabled & (1 << unit)) {
+	 tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
+	 i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
+      }
+   }
+}
+
+const static struct i915_tracked_hw_state hw_atoms[] = {
+   { "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE },
+   { "static", validate_static, NULL, I915_HW_STATIC },
+   { "map", validate_map, NULL, I915_HW_MAP }
+};
+
+static boolean
+i915_validate_state(struct i915_context *i915)
+{
+   int i;
+
+   i915->num_validation_buffers = 0;
+
+   for (i = 0; i < Elements(hw_atoms); i++)
+      if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate)
+	 hw_atoms[i].validate(i915);
+
+   if (i915->num_validation_buffers == 0)
+      return TRUE;
+
+   if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
+				     i915->num_validation_buffers))
+      return FALSE;
+
+   return TRUE;
+}
 
 /* Push the state into the sarea and/or texture memory.
  */
@@ -68,8 +138,14 @@ i915_emit_hardware_state(struct i915_context *i915 )
    if (I915_DBG_ON(DBG_ATOMS))
       i915_dump_hardware_dirty(i915, __FUNCTION__);
 
+   if (!i915_validate_state(i915)) {
+      FLUSH_BATCH(NULL);
+      assert(i915_validate_state(i915));
+   }
+
    if(!BEGIN_BATCH(dwords, relocs)) {
       FLUSH_BATCH(NULL);
+      assert(i915_validate_state(i915));
       assert(BEGIN_BATCH(dwords, relocs));
    }
 
-- 
cgit v1.2.3


From f90fa55347c641cd0bcdde121909045f0dedbd66 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 25 Feb 2011 22:51:03 +0100
Subject: i915g: buffer validation for blitter

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/drivers/i915/i915_blit.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c
index 97c25665156..9a390e51341 100644
--- a/src/gallium/drivers/i915/i915_blit.c
+++ b/src/gallium/drivers/i915/i915_blit.c
@@ -49,6 +49,11 @@ i915_fill_blit(struct i915_context *i915,
    I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
             __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
 
+   if(!i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)) {
+      FLUSH_BATCH(NULL);
+      assert(i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1));
+   }
+
    switch (cpp) {
    case 1:
    case 2:
@@ -94,6 +99,7 @@ i915_copy_blit(struct i915_context *i915,
    unsigned CMD, BR13;
    int dst_y2 = dst_y + h;
    int dst_x2 = dst_x + w;
+   struct i915_winsys_buffer *buffers[2] = {src_buffer, dst_buffer};
 
 
    I915_DBG(DBG_BLIT,
@@ -102,6 +108,11 @@ i915_copy_blit(struct i915_context *i915,
             src_buffer, src_pitch, src_offset, src_x, src_y,
             dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
 
+   if(!i915_winsys_validate_buffers(i915->batch, buffers, 2)) {
+      FLUSH_BATCH(NULL);
+      assert(i915_winsys_validate_buffers(i915->batch, buffers, 2));
+   }
+
    switch (cpp) {
    case 1:
    case 2:
-- 
cgit v1.2.3


From d42c9433b0a3d9b3a198261d8037ce0d4595452d Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 25 Feb 2011 23:40:27 +0100
Subject: i915g: implement cache flushing

With an extremely dumb strategy. But it's the same i915c employs.

Also improve the hw_atom code slightly by statically specifying the
required batch space. For extremely variable stuff (shaders, constants)
it would probably be better to add a new parameter to the hw_atom->validate
function.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 src/gallium/drivers/i915/i915_blit.c         |  4 +++
 src/gallium/drivers/i915/i915_context.c      |  1 +
 src/gallium/drivers/i915/i915_context.h      | 13 ++++++++
 src/gallium/drivers/i915/i915_flush.c        |  2 ++
 src/gallium/drivers/i915/i915_state_emit.c   | 47 +++++++++++++++++++++++-----
 src/gallium/drivers/i915/i915_state_static.c |  3 ++
 6 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c
index 9a390e51341..f885417f8ed 100644
--- a/src/gallium/drivers/i915/i915_blit.c
+++ b/src/gallium/drivers/i915/i915_blit.c
@@ -81,6 +81,8 @@ i915_fill_blit(struct i915_context *i915,
    OUT_BATCH(((y + h) << 16) | (x + w));
    OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
    OUT_BATCH(color);
+
+   i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
 }
 
 void
@@ -153,4 +155,6 @@ i915_copy_blit(struct i915_context *i915,
    OUT_BATCH((src_y << 16) | src_x);
    OUT_BATCH(((int) src_pitch & 0xffff));
    OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset);
+
+   i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
 }
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index f970f8a7009..cbf919754e5 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -168,6 +168,7 @@ i915_create_context(struct pipe_screen *screen, void *priv)
    i915->hardware_dirty = ~0;
    i915->immediate_dirty = ~0;
    i915->dynamic_dirty = ~0;
+   i915->flush_dirty = 0;
 
    /* Batch stream debugging is a bit hacked up at the moment:
     */
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index f264b0611e6..1da637d068e 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -246,6 +246,7 @@ struct i915_context {
    unsigned hardware_dirty;
    unsigned immediate_dirty;
    unsigned dynamic_dirty;
+   unsigned flush_dirty;
 
    struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS];
    int num_validation_buffers;
@@ -289,6 +290,18 @@ struct i915_context {
 #define I915_HW_CONSTANTS         (1<<I915_CACHE_CONSTANTS)
 #define I915_HW_IMMEDIATE         (1<<(I915_MAX_CACHE+0))
 #define I915_HW_INVARIANT         (1<<(I915_MAX_CACHE+1))
+#define I915_HW_FLUSH             (1<<(I915_MAX_CACHE+1))
+
+/* hw flush handling */
+#define I915_FLUSH_CACHE		1
+#define I915_PIPELINE_FLUSH		2
+
+static INLINE
+void i915_set_flush_dirty(struct i915_context *i915, unsigned flush)
+{
+   i915->hardware_dirty |= I915_HW_FLUSH;
+   i915->flush_dirty |= flush;
+}
 
 
 /***********************************************************************
diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c
index 911c051d1f2..22a2c7b2cb4 100644
--- a/src/gallium/drivers/i915/i915_flush.c
+++ b/src/gallium/drivers/i915/i915_flush.c
@@ -96,4 +96,6 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence)
    i915->hardware_dirty = ~0;
    i915->immediate_dirty = ~0;
    i915->dynamic_dirty = ~0;
+   /* kernel emits flushes in between batchbuffers */
+   i915->flush_dirty = 0;
 }
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 610d20a0c3f..0323ad940f9 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -42,10 +42,25 @@ struct i915_tracked_hw_state {
    const char *name;
    void (*validate)(struct i915_context *);
    void (*emit)(struct i915_context *);
-   unsigned dirty;
+   unsigned dirty, batch_space;
 };
 
 
+static void
+emit_flush(struct i915_context *i915)
+{
+   /* Cache handling is very cheap atm. State handling can request two flushes:
+    * - I915_FLUSH_CACHE which is a flush everything request and
+    * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
+    * Because the cache handling is so dumb, no explicit "invalidate map cache".
+    * Also, the first is a strict superset of the latter, so the following logic
+    * works. */
+   if (i915->flush_dirty & I915_FLUSH_CACHE)
+      OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);
+   else if (i915->flush_dirty & I915_PIPELINE_FLUSH)
+      OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
+}
+
 static void
 validate_immediate(struct i915_context *i915)
 {
@@ -82,21 +97,25 @@ validate_map(struct i915_context *i915)
 }
 
 const static struct i915_tracked_hw_state hw_atoms[] = {
+   { "flush", NULL, emit_flush, I915_HW_FLUSH, 1 },
    { "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE },
    { "static", validate_static, NULL, I915_HW_STATIC },
    { "map", validate_map, NULL, I915_HW_MAP }
 };
 
 static boolean
-i915_validate_state(struct i915_context *i915)
+i915_validate_state(struct i915_context *i915, unsigned *batch_space)
 {
    int i;
 
    i915->num_validation_buffers = 0;
+   *batch_space = 0;
 
    for (i = 0; i < Elements(hw_atoms); i++)
-      if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate)
+      if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate) {
 	 hw_atoms[i].validate(i915);
+	 *batch_space += hw_atoms[i].batch_space;
+      }
 
    if (i915->num_validation_buffers == 0)
       return TRUE;
@@ -108,11 +127,22 @@ i915_validate_state(struct i915_context *i915)
    return TRUE;
 }
 
+static void
+emit_state(struct i915_context *i915)
+{
+   int i;
+
+   for (i = 0; i < Elements(hw_atoms); i++)
+      if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit)
+	 hw_atoms[i].emit(i915);
+}
+
 /* Push the state into the sarea and/or texture memory.
  */
 void
 i915_emit_hardware_state(struct i915_context *i915 )
 {
+   unsigned batch_space;
    /* XXX: there must be an easier way */
    const unsigned dwords = ( 14 + 
                              7 + 
@@ -138,20 +168,21 @@ i915_emit_hardware_state(struct i915_context *i915 )
    if (I915_DBG_ON(DBG_ATOMS))
       i915_dump_hardware_dirty(i915, __FUNCTION__);
 
-   if (!i915_validate_state(i915)) {
+   if (!i915_validate_state(i915, &batch_space)) {
       FLUSH_BATCH(NULL);
-      assert(i915_validate_state(i915));
+      assert(i915_validate_state(i915, &batch_space));
    }
 
-   if(!BEGIN_BATCH(dwords, relocs)) {
+   if(!BEGIN_BATCH(batch_space + dwords, relocs)) {
       FLUSH_BATCH(NULL);
-      assert(i915_validate_state(i915));
-      assert(BEGIN_BATCH(dwords, relocs));
+      assert(i915_validate_state(i915, &batch_space));
+      assert(BEGIN_BATCH(batch_space + dwords, relocs));
    }
 
    save_ptr = (uintptr_t)i915->batch->ptr;
    save_relocs = i915->batch->relocs;
 
+   emit_state(i915);
    /* 14 dwords, 0 relocs */
    if (i915->hardware_dirty & I915_HW_INVARIANT)
    {
diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c
index fd10cbc3786..97044499990 100644
--- a/src/gallium/drivers/i915/i915_state_static.c
+++ b/src/gallium/drivers/i915/i915_state_static.c
@@ -169,6 +169,9 @@ static void update_framebuffer(struct i915_context *i915)
    i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16);
 
    i915->hardware_dirty |= I915_HW_STATIC;
+
+   /* flush the cache in case we sample from the old renderbuffers */
+   i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
 }
 
 struct i915_tracked_state i915_hw_framebuffer = {
-- 
cgit v1.2.3


From b6d40213935da702570eca2c0861bd4b1d7f5254 Mon Sep 17 00:00:00 2001
From: Fabian Bieler <der.fabe@gmx.net>
Date: Fri, 25 Feb 2011 10:11:37 +0100
Subject: r600g: Don't negate result of ABS instruction

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_shader.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 13ccc3fdc1f..cc4491c0f75 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1014,6 +1014,8 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
 			break;
 		case TGSI_OPCODE_ABS:
 			alu.src[0].abs = 1;
+			/* negation is performed after absolute value is taken */
+			alu.src[0].neg = 0;
 			break;
 		default:
 			break;
-- 
cgit v1.2.3


From 0ab7dcddb35560626c1aab4e8e6181dc4b4703a6 Mon Sep 17 00:00:00 2001
From: Fabian Bieler <der.fabe@gmx.net>
Date: Sun, 27 Feb 2011 16:10:55 +0100
Subject: r600g: Process TRUNC with tgsi_op2

TRUNC is neither a scalar instruction nor exclusive to the Trans unit.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_shader.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index cc4491c0f75..e64d396dcca 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2876,7 +2876,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
 	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	/* gap */
 	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -3034,7 +3034,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
 	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	/* gap */
 	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-- 
cgit v1.2.3


From 0a17444133e74de7bc5d04d8ffc8f29b89f0cf58 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 28 Feb 2011 11:10:10 +1000
Subject: Revert "r600g: Don't negate result of ABS instruction"

This reverts commit b6d40213935da702570eca2c0861bd4b1d7f5254.

This actually breaks gears here on my rv670.
---
 src/gallium/drivers/r600/r600_shader.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index e64d396dcca..9fcb1d75f09 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1014,8 +1014,6 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
 			break;
 		case TGSI_OPCODE_ABS:
 			alu.src[0].abs = 1;
-			/* negation is performed after absolute value is taken */
-			alu.src[0].neg = 0;
 			break;
 		default:
 			break;
-- 
cgit v1.2.3


From bce4f9ac395986ee0acae2702ed73448333d81b8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 28 Feb 2011 01:54:36 +0100
Subject: st/mesa & u_vbuf_mgr: two small instanced drawing fixes

---
 src/gallium/auxiliary/util/u_vbuf_mgr.c | 5 ++++-
 src/mesa/state_tracker/st_draw.c        | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c
index dec8dd717e8..3cf8ee0831d 100644
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.c
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c
@@ -531,7 +531,10 @@ static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr,
          unsigned first, size;
          boolean flushed;
 
-         if (vb->stride) {
+         if (mgr->ve->ve[i].instance_divisor) {
+            first = 0;
+            size = vb->buffer->width0;
+         } else if (vb->stride) {
             first = vb->stride * min_index;
             size = vb->stride * count;
          } else {
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 6530a06ade4..c99eafbadf3 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -579,6 +579,7 @@ st_validate_varrays(struct gl_context *ctx,
    if (is_interleaved_arrays(vp, vpv, arrays)) {
       setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
                                 max_index);
+
       num_vbuffers = 1;
       num_velements = vpv->num_inputs;
       if (num_velements == 0)
@@ -645,6 +646,7 @@ st_draw_vbo(struct gl_context *ctx,
       for (i = 0; i < nr_prims; i++) {
          min_index = MIN2(min_index, prims[i].start);
          max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
+         max_index = MAX2(max_index, prims[i].num_instances);
       }
    }
 
-- 
cgit v1.2.3


From 96bbc627f369c0100b950f81531b1fe9ef586c34 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 28 Feb 2011 02:00:01 +0100
Subject: r600g: implement instanced drawing support

---
 src/gallium/drivers/r600/eg_asm.c            |  26 +--
 src/gallium/drivers/r600/r600_asm.c          | 230 ++++++++++++++++++---------
 src/gallium/drivers/r600/r600_asm.h          |   3 +-
 src/gallium/drivers/r600/r600_pipe.c         |   2 +-
 src/gallium/drivers/r600/r600_shader.c       |  31 +++-
 src/gallium/drivers/r600/r600_state_common.c |   2 +-
 6 files changed, 191 insertions(+), 103 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 80c5de39750..8190df725df 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -94,31 +94,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	return 0;
 }
 
-void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+void eg_cf_vtx(struct r600_vertex_element *ve)
 {
-	struct r600_pipe_state *rstate;
-	unsigned i = 0;
-
-	if (count > 8) {
-		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
-		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-				S_SQ_CF_WORD1_BARRIER(1) |
-				S_SQ_CF_WORD1_COUNT(8 - 1);
-		bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
-		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-				S_SQ_CF_WORD1_BARRIER(1) |
-				S_SQ_CF_WORD1_COUNT(count - 8 - 1);
-	} else {
-		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
-		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-				S_SQ_CF_WORD1_BARRIER(1) |
-				S_SQ_CF_WORD1_COUNT(count - 1);
-	}
-	bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
-	bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
-			S_SQ_CF_WORD1_BARRIER(1);
-
-	rstate = &ve->rstate;
+	struct r600_pipe_state *rstate = &ve->rstate;
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
 	rstate->nregs = 0;
 	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index de796188fde..5d59356bf70 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -83,6 +83,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
 			return 1;
@@ -1374,7 +1375,8 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
 				S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
 				S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
 				S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
-	bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
+	bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) |
+				S_SQ_VTX_WORD2_MEGA_FETCH(1);
 	bc->bytecode[id++] = 0;
 	return 0;
 }
@@ -1894,12 +1896,13 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z);
 			fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w);
 			fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields);
-			fprintf(stderr, "DATA_FORMAT:%d ", vtx->data_format);
-			fprintf(stderr, "NUM_FORMAT_ALL:%d ", vtx->num_format_all);
-			fprintf(stderr, "FORMAT_COMP_ALL:%d ", vtx->format_comp_all);
-			fprintf(stderr, "SRF_MODE_ALL:%d\n", vtx->srf_mode_all);
+			fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format);
+			fprintf(stderr, "NUM:%d ", vtx->num_format_all);
+			fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
+			fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
 			id++;
-			fprintf(stderr, "%04d %08X   \n", id, bc->bytecode[id]);
+			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
+			fprintf(stderr, "OFFSET:%d\n", vtx->offset);
 			//TODO
 			id++;
 			fprintf(stderr, "%04d %08X   \n", id, bc->bytecode[id]);
@@ -1910,29 +1913,9 @@ void r600_bc_dump(struct r600_bc *bc)
 	fprintf(stderr, "--------------------------------------\n");
 }
 
-static void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+static void r600_cf_vtx(struct r600_vertex_element *ve)
 {
 	struct r600_pipe_state *rstate;
-	unsigned i = 0;
-
-	if (count > 8) {
-		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
-		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-						S_SQ_CF_WORD1_BARRIER(1) |
-						S_SQ_CF_WORD1_COUNT(8 - 1);
-		bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
-		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-						S_SQ_CF_WORD1_BARRIER(1) |
-						S_SQ_CF_WORD1_COUNT(count - 8 - 1);
-	} else {
-		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
-		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-						S_SQ_CF_WORD1_BARRIER(1) |
-						S_SQ_CF_WORD1_COUNT(count - 1);
-	}
-	bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
-	bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
-			S_SQ_CF_WORD1_BARRIER(1);
 
 	rstate = &ve->rstate;
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
@@ -2078,37 +2061,19 @@ out_unknown:
 
 int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
 {
-	unsigned ndw, i;
-	u32 *bytecode;
-	unsigned fetch_resource_start = 0, format, num_format, format_comp;
+	static int dump_shaders = -1;
+
+	struct r600_bc bc;
+	struct r600_bc_vtx vtx;
 	struct pipe_vertex_element *elements = ve->elements;
 	const struct util_format_description *desc;
-
-	/* 2 dwords for cf aligned to 4 + 4 dwords per input */
-	ndw = 8 + ve->count * 4;
-	ve->fs_size = ndw * 4;
-
-	/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
-	ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
-	if (ve->fetch_shader == NULL) {
-		return -ENOMEM;
-	}
-
-	bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
-	if (bytecode == NULL) {
-		r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
-		return -ENOMEM;
-	}
-
-	if (rctx->family >= CHIP_CEDAR) {
-		eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
-	} else {
-		r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
-		fetch_resource_start = 160;
-	}
+	unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160;
+	unsigned format, num_format, format_comp;
+	u32 *bytecode;
+        int i, r;
 
 	/* vertex elements offset need special handling, if offset is bigger
-	 * than what we can put in fetch instruction then we need to alterate
+	+ * than what we can put in fetch instruction then we need to alterate
 	 * the vertex resource offset. In such case in order to simplify code
 	 * we will bound one resource per elements. It's a worst case scenario.
 	 */
@@ -2119,40 +2084,155 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 		}
 	}
 
+	memset(&bc, 0, sizeof(bc));
+	r = r600_bc_init(&bc, r600_get_family(rctx->radeon));
+	if (r)
+		return r;
+
+	for (i = 0; i < ve->count; i++) {
+	        if (elements[i].instance_divisor > 1) {
+			struct r600_bc_alu alu;
+
+			memset(&alu, 0, sizeof(alu));
+                        alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+                        alu.src[0].sel = 0;
+                        alu.src[0].chan = 3;
+
+			alu.dst.sel = i + 1;
+			alu.dst.chan = 3;
+			alu.dst.write = 1;
+			alu.last = 1;
+
+                        if ((r = r600_bc_add_alu(&bc, &alu))) {
+				r600_bc_clear(&bc);
+                                return r;
+                        }
+
+			memset(&alu, 0, sizeof(alu));
+			alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
+			alu.src[0].sel = i + 1;
+			alu.src[0].chan = 3;
+
+			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+			alu.src[1].value = fui(1.0f / (float)elements[i].instance_divisor);
+
+			alu.dst.sel = i + 1;
+			alu.dst.chan = 3;
+			alu.dst.write = 1;
+			alu.last = 1;
+
+                        if ((r = r600_bc_add_alu(&bc, &alu))) {
+				r600_bc_clear(&bc);
+                                return r;
+                        }
+
+			memset(&alu, 0, sizeof(alu));
+			alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
+			alu.src[0].sel = i + 1;
+			alu.src[0].chan = 3;
+
+			alu.dst.sel = i + 1;
+			alu.dst.chan = 3;
+			alu.dst.write = 1;
+			alu.last = 1;
+
+                        if ((r = r600_bc_add_alu(&bc, &alu))) {
+				r600_bc_clear(&bc);
+                                return r;
+                        }
+
+			memset(&alu, 0, sizeof(alu));
+                        alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT);
+                        alu.src[0].sel = i + 1;
+                        alu.src[0].chan = 3;
+
+			alu.dst.sel = i + 1;
+			alu.dst.chan = 3;
+			alu.dst.write = 1;
+			alu.last = 1;
+
+                        if ((r = r600_bc_add_alu(&bc, &alu))) {
+				r600_bc_clear(&bc);
+                                return r;
+                        }
+	        }
+	}
+
 	for (i = 0; i < ve->count; i++) {
 		unsigned vbuffer_index;
 		r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp);
 		desc = util_format_description(ve->elements[i].src_format);
 		if (desc == NULL) {
+			r600_bc_clear(&bc);
 			R600_ERR("unknown format %d\n", ve->elements[i].src_format);
-			r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
 			return -EINVAL;
 		}
 
 		/* see above for vbuffer_need_offset explanation */
 		vbuffer_index = elements[i].vertex_buffer_index;
-		if (ve->vbuffer_need_offset) {
-			bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
-		} else {
-			bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
+		memset(&vtx, 0, sizeof(vtx));
+		vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start;
+		vtx.fetch_type = elements[i].instance_divisor ? 1 : 0;
+		vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
+		vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
+		vtx.mega_fetch_count = 16;
+		vtx.dst_gpr = i + 1;
+		vtx.dst_sel_x = desc->swizzle[0];
+		vtx.dst_sel_y = desc->swizzle[1];
+		vtx.dst_sel_z = desc->swizzle[2];
+		vtx.dst_sel_w = desc->swizzle[3];
+		vtx.data_format = format;
+		vtx.num_format_all = num_format;
+		vtx.format_comp_all = format_comp;
+		vtx.srf_mode_all = 1;
+		vtx.offset = elements[i].src_offset;
+
+		if ((r = r600_bc_add_vtx(&bc, &vtx))) {
+			r600_bc_clear(&bc);
+			return r;
 		}
-		bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
-					S_SQ_VTX_WORD0_SRC_SEL_X(0) |
-					S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
-		bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
-					S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
-					S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
-					S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
-					S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
-					S_SQ_VTX_WORD1_DATA_FORMAT(format) |
-					S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
-					S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
-					S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
-					S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
-		bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
-					S_SQ_VTX_WORD2_MEGA_FETCH(1);
-		bytecode[8 + i * 4 + 3] = 0;
 	}
+
+	r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
+
+	/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+	ve->fetch_shader = r600_bo(rctx->radeon, bc.ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+	if (ve->fetch_shader == NULL) {
+		r600_bc_clear(&bc);
+		return -ENOMEM;
+	}
+
+        ve->fs_size = bc.ndw*4;
+	if ((r = r600_bc_build(&bc))) {
+		r600_bc_clear(&bc);
+		return r;
+	}
+
+        if (dump_shaders == -1)
+                dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+
+	if (dump_shaders) {
+		fprintf(stderr, "--------------------------------------------------------------\n");
+		r600_bc_dump(&bc);
+		fprintf(stderr, "______________________________________________________________\n");
+	}
+
+	bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+	if (bytecode == NULL) {
+		r600_bc_clear(&bc);
+		r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+		return -ENOMEM;
+	}
+
+	memcpy(bytecode, bc.bytecode, ve->fs_size);
+
 	r600_bo_unmap(rctx->radeon, ve->fetch_shader);
+	r600_bc_clear(&bc);
+
+	if (rctx->family >= CHIP_CEDAR)
+		eg_cf_vtx(ve);
+	else
+		r600_cf_vtx(ve);
+
 	return 0;
 }
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 921d0d98454..b22c21d1e23 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -103,6 +103,7 @@ struct r600_bc_vtx {
 	unsigned			num_format_all;
 	unsigned			format_comp_all;
 	unsigned			srf_mode_all;
+	unsigned			offset;
 };
 
 struct r600_bc_output {
@@ -189,7 +190,7 @@ struct r600_bc {
 
 /* eg_asm.c */
 int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
-void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+void eg_cf_vtx(struct r600_vertex_element *ve);
 
 /* r600_asm.c */
 int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 62d108f3518..adcd74aec76 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -285,13 +285,13 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
 	case PIPE_CAP_DEPTH_CLAMP:
 	case PIPE_CAP_SHADER_STENCIL_EXPORT:
+	case PIPE_CAP_INSTANCED_DRAWING:
 		return 1;
 
 	/* Unsupported features (boolean caps). */
 	case PIPE_CAP_STREAM_OUTPUT:
 	case PIPE_CAP_PRIMITIVE_RESTART:
 	case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
-	case PIPE_CAP_INSTANCED_DRAWING:
 		return 0;
 
 	case PIPE_CAP_ARRAY_TEXTURES:
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 9fcb1d75f09..65923fb9648 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -420,6 +420,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
 	unsigned i;
+	int r;
 
 	switch (d->Declaration.File) {
 	case TGSI_FILE_INPUT:
@@ -451,6 +452,26 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 	case TGSI_FILE_SAMPLER:
 	case TGSI_FILE_ADDRESS:
 		break;
+
+        case TGSI_FILE_SYSTEM_VALUE:
+                if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+                        struct r600_bc_alu alu;
+                        memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+                        alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+                        alu.src[0].sel = 0;
+                        alu.src[0].chan = 3;
+
+			alu.dst.sel = 0;
+			alu.dst.chan = 3;
+			alu.dst.write = 1;
+                        alu.last = 1;
+
+                        if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+                                return r;
+                        break;
+                }
+
 	default:
 		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
 		return -EINVAL;
@@ -521,6 +542,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
 	r600_src->neg = tgsi_src->Register.Negate;
 	r600_src->abs = tgsi_src->Register.Absolute;
+
 	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
 		int index;
 		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
@@ -535,7 +557,14 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 		index = tgsi_src->Register.Index;
 		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
 		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
-	} else {
+	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
+                /* assume we want TGSI_SEMANTIC_INSTANCEID here */
+                r600_src->swizzle[0] = 3;
+                r600_src->swizzle[1] = 3;
+                r600_src->swizzle[2] = 3;
+                r600_src->swizzle[3] = 3;
+                r600_src->sel = 0;
+        } else {
 		if (tgsi_src->Register.Indirect)
 			r600_src->rel = V_SQ_REL_RELATIVE;
 		r600_src->sel = tgsi_src->Register.Index;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 72707fbd8b8..677e2209340 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -520,7 +520,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	r600_context_pipe_state_set(&rctx->ctx, &vgt);
 
 	rdraw.vgt_num_indices = draw.info.count;
-	rdraw.vgt_num_instances = 1;
+	rdraw.vgt_num_instances = draw.info.instance_count;
 	rdraw.vgt_index_type = vgt_dma_index_type;
 	rdraw.vgt_draw_initiator = vgt_draw_initiator;
 	rdraw.indices = NULL;
-- 
cgit v1.2.3


From 0495425dc3d9f1c12e30df3f8f7f450687d13d6b Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sun, 27 Feb 2011 17:00:50 +1000
Subject: r300g: force swizzles for RGTC

still can't get signed to work
---
 src/gallium/drivers/r300/r300_texture.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 2db5ab9a287..b97c45ac198 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -217,10 +217,15 @@ uint32_t r300_translate_texformat(enum pipe_format format,
             case PIPE_FORMAT_RGTC1_SNORM:
                 result |= sign_bit[0];
             case PIPE_FORMAT_RGTC1_UNORM:
+                result &= ~(0xfff << 9); /* mask off swizzle */
+                result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT;
                 return R500_TX_FORMAT_ATI1N | result;
             case PIPE_FORMAT_RGTC2_SNORM:
                 result |= sign_bit[0] | sign_bit[1];
             case PIPE_FORMAT_RGTC2_UNORM:
+                result &= ~(0xfff << 9); /* mask off swizzle */
+                result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT |
+                          R300_TX_FORMAT_X << R300_TX_FORMAT_G_SHIFT;
                 return R400_TX_FORMAT_ATI2N | result;
             default:
                 return ~0; /* Unsupported/unknown. */
-- 
cgit v1.2.3


From e3709c26a643604053561729ba26cb03e3a772e3 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 28 Feb 2011 13:33:17 +1000
Subject: rgtc: llvmpipe/softpipe refuse RGTC until u_format has support.

So far I haven't implemented the u_format code for these.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/llvmpipe/lp_screen.c | 5 +++++
 src/gallium/drivers/softpipe/sp_screen.c | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 21e8012d46a..2c32aa93cdf 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -278,6 +278,11 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
       return util_format_s3tc_enabled;
    }
 
+   /* u_format doesn't support RGTC yet */
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+      return FALSE;
+   }
+
    /*
     * Everything else should be supported by u_format.
     */
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 6d47fb96280..685fbfc3087 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -248,6 +248,11 @@ softpipe_is_format_supported( struct pipe_screen *screen,
       return util_format_s3tc_enabled;
    }
 
+   /* u_format doesn't implement RGTC yet */
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+	return FALSE;
+   }
+
    /*
     * Everything else should be supported by u_format.
     */
-- 
cgit v1.2.3


From e792e79f5ae6be008d9521eccf1c647492cd682a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 25 Feb 2011 09:12:40 +1000
Subject: mesa: make_float_temp_image non-static

We need this to do signed stuff for RGTC.
---
 src/mesa/main/texstore.c | 46 +++++++++++++++++++++++-----------------------
 src/mesa/main/texstore.h |  9 +++++++++
 2 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 7dd4a1fa650..e8d8964e28a 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -310,15 +310,15 @@ compute_component_mapping(GLenum inFormat, GLenum outFormat,
  * \param srcPacking  source image pixel packing
  * \return resulting image with format = textureBaseFormat and type = GLfloat.
  */
-static GLfloat *
-make_temp_float_image(struct gl_context *ctx, GLuint dims,
-                      GLenum logicalBaseFormat,
-                      GLenum textureBaseFormat,
-                      GLint srcWidth, GLint srcHeight, GLint srcDepth,
-                      GLenum srcFormat, GLenum srcType,
-                      const GLvoid *srcAddr,
-                      const struct gl_pixelstore_attrib *srcPacking,
-                      GLbitfield transferOps)
+GLfloat *
+_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims,
+			    GLenum logicalBaseFormat,
+			    GLenum textureBaseFormat,
+			    GLint srcWidth, GLint srcHeight, GLint srcDepth,
+			    GLenum srcFormat, GLenum srcType,
+			    const GLvoid *srcAddr,
+			    const struct gl_pixelstore_attrib *srcPacking,
+			    GLbitfield transferOps)
 {
    GLfloat *tempImage;
    const GLint components = _mesa_components_in_format(logicalBaseFormat);
@@ -2065,7 +2065,7 @@ _mesa_texstore_argb2101010(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2317,7 +2317,7 @@ _mesa_texstore_unorm1616(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2394,7 +2394,7 @@ _mesa_texstore_unorm16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2452,7 +2452,7 @@ _mesa_texstore_rgba_16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2519,7 +2519,7 @@ _mesa_texstore_signed_rgba_16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2901,7 +2901,7 @@ _mesa_texstore_signed_r8(TEXSTORE_PARAMS)
    /* XXX look at adding optimized paths */
    {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2946,7 +2946,7 @@ _mesa_texstore_signed_rg88(TEXSTORE_PARAMS)
    /* XXX look at adding optimized paths */
    {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2991,7 +2991,7 @@ _mesa_texstore_signed_rgbx8888(TEXSTORE_PARAMS)
 
    {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3104,7 +3104,7 @@ _mesa_texstore_signed_rgba8888(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3413,7 +3413,7 @@ _mesa_texstore_rgba_float32(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3483,7 +3483,7 @@ _mesa_texstore_rgba_float16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3549,7 +3549,7 @@ _mesa_texstore_rgba_int8(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3614,7 +3614,7 @@ _mesa_texstore_rgba_int16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3679,7 +3679,7 @@ _mesa_texstore_rgba_int32(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
diff --git a/src/mesa/main/texstore.h b/src/mesa/main/texstore.h
index 177ede423f5..2f3c4e821fc 100644
--- a/src/mesa/main/texstore.h
+++ b/src/mesa/main/texstore.h
@@ -81,6 +81,15 @@ _mesa_make_temp_chan_image(struct gl_context *ctx, GLuint dims,
                            const GLvoid *srcAddr,
                            const struct gl_pixelstore_attrib *srcPacking);
 
+GLfloat *
+_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims,
+			    GLenum logicalBaseFormat,
+			    GLenum textureBaseFormat,
+			    GLint srcWidth, GLint srcHeight, GLint srcDepth,
+			    GLenum srcFormat, GLenum srcType,
+			    const GLvoid *srcAddr,
+			    const struct gl_pixelstore_attrib *srcPacking,
+			    GLbitfield transferOps);
 
 extern void
 _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level,
-- 
cgit v1.2.3


From 8d47c919859e9cd90cfc04eb220ac8a5aa87048e Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 22 Feb 2011 10:35:15 +1000
Subject: mesa: Add RGTC texture store/fetch support.

This adds support for the RGTC unsigned and signed
texture storage and fetch methods.

The code is a port of the DXT5 alpha compression code.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/main/formats.c          |   42 +-
 src/mesa/main/formats.h          |    6 +
 src/mesa/main/texcompress.c      |   20 +
 src/mesa/main/texcompress_rgtc.c | 1122 ++++++++++++++++++++++++++++++++++++++
 src/mesa/main/texcompress_rgtc.h |   60 ++
 src/mesa/main/texfetch.c         |   31 +-
 src/mesa/main/texstore.c         |    8 +-
 src/mesa/sources.mak             |    1 +
 8 files changed, 1287 insertions(+), 3 deletions(-)
 create mode 100644 src/mesa/main/texcompress_rgtc.c
 create mode 100644 src/mesa/main/texcompress_rgtc.h

diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 1e395363475..947db84a69e 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -890,7 +890,43 @@ static struct gl_format_info format_info[MESA_FORMAT_COUNT] =
       16, 16, 16, 16,
       0, 0, 0, 0, 0,
       1, 1, 8
-   }
+   },
+   {
+     MESA_FORMAT_RED_RGTC1,
+     "MESA_FORMAT_RED_RGTC1",
+     GL_RED,
+     GL_UNSIGNED_NORMALIZED,
+     4, 0, 0, 0,
+     0, 0, 0, 0, 0,
+     4, 4, 8                     /* 8 bytes per 4x4 block */
+   },
+   {
+     MESA_FORMAT_SIGNED_RED_RGTC1,
+     "MESA_FORMAT_SIGNED_RED_RGTC1",
+     GL_RED,
+     GL_SIGNED_NORMALIZED,
+     4, 0, 0, 0,
+     0, 0, 0, 0, 0,
+     4, 4, 8                     /* 8 bytes per 4x4 block */
+   },
+   {
+     MESA_FORMAT_RG_RGTC2,
+     "MESA_FORMAT_RG_RGTC2",
+     GL_RG,
+     GL_UNSIGNED_NORMALIZED,
+     4, 4, 0, 0,
+     0, 0, 0, 0, 0,
+     4, 4, 16                     /* 16 bytes per 4x4 block */
+   },
+   {
+     MESA_FORMAT_SIGNED_RG_RGTC2,
+     "MESA_FORMAT_SIGNED_RG_RGTC2",
+     GL_RG,
+     GL_SIGNED_NORMALIZED,
+     4, 4, 0, 0,
+     0, 0, 0, 0, 0,
+     4, 4, 16                     /* 16 bytes per 4x4 block */
+   },
 };
 
 
@@ -1530,6 +1566,10 @@ _mesa_format_to_type_and_comps(gl_format format,
    case MESA_FORMAT_SRGBA_DXT5:
 #endif
 #endif
+   case MESA_FORMAT_RED_RGTC1:
+   case MESA_FORMAT_SIGNED_RED_RGTC1:
+   case MESA_FORMAT_RG_RGTC2:
+   case MESA_FORMAT_SIGNED_RG_RGTC2:
       /* XXX generate error instead? */
       *datatype = GL_UNSIGNED_BYTE;
       *comps = 0;
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index 9a5cef37788..e21967e2b0c 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -179,6 +179,12 @@ typedef enum
    MESA_FORMAT_RGBA_16,           /* ... */
    /*@}*/
 
+   /*@{*/
+   MESA_FORMAT_RED_RGTC1,
+   MESA_FORMAT_SIGNED_RED_RGTC1,
+   MESA_FORMAT_RG_RGTC2,
+   MESA_FORMAT_SIGNED_RG_RGTC2,
+   /*@}*/
    MESA_FORMAT_COUNT
 } gl_format;
 
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 7a0b522a2d8..82d02ed0ecf 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -64,6 +64,7 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a
          n += 2;
       }
    }
+   /* don't return RGTC - ARB_texture_compression_rgtc query 19 */
    if (ctx->Extensions.EXT_texture_compression_s3tc) {
       if (formats) {
          formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
@@ -163,6 +164,15 @@ _mesa_glenum_to_compressed_format(GLenum format)
    case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
       return MESA_FORMAT_SRGBA_DXT5;
 
+   case GL_COMPRESSED_RED_RGTC1:
+      return MESA_FORMAT_RED_RGTC1;
+   case GL_COMPRESSED_SIGNED_RED_RGTC1:
+      return MESA_FORMAT_SIGNED_RED_RGTC1;
+   case GL_COMPRESSED_RG_RGTC2:
+      return MESA_FORMAT_RG_RGTC2;
+   case GL_COMPRESSED_SIGNED_RG_RGTC2:
+      return MESA_FORMAT_SIGNED_RG_RGTC2;
+
    default:
       return MESA_FORMAT_NONE;
    }
@@ -209,6 +219,16 @@ _mesa_compressed_format_to_glenum(struct gl_context *ctx, GLuint mesaFormat)
       return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT;
 #endif
 #endif
+
+   case MESA_FORMAT_RED_RGTC1:
+      return GL_COMPRESSED_RED_RGTC1;
+   case MESA_FORMAT_SIGNED_RED_RGTC1:
+      return GL_COMPRESSED_SIGNED_RED_RGTC1;
+   case MESA_FORMAT_RG_RGTC2:
+      return GL_COMPRESSED_RG_RGTC2;
+   case MESA_FORMAT_SIGNED_RG_RGTC2:
+      return GL_COMPRESSED_SIGNED_RG_RGTC2;
+
    default:
       _mesa_problem(ctx, "Unexpected mesa texture format in"
                     " _mesa_compressed_format_to_glenum()");
diff --git a/src/mesa/main/texcompress_rgtc.c b/src/mesa/main/texcompress_rgtc.c
new file mode 100644
index 00000000000..b7725f4a98f
--- /dev/null
+++ b/src/mesa/main/texcompress_rgtc.c
@@ -0,0 +1,1122 @@
+/*
+ * Copyright (C) 2011 Red Hat Inc.
+ * 
+ * block compression parts are:
+ * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:
+ *    Dave Airlie
+ */
+
+/**
+ * \file texcompress_rgtc.c
+ * GL_EXT_texture_compression_rgtc support.
+ */
+
+
+#include "glheader.h"
+#include "imports.h"
+#include "colormac.h"
+#include "image.h"
+#include "macros.h"
+#include "mfeatures.h"
+#include "mipmap.h"
+#include "texcompress.h"
+#include "texcompress_rgtc.h"
+#include "texstore.h"
+
+#define RGTC_DEBUG 0
+
+static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4],
+			     GLint numxpixels, GLint numypixels); /* unsigned block encoder, defined further down */
+static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4],
+			     GLint numxpixels, GLint numypixels); /* signed block encoder, defined further down */
+
+static void extractsrc_u( GLubyte srcpixels[4][4], const GLchan *srcaddr,
+			  GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
+{
+   /* Gather one channel of a (possibly partial) 4x4 tile as 8-bit values;
+    * comps is the number of interleaved channels per source texel. */
+   GLint row, col;
+   for (row = 0; row < numypixels; row++) {
+      const GLchan *texel = srcaddr + row * srcRowStride * comps;
+      for (col = 0; col < numxpixels; col++, texel += comps) {
+	 srcpixels[row][col] = *texel / (CHAN_MAX / 255);
+      }
+   }
+}
+
+static void extractsrc_s( GLbyte srcpixels[4][4], const GLfloat *srcaddr,
+			  GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
+{
+   /* Gather one channel of a (possibly partial) 4x4 tile from float texels,
+    * converting with FLOAT_TO_BYTE_TEX; comps is the channel count per texel. */
+   GLint row, col;
+   for (row = 0; row < numypixels; row++) {
+      const GLfloat *texel = srcaddr + row * srcRowStride * comps;
+      for (col = 0; col < numxpixels; col++, texel += comps) {
+	 srcpixels[row][col] = FLOAT_TO_BYTE_TEX(*texel);
+      }
+   }
+}
+
+
+GLboolean
+_mesa_texstore_red_rgtc1(TEXSTORE_PARAMS)
+{
+   GLubyte *dst;
+   const GLint texWidth = dstRowStride * 4 / 8; /* a bit of a hack */
+   const GLchan *tempImage = NULL;
+   int i, j;
+   int numxpixels, numypixels;
+   const GLchan *srcaddr; /* typed pointer: arithmetic on void * is a GNU extension */
+   GLubyte srcpixels[4][4];
+   GLubyte *blkaddr;
+   GLint dstRowDiff;
+   ASSERT(dstFormat == MESA_FORMAT_RED_RGTC1);
+   ASSERT(dstXoffset % 4 == 0);
+   ASSERT(dstYoffset % 4 == 0);
+   ASSERT(dstZoffset % 4 == 0);
+   (void) dstZoffset;
+   (void) dstImageOffsets;
+   /* Store an image as RED_RGTC1: build a one-channel GLchan copy, then encode it
+    * one 4x4 tile (8 bytes) at a time.  Returns GL_FALSE only if the copy fails. */
+   tempImage = _mesa_make_temp_chan_image(ctx, dims,
+					  baseInternalFormat,
+					  _mesa_get_format_base_format(dstFormat),
+					  srcWidth, srcHeight, srcDepth,
+					  srcFormat, srcType, srcAddr,
+					  srcPacking);
+   if (!tempImage)
+      return GL_FALSE; /* out of memory */
+
+   dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+                                        dstFormat,
+                                        texWidth, (GLubyte *) dstAddr);
+   /* a block row emits 8 bytes per 4 texels (2 per texel column); dstRowStride is taken as the byte pitch between block rows */
+   blkaddr = dst;
+   dstRowDiff = dstRowStride >= (srcWidth * 2) ? dstRowStride - (((srcWidth + 3) & ~3) * 2) : 0;
+   for (j = 0; j < srcHeight; j+=4) {
+      if (srcHeight > j + 3) numypixels = 4;
+      else numypixels = srcHeight - j;
+      srcaddr = tempImage + j * srcWidth;
+      for (i = 0; i < srcWidth; i += 4) {
+	 if (srcWidth > i + 3) numxpixels = 4;
+	 else numxpixels = srcWidth - i;
+	 extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1);
+	 encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+	 srcaddr += numxpixels;
+	 blkaddr += 8;
+      }
+      blkaddr += dstRowDiff;
+   }
+   /* tempImage is non-NULL here (checked above) */
+   free((void *) tempImage);
+
+   return GL_TRUE;
+}
+
+GLboolean
+_mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS)
+{
+   GLbyte *dst;
+   const GLint texWidth = dstRowStride * 4 / 8; /* a bit of a hack */
+   const GLfloat *tempImage = NULL;
+   int i, j;
+   int numxpixels, numypixels;
+   const GLfloat *srcaddr;
+   GLbyte srcpixels[4][4];
+   GLbyte *blkaddr;
+   GLint dstRowDiff;
+   ASSERT(dstFormat == MESA_FORMAT_SIGNED_RED_RGTC1);
+   ASSERT(dstXoffset % 4 == 0);
+   ASSERT(dstYoffset % 4 == 0);
+   ASSERT(dstZoffset % 4 == 0);
+   (void) dstZoffset;
+   (void) dstImageOffsets;
+   /* Store an image as SIGNED_RED_RGTC1 via a one-channel float copy; returns GL_FALSE only if the copy fails. */
+   tempImage = _mesa_make_temp_float_image(ctx, dims,
+					   baseInternalFormat,
+					   _mesa_get_format_base_format(dstFormat),
+					   srcWidth, srcHeight, srcDepth,
+					   srcFormat, srcType, srcAddr,
+					   srcPacking, 0x0);
+   if (!tempImage)
+      return GL_FALSE; /* out of memory */
+
+   dst = (GLbyte *)_mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+						  dstFormat,
+						  texWidth, (GLubyte *) dstAddr);
+   /* a block row emits 8 bytes per 4 texels (2 per texel column); dstRowStride is taken as the byte pitch between block rows */
+   blkaddr = dst;
+   dstRowDiff = dstRowStride >= (srcWidth * 2) ? dstRowStride - (((srcWidth + 3) & ~3) * 2) : 0;
+   for (j = 0; j < srcHeight; j+=4) {
+      if (srcHeight > j + 3) numypixels = 4;
+      else numypixels = srcHeight - j;
+      srcaddr = tempImage + j * srcWidth;
+      for (i = 0; i < srcWidth; i += 4) {
+	 if (srcWidth > i + 3) numxpixels = 4;
+	 else numxpixels = srcWidth - i;
+	 extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1);
+	 encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+	 srcaddr += numxpixels;
+	 blkaddr += 8;
+      }
+      blkaddr += dstRowDiff;
+   }
+   /* tempImage is non-NULL here (checked above) */
+   free((void *) tempImage);
+
+   return GL_TRUE;
+}
+
+GLboolean
+_mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
+{
+   GLubyte *dst;
+   const GLint texWidth = dstRowStride * 4 / 16; /* a bit of a hack */
+   const GLchan *tempImage = NULL;
+   int i, j;
+   int numxpixels, numypixels;
+   const GLchan *srcaddr; /* typed pointer: arithmetic on void * is a GNU extension */
+   GLubyte srcpixels[4][4];
+   GLubyte *blkaddr;
+   GLint dstRowDiff;
+   /* Store an image as RG_RGTC2: each 4x4 tile becomes a red then a green 8-byte block; GL_FALSE only if the temp copy fails. */
+   ASSERT(dstFormat == MESA_FORMAT_RG_RGTC2);
+   ASSERT(dstXoffset % 4 == 0);
+   ASSERT(dstYoffset % 4 == 0);
+   ASSERT(dstZoffset % 4 == 0);
+   (void) dstZoffset;
+   (void) dstImageOffsets;
+
+   tempImage = _mesa_make_temp_chan_image(ctx, dims,
+					  baseInternalFormat,
+					  _mesa_get_format_base_format(dstFormat),
+					  srcWidth, srcHeight, srcDepth,
+					  srcFormat, srcType, srcAddr,
+					  srcPacking);
+   if (!tempImage)
+      return GL_FALSE; /* out of memory */
+
+   dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+                                        dstFormat,
+                                        texWidth, (GLubyte *) dstAddr);
+   /* a block row emits 16 bytes per 4 texels (4 per texel column); dstRowStride is taken as the byte pitch between block rows */
+   blkaddr = dst;
+   dstRowDiff = dstRowStride >= (srcWidth * 4) ? dstRowStride - (((srcWidth + 3) & ~3) * 4) : 0;
+   for (j = 0; j < srcHeight; j+=4) {
+      if (srcHeight > j + 3) numypixels = 4;
+      else numypixels = srcHeight - j;
+      srcaddr = tempImage + j * srcWidth * 2;
+      for (i = 0; i < srcWidth; i += 4) {
+	 if (srcWidth > i + 3) numxpixels = 4;
+	 else numxpixels = srcWidth - i;
+	 extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2);
+	 encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+
+	 blkaddr += 8;
+	 extractsrc_u(srcpixels, srcaddr + 1, srcWidth, numxpixels, numypixels, 2);
+	 encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+
+	 blkaddr += 8;
+
+	 srcaddr += numxpixels * 2;
+      }
+      blkaddr += dstRowDiff;
+   }
+   /* tempImage is non-NULL here (checked above) */
+   free((void *) tempImage);
+
+   return GL_TRUE;
+}
+
+GLboolean
+_mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS)
+{
+   GLbyte *dst;
+   const GLint texWidth = dstRowStride * 4 / 16; /* a bit of a hack */
+   const GLfloat *tempImage = NULL;
+   int i, j;
+   int numxpixels, numypixels;
+   const GLfloat *srcaddr;
+   GLbyte srcpixels[4][4];
+   GLbyte *blkaddr;
+   GLint dstRowDiff;
+   /* Store an image as SIGNED_RG_RGTC2: each 4x4 tile becomes a red then a green 8-byte block; GL_FALSE only if the temp copy fails. */
+   ASSERT(dstFormat == MESA_FORMAT_SIGNED_RG_RGTC2);
+   ASSERT(dstXoffset % 4 == 0);
+   ASSERT(dstYoffset % 4 == 0);
+   ASSERT(dstZoffset % 4 == 0);
+   (void) dstZoffset;
+   (void) dstImageOffsets;
+
+   tempImage = _mesa_make_temp_float_image(ctx, dims,
+					   baseInternalFormat,
+					   _mesa_get_format_base_format(dstFormat),
+					   srcWidth, srcHeight, srcDepth,
+					   srcFormat, srcType, srcAddr,
+					   srcPacking, 0x0);
+   if (!tempImage)
+      return GL_FALSE; /* out of memory */
+
+   dst = (GLbyte *)_mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+						  dstFormat,
+						  texWidth, (GLubyte *) dstAddr);
+   /* a block row emits 16 bytes per 4 texels (4 per texel column); dstRowStride is taken as the byte pitch between block rows */
+   blkaddr = dst;
+   dstRowDiff = dstRowStride >= (srcWidth * 4) ? dstRowStride - (((srcWidth + 3) & ~3) * 4) : 0;
+   for (j = 0; j < srcHeight; j += 4) {
+      if (srcHeight > j + 3) numypixels = 4;
+      else numypixels = srcHeight - j;
+      srcaddr = tempImage + j * srcWidth * 2;
+      for (i = 0; i < srcWidth; i += 4) {
+	 if (srcWidth > i + 3) numxpixels = 4;
+	 else numxpixels = srcWidth - i;
+
+	 extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2);
+	 encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+	 blkaddr += 8;
+
+	 extractsrc_s(srcpixels, srcaddr + 1, srcWidth, numxpixels, numypixels, 2);
+	 encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+	 blkaddr += 8;
+
+	 srcaddr += numxpixels * 2;
+
+      }
+      blkaddr += dstRowDiff;
+   }
+   /* tempImage is non-NULL here (checked above) */
+   free((void *) tempImage);
+
+   return GL_TRUE;
+}
+
+static void _fetch_texel_rgtc_u(GLint srcRowStride, const GLubyte *pixdata,
+				GLint i, GLint j, GLchan *value, int comps)
+{
+   GLchan decode;
+   const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps);
+   const GLubyte alpha0 = blksrc[0];
+   const GLubyte alpha1 = blksrc[1];
+   const GLubyte bit_pos = ((j&3) * 4 + (i&3)) * 3;
+   const GLubyte acodelow = blksrc[2 + bit_pos / 8];
+   const GLubyte acodehigh = (bit_pos / 8 < 5) ? blksrc[3 + bit_pos / 8] : 0; /* texels 14/15 fit in byte 7: never read blksrc[8] */
+   const GLubyte code = (acodelow >> (bit_pos & 0x7) |
+      (acodehigh  << (8 - (bit_pos & 0x7)))) & 0x7;
+   /* Decode texel (i,j) of one channel into *value: codes 0/1 pick an endpoint, others interpolate (6/7 give 0/max when alpha0 <= alpha1). */
+   if (code == 0)
+      decode = UBYTE_TO_CHAN( alpha0 );
+   else if (code == 1)
+      decode = UBYTE_TO_CHAN( alpha1 );
+   else if (alpha0 > alpha1)
+      decode = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
+   else if (code < 6)
+      decode = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
+   else if (code == 6)
+      decode = 0;
+   else
+      decode = CHAN_MAX;
+
+   *value = decode;
+}
+
+
+static void _fetch_texel_rgtc_s(GLint srcRowStride, const GLbyte *pixdata,
+				GLint i, GLint j, GLbyte *value, int comps)
+{
+   GLbyte decode;
+   const GLbyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps);
+   const GLbyte alpha0 = blksrc[0];
+   const GLbyte alpha1 = blksrc[1];
+   const GLbyte bit_pos = ((j&3) * 4 + (i&3)) * 3;
+   const GLubyte acodelow = (GLubyte) blksrc[2 + bit_pos / 8]; /* unsigned so that >> cannot sign-extend into the code */
+   const GLubyte acodehigh = (bit_pos / 8 < 5) ? (GLubyte) blksrc[3 + bit_pos / 8] : 0; /* texels 14/15 fit in byte 7: never read blksrc[8] */
+   const GLubyte code = (acodelow >> (bit_pos & 0x7) |
+      (acodehigh  << (8 - (bit_pos & 0x7)))) & 0x7;
+   /* Decode texel (i,j) of one signed channel into *value: codes 0/1 pick an endpoint, others interpolate (6/7 give -128/127 when alpha0 <= alpha1). */
+   if (code == 0)
+      decode = alpha0;
+   else if (code == 1)
+      decode = alpha1;
+   else if (alpha0 > alpha1)
+      decode = ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7);
+   else if (code < 6)
+      decode = ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5);
+   else if (code == 6)
+      decode = -128;
+   else
+      decode = 127;
+
+   *value = decode;
+}
+
+void
+_mesa_fetch_texel_2d_f_red_rgtc1(const struct gl_texture_image *texImage,
+				 GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   /* Decode the single RGTC1 channel into red; green/blue are 0, alpha is 1. */
+   const GLubyte *blocks = (GLubyte *) texImage->Data;
+   GLchan r;
+   _fetch_texel_rgtc_u(texImage->RowStride, blocks, i, j, &r, 1);
+   texel[RCOMP] = CHAN_TO_FLOAT(r);
+   texel[GCOMP] = texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+void
+_mesa_fetch_texel_2d_f_signed_red_rgtc1(const struct gl_texture_image *texImage,
+					GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   /* Decode the single signed RGTC1 channel into red; green/blue are 0, alpha is 1. */
+   const GLbyte *blocks = (GLbyte *) texImage->Data;
+   GLbyte r;
+   _fetch_texel_rgtc_s(texImage->RowStride, blocks, i, j, &r, 1);
+   texel[RCOMP] = BYTE_TO_FLOAT_TEX(r);
+   texel[GCOMP] = texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+void
+_mesa_fetch_texel_2d_f_rg_rgtc2(const struct gl_texture_image *texImage,
+				 GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   /* Red is read from the block start, green from 8 bytes further on. */
+   const GLubyte *blocks = (GLubyte *) texImage->Data;
+   GLchan rg[2];
+   _fetch_texel_rgtc_u(texImage->RowStride, blocks, i, j, &rg[0], 2);
+   _fetch_texel_rgtc_u(texImage->RowStride, blocks + 8, i, j, &rg[1], 2);
+   texel[RCOMP] = CHAN_TO_FLOAT(rg[0]);
+   texel[GCOMP] = CHAN_TO_FLOAT(rg[1]);
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+void
+_mesa_fetch_texel_2d_f_signed_rg_rgtc2(const struct gl_texture_image *texImage,
+				       GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   /* Red is read from the block start, green from 8 bytes further on. */
+   const GLbyte *blocks = (GLbyte *) texImage->Data;
+   GLbyte rg[2];
+   _fetch_texel_rgtc_s(texImage->RowStride, blocks, i, j, &rg[0], 2);
+   _fetch_texel_rgtc_s(texImage->RowStride, blocks + 8, i, j, &rg[1], 2);
+   texel[RCOMP] = BYTE_TO_FLOAT_TEX(rg[0]);
+   texel[GCOMP] = BYTE_TO_FLOAT_TEX(rg[1]);
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+static void write_rgtc_encoded_channel(GLubyte *blkaddr,
+				       GLubyte alphabase1, GLubyte alphabase2,
+				       GLubyte alphaenc[16])
+{
+   /* Two endpoints, then sixteen 3-bit codes packed LSB-first; every code is masked so an out-of-range entry cannot spill into other texels. */
+   *blkaddr++ = alphabase1;
+   *blkaddr++ = alphabase2;
+   *blkaddr++ = (alphaenc[0] & 7) | ((alphaenc[1] & 7) << 3) | ((alphaenc[2] & 3) << 6);
+   *blkaddr++ = ((alphaenc[2] & 7) >> 2) | ((alphaenc[3] & 7) << 1) | ((alphaenc[4] & 7) << 4) | ((alphaenc[5] & 1) << 7);
+   *blkaddr++ = ((alphaenc[5] & 7) >> 1) | ((alphaenc[6] & 7) << 2) | ((alphaenc[7] & 7) << 5);
+   *blkaddr++ = (alphaenc[8] & 7) | ((alphaenc[9] & 7) << 3) | ((alphaenc[10] & 3) << 6);
+   *blkaddr++ = ((alphaenc[10] & 7) >> 2) | ((alphaenc[11] & 7) << 1) | ((alphaenc[12] & 7) << 4) | ((alphaenc[13] & 1) << 7);
+   *blkaddr++ = ((alphaenc[13] & 7) >> 1) | ((alphaenc[14] & 7) << 2) | ((alphaenc[15] & 7) << 5);
+}
+
+static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4],
+			     GLint numxpixels, GLint numypixels)
+{
+   GLubyte alphabase[2], alphause[2];
+   GLshort alphatest[2] = { 0 };
+   GLuint alphablockerror1, alphablockerror2, alphablockerror3;
+   GLubyte i, j, aindex, acutValues[7];
+   GLubyte alphaenc1[16] = { 0 }, alphaenc2[16] = { 0 }, alphaenc3[16] = { 0 }; /* zeroed: partial tiles leave some codes unset */
+   GLboolean alphaabsmin = GL_FALSE;
+   GLboolean alphaabsmax = GL_FALSE;
+   GLshort alphadist;
+   /* Encode one channel of a (possibly partial) 4x4 tile into the 8 bytes at blkaddr, keeping the lowest-error of up to three encodings. */
+   /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
+   alphabase[0] = 0xff; alphabase[1] = 0x0;
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         if (srccolors[j][i] == 0)
+            alphaabsmin = GL_TRUE;
+         else if (srccolors[j][i] == 255)
+            alphaabsmax = GL_TRUE;
+         else {
+            if (srccolors[j][i] > alphabase[1])
+               alphabase[1] = srccolors[j][i];
+            if (srccolors[j][i] < alphabase[0])
+               alphabase[0] = srccolors[j][i];
+         }
+      }
+   }
+
+
+   if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
+      /* shortcut here since it is a very common case (and also avoids later problems) */
+      /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
+      /* could also test for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
+
+      *blkaddr++ = srccolors[0][0];
+      *blkaddr++ = srccolors[0][0]; /* second endpoint is never selected (all codes 0) but is written so the block is fully defined */
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+#if RGTC_DEBUG
+      fprintf(stderr, "enc0 used\n");
+#endif
+      return;
+   }
+
+   /* find best encoding for alpha0 > alpha1 */
+   /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
+   alphablockerror1 = 0x0;
+   alphablockerror2 = 0xffffffff;
+   alphablockerror3 = 0xffffffff;
+   if (alphaabsmin) alphause[0] = 0;
+   else alphause[0] = alphabase[0];
+   if (alphaabsmax) alphause[1] = 255;
+   else alphause[1] = alphabase[1];
+   /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
+   for (aindex = 0; aindex < 7; aindex++) {
+      /* don't forget here is always rounded down */
+      acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
+   }
+
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         /* maybe it's overkill to have the most complicated calculation just for the error
+            calculation which we only need to figure out if encoding1 or encoding2 is better... */
+         if (srccolors[j][i] > acutValues[0]) {
+            alphaenc1[4*j + i] = 0;
+            alphadist = srccolors[j][i] - alphause[1];
+         }
+         else if (srccolors[j][i] > acutValues[1]) {
+            alphaenc1[4*j + i] = 2;
+            alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[2]) {
+            alphaenc1[4*j + i] = 3;
+            alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[3]) {
+            alphaenc1[4*j + i] = 4;
+            alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[4]) {
+            alphaenc1[4*j + i] = 5;
+            alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[5]) {
+            alphaenc1[4*j + i] = 6;
+            alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[6]) {
+            alphaenc1[4*j + i] = 7;
+            alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
+         }
+         else {
+            alphaenc1[4*j + i] = 1;
+            alphadist = srccolors[j][i] - alphause[0];
+         }
+         alphablockerror1 += alphadist * alphadist;
+      }
+   }
+
+#if RGTC_DEBUG
+   for (i = 0; i < 16; i++) {
+      fprintf(stderr, "%d ", alphaenc1[i]);
+   }
+   fprintf(stderr, "cutVals ");
+   for (i = 0; i < 7; i++) {
+      fprintf(stderr, "%d ", acutValues[i]);
+   }
+   fprintf(stderr, "srcVals ");
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+	 fprintf(stderr, "%d ", srccolors[j][i]);
+      }
+   }
+   fprintf(stderr, "\n");
+#endif
+
+   /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
+      are false but try it anyway */
+   if (alphablockerror1 >= 32) {
+
+      /* don't bother if encoding is already very good, this condition should also imply
+      we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
+      alphablockerror2 = 0;
+      for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+         acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
+      }
+      for (j = 0; j < numypixels; j++) {
+         for (i = 0; i < numxpixels; i++) {
+             /* maybe it's overkill to have the most complicated calculation just for the error
+               calculation which we only need to figure out if encoding1 or encoding2 is better... */
+            if (srccolors[j][i] == 0) {
+               alphaenc2[4*j + i] = 6;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] == 255) {
+               alphaenc2[4*j + i] = 7;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] <= acutValues[0]) {
+               alphaenc2[4*j + i] = 0;
+               alphadist = srccolors[j][i] - alphabase[0];
+            }
+            else if (srccolors[j][i] <= acutValues[1]) {
+               alphaenc2[4*j + i] = 2;
+               alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[2]) {
+               alphaenc2[4*j + i] = 3;
+               alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[3]) {
+               alphaenc2[4*j + i] = 4;
+               alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[4]) {
+               alphaenc2[4*j + i] = 5;
+               alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
+            }
+            else {
+               alphaenc2[4*j + i] = 1;
+               alphadist = srccolors[j][i] - alphabase[1];
+            }
+            alphablockerror2 += alphadist * alphadist;
+         }
+      }
+
+
+      /* skip this if the error is already very small
+         this encoding is MUCH better on average than #2 though, but expensive! */
+      if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
+         GLshort blockerrlin1 = 0;
+         GLshort blockerrlin2 = 0;
+         GLubyte nralphainrangelow = 0;
+         GLubyte nralphainrangehigh = 0;
+         alphatest[0] = 0xff;
+         alphatest[1] = 0x0;
+         /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (255 -(alphabase[1] - alphabase[0]) / 28)))
+                  alphatest[1] = srccolors[j][i];
+               if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28))
+                  alphatest[0] = srccolors[j][i];
+            }
+         }
+          /* shouldn't happen too often, don't really care about those degenerated cases */
+          if (alphatest[1] <= alphatest[0]) {
+             alphatest[0] = 1;
+             alphatest[1] = 254;
+         }
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+
+         /* find the "average" difference between the alpha values and the next encoded value.
+            This is then used to calculate new base values.
+            Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
+            since they will see more improvement, and also because the values in the middle are somewhat
+            likely to get no improvement at all (because the base values might move in different directions)?
+            OTOH it would mean the values in the middle are even less likely to get an improvement
+         */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if (srccolors[j][i] <= alphatest[0] / 2) {
+               }
+               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  blockerrlin1 += (srccolors[j][i] - alphatest[0]);
+                  nralphainrangelow += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+                  }
+               else {
+                  blockerrlin2 += (srccolors[j][i] - alphatest[1]);
+                  nralphainrangehigh += 1;
+               }
+            }
+         }
+         /* shouldn't happen often, needed to avoid div by zero */
+         if (nralphainrangelow == 0) nralphainrangelow = 1;
+         if (nralphainrangehigh == 0) nralphainrangehigh = 1;
+         alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
+#if RGTC_DEBUG
+         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
+         fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
+#endif
+         /* again shouldn't really happen often... */
+         if (alphatest[0] < 0) {
+            alphatest[0] = 0;
+         }
+         alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
+         if (alphatest[1] > 255) {
+            alphatest[1] = 255;
+         }
+
+         alphablockerror3 = 0;
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+                /* maybe it's overkill to have the most complicated calculation just for the error
+                  calculation which we only need to figure out if encoding1 or encoding2 is better... */
+               if (srccolors[j][i] <= alphatest[0] / 2) {
+                  alphaenc3[4*j + i] = 6;
+                  alphadist = srccolors[j][i];
+               }
+               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+                  alphaenc3[4*j + i] = 7;
+                  alphadist = 255 - srccolors[j][i];
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  alphaenc3[4*j + i] = 0;
+                  alphadist = srccolors[j][i] - alphatest[0];
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                 alphaenc3[4*j + i] = 2;
+                 alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  alphaenc3[4*j + i] = 3;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  alphaenc3[4*j + i] = 4;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  alphaenc3[4*j + i] = 5;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
+               }
+               else {
+                  alphaenc3[4*j + i] = 1;
+                  alphadist = srccolors[j][i] - alphatest[1];
+               }
+               alphablockerror3 += alphadist * alphadist;
+            }
+         }
+      }
+   }
+  /* write the alpha values and encoding back. */
+   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
+#if RGTC_DEBUG
+      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
+#endif
+      write_rgtc_encoded_channel( blkaddr, alphause[1], alphause[0], alphaenc1 );
+   }
+   else if (alphablockerror2 <= alphablockerror3) {
+#if RGTC_DEBUG
+      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
+#endif
+      write_rgtc_encoded_channel( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
+   }
+   else {
+#if RGTC_DEBUG
+      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
+#endif
+      write_rgtc_encoded_channel( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
+   }
+}
+
+
+static void write_rgtc_encoded_channel_s(GLbyte *blkaddr,
+					 GLbyte alphabase1, GLbyte alphabase2,
+					 GLbyte alphaenc[16])
+{
+   /* Two endpoints, then sixteen 3-bit codes packed LSB-first; every code is masked so an out-of-range or negative entry cannot spill into other texels. */
+   *blkaddr++ = alphabase1;
+   *blkaddr++ = alphabase2;
+   *blkaddr++ = (alphaenc[0] & 7) | ((alphaenc[1] & 7) << 3) | ((alphaenc[2] & 3) << 6);
+   *blkaddr++ = ((alphaenc[2] & 7) >> 2) | ((alphaenc[3] & 7) << 1) | ((alphaenc[4] & 7) << 4) | ((alphaenc[5] & 1) << 7);
+   *blkaddr++ = ((alphaenc[5] & 7) >> 1) | ((alphaenc[6] & 7) << 2) | ((alphaenc[7] & 7) << 5);
+   *blkaddr++ = (alphaenc[8] & 7) | ((alphaenc[9] & 7) << 3) | ((alphaenc[10] & 3) << 6);
+   *blkaddr++ = ((alphaenc[10] & 7) >> 2) | ((alphaenc[11] & 7) << 1) | ((alphaenc[12] & 7) << 4) | ((alphaenc[13] & 1) << 7);
+   *blkaddr++ = ((alphaenc[13] & 7) >> 1) | ((alphaenc[14] & 7) << 2) | ((alphaenc[15] & 7) << 5);
+}
+
+/**
+ * Encode one 4x4 block of a signed (SNORM) channel into an 8-byte RGTC block.
+ * Tries the 8-value mode and up to two 6-value-mode variants (codes 6 and 7
+ * reserved for the extremes -128 and 127) and writes the one with the lowest
+ * squared error.  Only the numxpixels x numypixels top-left texels of
+ * srccolors are read; selectors of texels outside that region are written as 0.
+ */
+static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4],
+                               GLint numxpixels, GLint numypixels)
+{
+   GLbyte alphabase[2], alphause[2];
+   GLshort alphatest[2] = { 0 };
+   GLuint alphablockerror1, alphablockerror2, alphablockerror3;
+   GLbyte i, j, aindex, acutValues[7];
+   GLbyte alphaenc1[16] = { 0 }, alphaenc2[16] = { 0 }, alphaenc3[16] = { 0 };
+   GLboolean alphaabsmin = GL_FALSE;
+   GLboolean alphaabsmax = GL_FALSE;
+   GLshort alphadist;
+
+   /* find lowest and highest non-extreme value in block, alphabase[0] lowest, alphabase[1] highest */
+   alphabase[0] = 127; alphabase[1] = -128;
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         if (srccolors[j][i] == -128)
+            alphaabsmin = GL_TRUE;
+         else if (srccolors[j][i] == 127)
+            alphaabsmax = GL_TRUE;
+         else {
+            if (srccolors[j][i] > alphabase[1])
+               alphabase[1] = srccolors[j][i];
+            if (srccolors[j][i] < alphabase[0])
+               alphabase[0] = srccolors[j][i];
+         }
+      }
+   }
+
+   if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
+      /* shortcut here since it is a very common case (and also avoids later problems) */
+      /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
+      /* could also test for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
+      *blkaddr++ = srccolors[0][0];
+      *blkaddr++ = srccolors[0][0]; /* second endpoint must be written too; all selectors are 0 */
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+#if RGTC_DEBUG
+      fprintf(stderr, "enc0 used\n");
+#endif
+      return;
+   }
+
+   /* find best encoding for alpha0 > alpha1 */
+   /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
+   alphablockerror1 = 0x0;
+   alphablockerror2 = 0xffffffff;
+   alphablockerror3 = 0xffffffff;
+   alphause[0] = alphaabsmin ? -128 : alphabase[0];
+   alphause[1] = alphaabsmax ? 127 : alphabase[1];
+   /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
+   for (aindex = 0; aindex < 7; aindex++) {
+      /* integer division: truncates toward zero for negative sums */
+      acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
+   }
+
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         /* maybe it's overkill to have the most complicated calculation just for the error
+            calculation which we only need to figure out if encoding1 or encoding2 is better... */
+         if (srccolors[j][i] > acutValues[0]) {
+            alphaenc1[4*j + i] = 0;
+            alphadist = srccolors[j][i] - alphause[1];
+         }
+         else if (srccolors[j][i] > acutValues[1]) {
+            alphaenc1[4*j + i] = 2;
+            alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[2]) {
+            alphaenc1[4*j + i] = 3;
+            alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[3]) {
+            alphaenc1[4*j + i] = 4;
+            alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[4]) {
+            alphaenc1[4*j + i] = 5;
+            alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[5]) {
+            alphaenc1[4*j + i] = 6;
+            alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[6]) {
+            alphaenc1[4*j + i] = 7;
+            alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
+         }
+         else {
+            alphaenc1[4*j + i] = 1;
+            alphadist = srccolors[j][i] - alphause[0];
+         }
+         alphablockerror1 += alphadist * alphadist;
+      }
+   }
+#if RGTC_DEBUG
+   for (i = 0; i < 16; i++) {
+      fprintf(stderr, "%d ", alphaenc1[i]);
+   }
+   fprintf(stderr, "cutVals ");
+   for (i = 0; i < 7; i++) {
+      fprintf(stderr, "%d ", acutValues[i]);
+   }
+   fprintf(stderr, "srcVals ");
+   for (j = 0; j < numypixels; j++)
+      for (i = 0; i < numxpixels; i++) {
+         fprintf(stderr, "%d ", srccolors[j][i]);
+      }
+   fprintf(stderr, "\n");
+#endif
+
+   /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
+      are false but try it anyway */
+   if (alphablockerror1 >= 32) {
+      /* don't bother if encoding is already very good, this condition should also imply
+      we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
+      alphablockerror2 = 0;
+      for (aindex = 0; aindex < 5; aindex++) {
+         /* integer division: truncates toward zero for negative sums */
+         acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
+      }
+      for (j = 0; j < numypixels; j++) {
+         for (i = 0; i < numxpixels; i++) {
+             /* maybe it's overkill to have the most complicated calculation just for the error
+               calculation which we only need to figure out if encoding1 or encoding2 is better... */
+            if (srccolors[j][i] == -128) {
+               alphaenc2[4*j + i] = 6;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] == 127) {
+               alphaenc2[4*j + i] = 7;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] <= acutValues[0]) {
+               alphaenc2[4*j + i] = 0;
+               alphadist = srccolors[j][i] - alphabase[0];
+            }
+            else if (srccolors[j][i] <= acutValues[1]) {
+               alphaenc2[4*j + i] = 2;
+               alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[2]) {
+               alphaenc2[4*j + i] = 3;
+               alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[3]) {
+               alphaenc2[4*j + i] = 4;
+               alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[4]) {
+               alphaenc2[4*j + i] = 5;
+               alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
+            }
+            else {
+               alphaenc2[4*j + i] = 1;
+               alphadist = srccolors[j][i] - alphabase[1];
+            }
+            alphablockerror2 += alphadist * alphadist;
+         }
+      }
+
+      /* skip this if the error is already very small
+         this encoding is MUCH better on average than #2 though, but expensive! */
+      if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
+         GLshort blockerrlin1 = 0;
+         GLshort blockerrlin2 = 0;
+         GLubyte nralphainrangelow = 0;
+         GLubyte nralphainrangehigh = 0;
+         alphatest[0] = 127;
+         alphatest[1] = -128;
+         /* if we have large range it's likely there are values close to -128/127, try to map them to -128/127 */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (127 - (alphabase[1] - alphabase[0]) / 28)))
+                  alphatest[1] = srccolors[j][i];
+               if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (-128 + (alphabase[1] - alphabase[0]) / 28)))
+                  alphatest[0] = srccolors[j][i];
+            }
+         }
+          /* shouldn't happen too often, don't really care about those degenerated cases */
+          if (alphatest[1] <= alphatest[0]) {
+             alphatest[0] = -127;
+             alphatest[1] = 126;
+         }
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* integer division: truncates toward zero for negative sums */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+
+         /* find the "average" difference between the alpha values and the next encoded value.
+            This is then used to calculate new base values.
+            Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
+            since they will see more improvement, and also because the values in the middle are somewhat
+            likely to get no improvement at all (because the base values might move in different directions)?
+            OTOH it would mean the values in the middle are even less likely to get an improvement
+         */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if (srccolors[j][i] <= (alphatest[0] + 128) / 2 - 128) { /* nearer to -128: will use code 6 */
+               }
+               else if (srccolors[j][i] > ((127 + alphatest[1]) / 2)) { /* nearer to 127: will use code 7 */
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  blockerrlin1 += (srccolors[j][i] - alphatest[0]);
+                  nralphainrangelow += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+                  }
+               else {
+                  blockerrlin2 += (srccolors[j][i] - alphatest[1]);
+                  nralphainrangehigh += 1;
+               }
+            }
+         }
+         /* shouldn't happen often, needed to avoid div by zero */
+         if (nralphainrangelow == 0) nralphainrangelow = 1;
+         if (nralphainrangehigh == 0) nralphainrangehigh = 1;
+         alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
+#if RGTC_DEBUG
+         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
+         fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
+#endif
+         /* again shouldn't really happen often... */
+         if (alphatest[0] < -128) {
+            alphatest[0] = -128;
+         }
+         alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
+         if (alphatest[1] > 127) {
+            alphatest[1] = 127;
+         }
+
+         alphablockerror3 = 0;
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* integer division: truncates toward zero for negative sums */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+                /* maybe it's overkill to have the most complicated calculation just for the error
+                  calculation which we only need to figure out if encoding1 or encoding2 is better... */
+               if (srccolors[j][i] <= (alphatest[0] + 128) / 2 - 128) {
+                  alphaenc3[4*j + i] = 6;
+                  alphadist = srccolors[j][i] + 128; /* distance to -128 */
+               }
+               else if (srccolors[j][i] > ((127 + alphatest[1]) / 2)) {
+                  alphaenc3[4*j + i] = 7;
+                  alphadist = 127 - srccolors[j][i];
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  alphaenc3[4*j + i] = 0;
+                  alphadist = srccolors[j][i] - alphatest[0];
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                 alphaenc3[4*j + i] = 2;
+                 alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  alphaenc3[4*j + i] = 3;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  alphaenc3[4*j + i] = 4;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  alphaenc3[4*j + i] = 5;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
+               }
+               else {
+                  alphaenc3[4*j + i] = 1;
+                  alphadist = srccolors[j][i] - alphatest[1];
+               }
+               alphablockerror3 += alphadist * alphadist;
+            }
+         }
+      }
+   }
+  /* write the alpha values and encoding back. */
+   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
+#if RGTC_DEBUG
+      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
+#endif
+      write_rgtc_encoded_channel_s( blkaddr, alphause[1], alphause[0], alphaenc1 );
+   }
+   else if (alphablockerror2 <= alphablockerror3) {
+#if RGTC_DEBUG
+      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
+#endif
+      write_rgtc_encoded_channel_s( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
+   }
+   else {
+#if RGTC_DEBUG
+      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
+#endif
+      write_rgtc_encoded_channel_s( blkaddr, (GLbyte)alphatest[0], (GLbyte)alphatest[1], alphaenc3 );
+   }
+}
diff --git a/src/mesa/main/texcompress_rgtc.h b/src/mesa/main/texcompress_rgtc.h
new file mode 100644
index 00000000000..424edc4581c
--- /dev/null
+++ b/src/mesa/main/texcompress_rgtc.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TEXCOMPRESS_RGTC_H
+#define TEXCOMPRESS_RGTC_H
+
+#include "glheader.h"
+#include "mfeatures.h"
+#include "texstore.h"
+
+struct gl_texture_image;
+
+extern GLboolean
+_mesa_texstore_red_rgtc1(TEXSTORE_PARAMS);
+
+extern GLboolean
+_mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS);
+
+extern GLboolean
+_mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS);
+
+extern GLboolean
+_mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS);
+
+extern void
+_mesa_fetch_texel_2d_f_red_rgtc1(const struct gl_texture_image *texImage,
+				 GLint i, GLint j, GLint k, GLfloat *texel);
+
+extern void
+_mesa_fetch_texel_2d_f_signed_red_rgtc1(const struct gl_texture_image *texImage,
+					GLint i, GLint j, GLint k, GLfloat *texel);
+
+extern void
+_mesa_fetch_texel_2d_f_rg_rgtc2(const struct gl_texture_image *texImage,
+				 GLint i, GLint j, GLint k, GLfloat *texel);
+
+extern void
+_mesa_fetch_texel_2d_f_signed_rg_rgtc2(const struct gl_texture_image *texImage,
+				       GLint i, GLint j, GLint k, GLfloat *texel);
+#endif
diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c
index 8aa1e4970d5..550597e1cdf 100644
--- a/src/mesa/main/texfetch.c
+++ b/src/mesa/main/texfetch.c
@@ -38,6 +38,7 @@
 #include "texcompress.h"
 #include "texcompress_fxt1.h"
 #include "texcompress_s3tc.h"
+#include "texcompress_rgtc.h"
 #include "texfetch.h"
 #include "teximage.h"
 
@@ -756,7 +757,35 @@ texfetch_funcs[MESA_FORMAT_COUNT] =
       fetch_texel_2d_rgba_16,
       fetch_texel_3d_rgba_16,
       store_texel_rgba_16
-   }
+   },
+   {
+      MESA_FORMAT_RED_RGTC1,
+      NULL,
+      _mesa_fetch_texel_2d_f_red_rgtc1,
+      NULL,
+      NULL
+   },
+   {
+      MESA_FORMAT_SIGNED_RED_RGTC1,
+      NULL,
+      _mesa_fetch_texel_2d_f_signed_red_rgtc1,
+      NULL,
+      NULL
+   },
+   {
+      MESA_FORMAT_RG_RGTC2,
+      NULL,
+      _mesa_fetch_texel_2d_f_rg_rgtc2,
+      NULL,
+      NULL
+   },
+   {
+      MESA_FORMAT_SIGNED_RG_RGTC2,
+      NULL,
+      _mesa_fetch_texel_2d_f_signed_rg_rgtc2,
+      NULL,
+      NULL
+   },
 };
 
 
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index e8d8964e28a..8a3e5f77979 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -65,6 +65,7 @@
 #include "pack.h"
 #include "texcompress.h"
 #include "texcompress_fxt1.h"
+#include "texcompress_rgtc.h"
 #include "texcompress_s3tc.h"
 #include "teximage.h"
 #include "texstore.h"
@@ -4128,7 +4129,12 @@ texstore_funcs[MESA_FORMAT_COUNT] =
    { MESA_FORMAT_SIGNED_RG_16, _mesa_texstore_signed_rgba_16 },
    { MESA_FORMAT_SIGNED_RGB_16, _mesa_texstore_signed_rgba_16 },
    { MESA_FORMAT_SIGNED_RGBA_16, _mesa_texstore_signed_rgba_16 },
-   { MESA_FORMAT_RGBA_16, _mesa_texstore_rgba_16 }
+   { MESA_FORMAT_RGBA_16, _mesa_texstore_rgba_16 },
+
+   { MESA_FORMAT_RED_RGTC1, _mesa_texstore_red_rgtc1 },
+   { MESA_FORMAT_SIGNED_RED_RGTC1, _mesa_texstore_signed_red_rgtc1 },
+   { MESA_FORMAT_RG_RGTC2, _mesa_texstore_rg_rgtc2 },
+   { MESA_FORMAT_SIGNED_RG_RGTC2, _mesa_texstore_signed_rg_rgtc2 }
 };
 
 
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index 9a78a23aa7e..bdf4126cf58 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -78,6 +78,7 @@ MAIN_SOURCES = \
 	main/stencil.c \
 	main/syncobj.c \
 	main/texcompress.c \
+	main/texcompress_rgtc.c \
 	main/texcompress_s3tc.c \
 	main/texcompress_fxt1.c \
 	main/texenv.c \
-- 
cgit v1.2.3


From 903726d2859d6ce51e4bc96038be1830bd276f1a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 24 Feb 2011 14:27:18 +1000
Subject: swrast: add RGTC support

---
 src/mesa/main/extensions.c |  1 +
 src/mesa/main/texformat.c  | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 7504b8a85db..310740b0dd1 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -428,6 +428,7 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)
    ctx->Extensions.ARB_texture_mirrored_repeat = GL_TRUE;
    ctx->Extensions.ARB_texture_non_power_of_two = GL_TRUE;
    ctx->Extensions.ARB_texture_rg = GL_TRUE;
+   ctx->Extensions.ARB_texture_compression_rgtc = GL_TRUE;
    ctx->Extensions.ARB_vertex_array_object = GL_TRUE;
 #if FEATURE_ARB_vertex_program
    ctx->Extensions.ARB_vertex_program = GL_TRUE;
diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
index 2542cea856b..72025cf828e 100644
--- a/src/mesa/main/texformat.c
+++ b/src/mesa/main/texformat.c
@@ -602,6 +602,25 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat,
       }
    }
 
+   if (ctx->Extensions.ARB_texture_compression_rgtc) {
+      switch (internalFormat) {
+         case GL_COMPRESSED_RED_RGTC1:
+	    RETURN_IF_SUPPORTED(MESA_FORMAT_RED_RGTC1);
+	    break;
+         case GL_COMPRESSED_SIGNED_RED_RGTC1:
+	    RETURN_IF_SUPPORTED(MESA_FORMAT_SIGNED_RED_RGTC1);
+	    break;
+         case GL_COMPRESSED_RG_RGTC2:
+	    RETURN_IF_SUPPORTED(MESA_FORMAT_RG_RGTC2);
+	    break;
+         case GL_COMPRESSED_SIGNED_RG_RGTC2:
+	    RETURN_IF_SUPPORTED(MESA_FORMAT_SIGNED_RG_RGTC2);
+	    break;
+         default:
+            ; /* fallthrough */
+      }
+   }
+
    _mesa_problem(ctx, "unexpected format in _mesa_choose_tex_format()");
    return MESA_FORMAT_NONE;
 }
-- 
cgit v1.2.3


From 83ebc01c1dd1cd8a960a2a61fc7203f8312644d5 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 25 Feb 2011 09:24:15 +1000
Subject: mesa/st: add RGTC format support.

this just adds a format check + format conversion.
---
 src/mesa/state_tracker/st_extensions.c | 16 ++++++++++++++++
 src/mesa/state_tracker/st_format.c     | 17 +++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 2f45f470334..d2098987d1d 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -416,6 +416,22 @@ void st_init_extensions(struct st_context *st)
       ctx->Extensions.S3_s3tc = GL_TRUE;
    }
 
+   if (screen->is_format_supported(screen, PIPE_FORMAT_RGTC1_UNORM,
+                                   PIPE_TEXTURE_2D, 0,
+                                   PIPE_BIND_SAMPLER_VIEW, 0) &&
+       screen->is_format_supported(screen, PIPE_FORMAT_RGTC1_SNORM,
+				   PIPE_TEXTURE_2D, 0,
+                                   PIPE_BIND_SAMPLER_VIEW, 0) &&
+       screen->is_format_supported(screen, PIPE_FORMAT_RGTC2_UNORM,
+                                   PIPE_TEXTURE_2D, 0,
+                                   PIPE_BIND_SAMPLER_VIEW, 0) &&
+       screen->is_format_supported(screen, PIPE_FORMAT_RGTC2_SNORM,
+                                   PIPE_TEXTURE_2D, 0,
+                                   PIPE_BIND_SAMPLER_VIEW, 0)
+       ) {
+     ctx->Extensions.ARB_texture_compression_rgtc = GL_TRUE;
+   }
+
    /* ycbcr support */
    if (screen->is_format_supported(screen, PIPE_FORMAT_UYVY, 
                                    PIPE_TEXTURE_2D, 0,
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 577ee6189bd..c58ec9267dc 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -241,6 +241,14 @@ st_mesa_format_to_pipe_format(gl_format mesaFormat)
    case MESA_FORMAT_RGBA_UINT32:
       return PIPE_FORMAT_R32G32B32A32_USCALED;
 
+   case MESA_FORMAT_RED_RGTC1:
+      return PIPE_FORMAT_RGTC1_UNORM;
+   case MESA_FORMAT_SIGNED_RED_RGTC1:
+      return PIPE_FORMAT_RGTC1_SNORM;
+   case MESA_FORMAT_RG_RGTC2:
+      return PIPE_FORMAT_RGTC2_UNORM;
+   case MESA_FORMAT_SIGNED_RG_RGTC2:
+      return PIPE_FORMAT_RGTC2_SNORM;
    default:
       assert(0);
       return PIPE_FORMAT_NONE;
@@ -380,6 +388,15 @@ st_pipe_format_to_mesa_format(enum pipe_format format)
    case PIPE_FORMAT_R32G32B32A32_USCALED:
       return MESA_FORMAT_RGBA_UINT32;
 
+   case PIPE_FORMAT_RGTC1_UNORM:
+      return MESA_FORMAT_RED_RGTC1;
+   case PIPE_FORMAT_RGTC1_SNORM:
+      return MESA_FORMAT_SIGNED_RED_RGTC1;
+   case PIPE_FORMAT_RGTC2_UNORM:
+      return MESA_FORMAT_RG_RGTC2;
+   case PIPE_FORMAT_RGTC2_SNORM:
+      return MESA_FORMAT_SIGNED_RG_RGTC2;
+
    default:
       assert(0);
       return MESA_FORMAT_NONE;
-- 
cgit v1.2.3


From e107a3aa08cc3106fd7b939ea203c4b5195f1b5b Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 28 Feb 2011 13:43:32 +1000
Subject: rgtc: update docs

---
 docs/GL3.txt            | 2 +-
 docs/relnotes-7.11.html | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 9ff25a95297..0c53bc42d54 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -27,7 +27,7 @@ Non-normalized Integer texture/framebuffer formats    ~50% done
 1D/2D Texture arrays                                  core Mesa, swrast done
 Packed depth/stencil formats                          DONE
 Per-buffer blend and masks (GL_EXT_draw_buffers2)     DONE
-GL_EXT_texture_compression_rgtc                       not started
+GL_EXT_texture_compression_rgtc                       DONE (swrast, gallium r600)
 Red and red/green texture formats                     DONE (swrast, i965, gallium)
 Transform feedback (GL_EXT_transform_feedback)        ~50% done
    glBindFragDataLocation, glGetFragDataLocation,
diff --git a/docs/relnotes-7.11.html b/docs/relnotes-7.11.html
index 6c6622ed3f4..4b1730b17ec 100644
--- a/docs/relnotes-7.11.html
+++ b/docs/relnotes-7.11.html
@@ -38,6 +38,7 @@ tbd
 <ul>
 <li>GL_ARB_draw_instanced extension (gallium drivers, swrast)
 <li>GL_ARB_instanced_arrays extension (gallium drivers)
+<li>GL_ARB_texture_compression_rgtc (gallium r600, swrast)
 <li>GL_ARB_draw_buffers_blend (gallium)
 <li>GL_EXT_texture_sRGB_decode (gallium drivers, swrast, i965)
 </ul>
-- 
cgit v1.2.3


From 93893139a4e81d0ac59c30ce7d1a124902bd36ef Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sun, 27 Feb 2011 23:17:49 -0800
Subject: mesa: Add texcompress_rgtc.c to SConscript.

---
 src/mesa/SConscript | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index ea04fb1a0ee..90fec124af9 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -106,6 +106,7 @@ main_sources = [
     'main/stencil.c',
     'main/syncobj.c',
     'main/texcompress.c',
+    'main/texcompress_rgtc.c',
     'main/texcompress_s3tc.c',
     'main/texcompress_fxt1.c',
     'main/texenv.c',
-- 
cgit v1.2.3


From d1dbbf7bf41959df489195d11eb50f8222d293d3 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 28 Feb 2011 12:27:22 +0100
Subject: r300g: disable hyper-z on rs6xx+

It doesn't work.
---
 src/gallium/drivers/r300/r300_chipset.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 1968d0feb35..990acea9f44 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -255,8 +255,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
             caps->family = CHIP_FAMILY_RS690;
             caps->has_tcl = FALSE;
             caps->is_r400 = TRUE;
-            caps->hiz_ram = R300_HIZ_LIMIT;
-            caps->zmask_ram = PIPE_ZMASK_SIZE;
             break;
 
         case 0x793F:
@@ -265,8 +263,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
             caps->family = CHIP_FAMILY_RS600;
             caps->has_tcl = FALSE;
             caps->is_r400 = TRUE;
-            caps->hiz_ram = R300_HIZ_LIMIT;
-            caps->zmask_ram = PIPE_ZMASK_SIZE;
             break;
 
         case 0x796C:
@@ -276,8 +272,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
             caps->family = CHIP_FAMILY_RS740;
             caps->has_tcl = FALSE;
             caps->is_r400 = TRUE;
-            caps->hiz_ram = R300_HIZ_LIMIT;
-            caps->zmask_ram = PIPE_ZMASK_SIZE;
             break;
 
         case 0x7100:
-- 
cgit v1.2.3


From ab824a0722d3a7ec14d559fc07d924fedf0534bc Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 28 Feb 2011 12:43:26 +0100
Subject: r300g: initialize SC_SCREENDOOR

---
 src/gallium/drivers/r300/r300_context.c | 3 ++-
 src/gallium/drivers/r300/r300_context.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 9f85bd4ce5f..d422ffe03f8 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -203,7 +203,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
     /* SC. */
     R300_INIT_ATOM(scissor_state, 3);
     /* GB, FG, GA, SU, SC, RB3D. */
-    R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0));
+    R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0));
     /* VAP. */
     R300_INIT_ATOM(viewport_state, 9);
     R300_INIT_ATOM(pvs_flush, 2);
@@ -353,6 +353,7 @@ static void r300_init_states(struct pipe_context *pipe)
         OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
         OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0);
         OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525);
+        OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff);
 
         if (r300->screen->caps.is_rv350) {
             OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 30073759476..e9c7d7bf63f 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -220,7 +220,7 @@ struct r300_vertex_stream_state {
 };
 
 struct r300_invariant_state {
-    uint32_t cb[20];
+    uint32_t cb[22];
 };
 
 struct r300_vap_invariant_state {
-- 
cgit v1.2.3


From c33e091d17b90df61f7b3873a2f124c4f26adf06 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Mon, 28 Feb 2011 13:33:13 -0500
Subject: r600g: indentation fixes

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 src/gallium/drivers/r600/r600.h              |  6 ++--
 src/gallium/drivers/r600/r600_asm.c          | 48 ++++++++++++++--------------
 src/gallium/drivers/r600/r600_buffer.c       | 16 +++++-----
 src/gallium/drivers/r600/r600_pipe.c         |  3 +-
 src/gallium/drivers/r600/r600_shader.c       | 46 +++++++++++++-------------
 src/gallium/drivers/r600/r600_state_common.c | 12 +++----
 6 files changed, 65 insertions(+), 66 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 64c52bca795..1b76f0098dd 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -118,10 +118,10 @@ unsigned r600_get_clock_crystal_freq(struct radeon *radeon);
 /* r600_bo.c */
 struct r600_bo;
 struct r600_bo *r600_bo(struct radeon *radeon,
-                        unsigned size, unsigned alignment,
-                        unsigned binding, unsigned usage);
+			unsigned size, unsigned alignment,
+			unsigned binding, unsigned usage);
 struct r600_bo *r600_bo_handle(struct radeon *radeon,
-			       unsigned handle, unsigned *array_mode);
+				unsigned handle, unsigned *array_mode);
 void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
 void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
 void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 5d59356bf70..a3197475933 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2070,7 +2070,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 	unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160;
 	unsigned format, num_format, format_comp;
 	u32 *bytecode;
-        int i, r;
+	int i, r;
 
 	/* vertex elements offset need special handling, if offset is bigger
 	+ * than what we can put in fetch instruction then we need to alterate
@@ -2090,23 +2090,23 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 		return r;
 
 	for (i = 0; i < ve->count; i++) {
-	        if (elements[i].instance_divisor > 1) {
+		if (elements[i].instance_divisor > 1) {
 			struct r600_bc_alu alu;
 
 			memset(&alu, 0, sizeof(alu));
-                        alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
-                        alu.src[0].sel = 0;
-                        alu.src[0].chan = 3;
+			alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+			alu.src[0].sel = 0;
+			alu.src[0].chan = 3;
 
 			alu.dst.sel = i + 1;
 			alu.dst.chan = 3;
 			alu.dst.write = 1;
 			alu.last = 1;
 
-                        if ((r = r600_bc_add_alu(&bc, &alu))) {
+			if ((r = r600_bc_add_alu(&bc, &alu))) {
 				r600_bc_clear(&bc);
-                                return r;
-                        }
+				return r;
+			}
 
 			memset(&alu, 0, sizeof(alu));
 			alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
@@ -2121,10 +2121,10 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 			alu.dst.write = 1;
 			alu.last = 1;
 
-                        if ((r = r600_bc_add_alu(&bc, &alu))) {
+			if ((r = r600_bc_add_alu(&bc, &alu))) {
 				r600_bc_clear(&bc);
-                                return r;
-                        }
+				return r;
+			}
 
 			memset(&alu, 0, sizeof(alu));
 			alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
@@ -2136,26 +2136,26 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 			alu.dst.write = 1;
 			alu.last = 1;
 
-                        if ((r = r600_bc_add_alu(&bc, &alu))) {
+			if ((r = r600_bc_add_alu(&bc, &alu))) {
 				r600_bc_clear(&bc);
-                                return r;
-                        }
+				return r;
+			}
 
 			memset(&alu, 0, sizeof(alu));
-                        alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT);
-                        alu.src[0].sel = i + 1;
-                        alu.src[0].chan = 3;
+			alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT);
+			alu.src[0].sel = i + 1;
+			alu.src[0].chan = 3;
 
 			alu.dst.sel = i + 1;
 			alu.dst.chan = 3;
 			alu.dst.write = 1;
 			alu.last = 1;
 
-                        if ((r = r600_bc_add_alu(&bc, &alu))) {
+			if ((r = r600_bc_add_alu(&bc, &alu))) {
 				r600_bc_clear(&bc);
-                                return r;
-                        }
-	        }
+				return r;
+			}
+		}
 	}
 
 	for (i = 0; i < ve->count; i++) {
@@ -2202,14 +2202,14 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 		return -ENOMEM;
 	}
 
-        ve->fs_size = bc.ndw*4;
+	ve->fs_size = bc.ndw*4;
 	if ((r = r600_bc_build(&bc))) {
 		r600_bc_clear(&bc);
 		return r;
 	}
 
-        if (dump_shaders == -1)
-                dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+	if (dump_shaders == -1)
+		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
 
 	if (dump_shaders) {
 		fprintf(stderr, "--------------------------------------------------------------\n");
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 0c5d7133c7a..2363cd1ebc5 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -132,13 +132,13 @@ static void r600_transfer_destroy(struct pipe_context *ctx,
 }
 
 static void r600_buffer_transfer_inline_write(struct pipe_context *pipe,
-                                              struct pipe_resource *resource,
-                                              unsigned level,
-                                              unsigned usage,
-                                              const struct pipe_box *box,
-                                              const void *data,
-                                              unsigned stride,
-                                              unsigned layer_stride)
+						struct pipe_resource *resource,
+						unsigned level,
+						unsigned usage,
+						const struct pipe_box *box,
+						const void *data,
+						unsigned stride,
+						unsigned layer_stride)
 {
 	struct radeon *ws = (struct radeon*)pipe->winsys;
 	struct r600_resource_buffer *rbuffer = r600_buffer(resource);
@@ -224,7 +224,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
 	rbuffer->r.b.b.b.depth0 = 1;
 	rbuffer->r.b.b.b.array_size = 1;
 	rbuffer->r.b.b.b.flags = 0;
-        rbuffer->r.b.user_ptr = ptr;
+	rbuffer->r.b.user_ptr = ptr;
 	rbuffer->r.bo = NULL;
 	rbuffer->r.bo_size = 0;
 	return &rbuffer->r.b.b.b;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index adcd74aec76..db3afad2b84 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -76,8 +76,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
 	u_upload_flush(rctx->vbuf_mgr->uploader);
 }
 
-static void r600_update_num_contexts(struct r600_screen *rscreen,
-                                     int diff)
+static void r600_update_num_contexts(struct r600_screen *rscreen, int diff)
 {
 	pipe_mutex_lock(rscreen->mutex_num_contexts);
 	if (diff > 0) {
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 65923fb9648..4146cb3c5fd 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -241,10 +241,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	int r;
 
-        /* Would like some magic "get_bool_option_once" routine.
-         */
-        if (dump_shaders == -1)
-                dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+	/* Would like some magic "get_bool_option_once" routine.
+	*/
+	if (dump_shaders == -1)
+		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
 
 	if (dump_shaders) {
 		fprintf(stderr, "--------------------------------------------------------------\n");
@@ -453,24 +453,24 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 	case TGSI_FILE_ADDRESS:
 		break;
 
-        case TGSI_FILE_SYSTEM_VALUE:
-                if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
-                        struct r600_bc_alu alu;
-                        memset(&alu, 0, sizeof(struct r600_bc_alu));
+	case TGSI_FILE_SYSTEM_VALUE:
+		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+			struct r600_bc_alu alu;
+			memset(&alu, 0, sizeof(struct r600_bc_alu));
 
-                        alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
-                        alu.src[0].sel = 0;
-                        alu.src[0].chan = 3;
+			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+			alu.src[0].sel = 0;
+			alu.src[0].chan = 3;
 
 			alu.dst.sel = 0;
 			alu.dst.chan = 3;
 			alu.dst.write = 1;
-                        alu.last = 1;
+			alu.last = 1;
 
-                        if ((r = r600_bc_add_alu(ctx->bc, &alu)))
-                                return r;
-                        break;
-                }
+			if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+				return r;
+			break;
+		}
 
 	default:
 		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
@@ -558,13 +558,13 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
 		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
 	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
-                /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
-                r600_src->swizzle[0] = 3;
-                r600_src->swizzle[1] = 3;
-                r600_src->swizzle[2] = 3;
-                r600_src->swizzle[3] = 3;
-                r600_src->sel = 0;
-        } else {
+		/* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
+		r600_src->swizzle[0] = 3;
+		r600_src->swizzle[1] = 3;
+		r600_src->swizzle[2] = 3;
+		r600_src->swizzle[3] = 3;
+		r600_src->sel = 0;
+	} else {
 		if (tgsi_src->Register.Indirect)
 			r600_src->rel = V_SQ_REL_RELATIVE;
 		r600_src->sel = tgsi_src->Register.Index;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 677e2209340..3c072fe7ca9 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -299,13 +299,13 @@ void r600_spi_update(struct r600_pipe_context *rctx)
 			tmp |= S_028644_PT_SPRITE_TEX(1);
 		}
 
-                if (rctx->family < CHIP_CEDAR) {
-                    if (rshader->input[i].centroid)
-                            tmp |= S_028644_SEL_CENTROID(1);
+		if (rctx->family < CHIP_CEDAR) {
+			if (rshader->input[i].centroid)
+				tmp |= S_028644_SEL_CENTROID(1);
 
-                    if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
-                            tmp |= S_028644_SEL_LINEAR(1);
-                }
+			if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+				tmp |= S_028644_SEL_LINEAR(1);
+		}
 
 		r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
 	}
-- 
cgit v1.2.3


From eb639349e289a6b8be06a54f5e9e0ce18c71d511 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sun, 27 Feb 2011 01:17:29 -0800
Subject: glsl: Use reralloc instead of plain realloc.

Plugs a memory leak when compiling shaders with user-defined structures.

NOTE: This is a candidate for the 7.9 and 7.10 branches.
---
 src/glsl/ast_to_hir.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index bef099cca3b..fd1f0b49f42 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3445,11 +3445,9 @@ ast_struct_specifier::hir(exec_list *instructions,
    if (!state->symbols->add_type(name, t)) {
       _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name);
    } else {
-
-      const glsl_type **s = (const glsl_type **)
-	 realloc(state->user_structures,
-		 sizeof(state->user_structures[0]) *
-		 (state->num_user_structures + 1));
+      const glsl_type **s = reralloc(state, state->user_structures,
+				     const glsl_type *,
+				     state->num_user_structures + 1);
       if (s != NULL) {
 	 s[state->num_user_structures] = t;
 	 state->user_structures = s;
-- 
cgit v1.2.3


From 0a163cf56d1e412629cb802480998a982a47bb3c Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 25 Feb 2011 23:11:20 -0800
Subject: glsl: Enable GL_OES_texture_3D extension for ES2.

---
 src/glsl/builtin_types.h                       |  6 ++++--
 src/glsl/builtins/profiles/OES_texture_3D.frag |  7 +++++++
 src/glsl/builtins/profiles/OES_texture_3D.vert |  7 +++++++
 src/glsl/glsl_parser_extras.cpp                |  5 +++++
 src/glsl/glsl_parser_extras.h                  |  2 ++
 src/glsl/glsl_types.cpp                        | 12 ++++++++++++
 src/glsl/glsl_types.h                          |  2 ++
 src/mesa/main/extensions.c                     |  3 +--
 8 files changed, 40 insertions(+), 4 deletions(-)
 create mode 100644 src/glsl/builtins/profiles/OES_texture_3D.frag
 create mode 100644 src/glsl/builtins/profiles/OES_texture_3D.vert

diff --git a/src/glsl/builtin_types.h b/src/glsl/builtin_types.h
index 8ccbf6e312f..58b9a81273a 100644
--- a/src/glsl/builtin_types.h
+++ b/src/glsl/builtin_types.h
@@ -27,6 +27,10 @@ const glsl_type glsl_type::_error_type =
 const glsl_type glsl_type::_void_type =
    glsl_type(GL_INVALID_ENUM, GLSL_TYPE_VOID, 0, 0, "void");
 
+const glsl_type glsl_type::_sampler3D_type =
+   glsl_type(GL_SAMPLER_3D, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT,
+	     "sampler3D");
+
 const glsl_type *const glsl_type::error_type = & glsl_type::_error_type;
 const glsl_type *const glsl_type::void_type = & glsl_type::_void_type;
 
@@ -181,8 +185,6 @@ const glsl_type glsl_type::builtin_110_types[] = {
 	     "sampler1DShadow"),
    glsl_type(GL_SAMPLER_2D_SHADOW, GLSL_SAMPLER_DIM_2D, 1, 0, GLSL_TYPE_FLOAT,
 	     "sampler2DShadow"),
-   glsl_type(GL_SAMPLER_3D,   GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT,
-	     "sampler3D"),
 };
 /*@}*/
 
diff --git a/src/glsl/builtins/profiles/OES_texture_3D.frag b/src/glsl/builtins/profiles/OES_texture_3D.frag
new file mode 100644
index 00000000000..b6ebd6a311f
--- /dev/null
+++ b/src/glsl/builtins/profiles/OES_texture_3D.frag
@@ -0,0 +1,7 @@
+#version 100
+#extension GL_OES_texture_3D : enable
+
+vec4 texture3D (sampler3D sampler, vec3 coord);
+vec4 texture3DProj (sampler3D sampler, vec4 coord);
+vec4 texture3D (sampler3D sampler, vec3 coord, float bias);
+vec4 texture3DProj (sampler3D sampler, vec4 coord, float bias);
diff --git a/src/glsl/builtins/profiles/OES_texture_3D.vert b/src/glsl/builtins/profiles/OES_texture_3D.vert
new file mode 100644
index 00000000000..81d12f51e9f
--- /dev/null
+++ b/src/glsl/builtins/profiles/OES_texture_3D.vert
@@ -0,0 +1,7 @@
+#version 100
+#extension GL_OES_texture_3D : enable
+
+vec4 texture3D (sampler3D sampler, vec3 coord);
+vec4 texture3DProj (sampler3D sampler, vec4 coord);
+vec4 texture3DLod (sampler3D sampler, vec3 coord, float lod);
+vec4 texture3DProjLod (sampler3D sampler, vec4 coord, float lod);
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index d7a37aef46d..e8c60936fb6 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -256,6 +256,11 @@ _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp,
       state->AMD_conservative_depth_enable = (ext_mode != extension_disable);
       state->AMD_conservative_depth_warn = (ext_mode == extension_warn);
       unsupported = !state->extensions->AMD_conservative_depth;
+   } else if (strcmp(name, "GL_OES_texture_3D") == 0 && state->es_shader) {
+      state->OES_texture_3D_enable = (ext_mode != extension_disable);
+      state->OES_texture_3D_warn = (ext_mode == extension_warn);
+
+      unsupported = !state->extensions->EXT_texture3D;
    } else {
       unsupported = true;
    }
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 10cb673c694..b5c016fb399 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -172,6 +172,8 @@ struct _mesa_glsl_parse_state {
    unsigned ARB_shader_stencil_export_warn:1;
    unsigned AMD_conservative_depth_enable:1;
    unsigned AMD_conservative_depth_warn:1;
+   unsigned OES_texture_3D_enable:1;
+   unsigned OES_texture_3D_warn:1;
    /*@}*/
 
    /** Extensions supported by the OpenGL implementation. */
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 76b4f3e4cb0..78d10bd9380 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -131,6 +131,7 @@ glsl_type::generate_110_types(glsl_symbol_table *symtab)
    add_types_to_symbol_table(symtab, builtin_110_types,
 			     Elements(builtin_110_types),
 			     false);
+   add_types_to_symbol_table(symtab, &_sampler3D_type, 1, false);
    add_types_to_symbol_table(symtab, builtin_110_deprecated_structure_types,
 			     Elements(builtin_110_deprecated_structure_types),
 			     false);
@@ -178,6 +179,13 @@ glsl_type::generate_EXT_texture_array_types(glsl_symbol_table *symtab,
 }
 
 
+void
+glsl_type::generate_OES_texture_3D_types(glsl_symbol_table *symtab, bool warn)
+{
+   add_types_to_symbol_table(symtab, &_sampler3D_type, 1, warn);
+}
+
+
 void
 _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
 {
@@ -204,6 +212,10 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
       glsl_type::generate_ARB_texture_rectangle_types(state->symbols,
 					   state->ARB_texture_rectangle_warn);
    }
+   if (state->OES_texture_3D_enable && state->language_version == 100) {
+      glsl_type::generate_OES_texture_3D_types(state->symbols,
+					       state->OES_texture_3D_warn);
+   }
 
    if (state->EXT_texture_array_enable && state->language_version < 130) {
       // These are already included in 130; don't create twice.
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 61bf5e0cfd2..3c2672c01a0 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -427,6 +427,7 @@ private:
    /*@{*/
    static const glsl_type _error_type;
    static const glsl_type _void_type;
+   static const glsl_type _sampler3D_type;
    static const glsl_type builtin_core_types[];
    static const glsl_type builtin_structure_types[];
    static const glsl_type builtin_110_deprecated_structure_types[];
@@ -453,6 +454,7 @@ private:
    static void generate_130_types(glsl_symbol_table *);
    static void generate_ARB_texture_rectangle_types(glsl_symbol_table *, bool);
    static void generate_EXT_texture_array_types(glsl_symbol_table *, bool);
+   static void generate_OES_texture_3D_types(glsl_symbol_table *, bool);
    /*@}*/
 
    /**
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 310740b0dd1..b8bb2555acd 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -241,8 +241,7 @@ static const struct extension extension_table[] = {
    { "GL_OES_stencil4",                            o(dummy_false),                     DISABLE                },
    { "GL_OES_stencil8",                            o(EXT_framebuffer_object),                       ES1 | ES2 },
    { "GL_OES_stencil_wrap",                        o(EXT_stencil_wrap),                             ES1       },
-   /* GL_OES_texture_3D is disabled due to missing GLSL support. */
-   { "GL_OES_texture_3D",                          o(EXT_texture3D),                   DISABLE                },
+   { "GL_OES_texture_3D",                          o(EXT_texture3D),                                      ES2 },
    { "GL_OES_texture_cube_map",                    o(ARB_texture_cube_map),                         ES1       },
    { "GL_OES_texture_env_crossbar",                o(ARB_texture_env_crossbar),                     ES1       },
    { "GL_OES_texture_mirrored_repeat",             o(ARB_texture_mirrored_repeat),                  ES1       },
-- 
cgit v1.2.3


From 3f600047d95f46995915d46aff574796d088fa83 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 1 Mar 2011 06:47:00 +1000
Subject: rgtc: fix void pointer arith.

Should fix the scons build.
---
 src/mesa/main/texcompress_rgtc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/texcompress_rgtc.c b/src/mesa/main/texcompress_rgtc.c
index b7725f4a98f..1a01755f14d 100644
--- a/src/mesa/main/texcompress_rgtc.c
+++ b/src/mesa/main/texcompress_rgtc.c
@@ -88,7 +88,7 @@ _mesa_texstore_red_rgtc1(TEXSTORE_PARAMS)
    const GLchan *tempImage = NULL;
    int i, j;
    int numxpixels, numypixels;
-   const void *srcaddr;
+   const GLchan *srcaddr;
    GLubyte srcpixels[4][4];
    GLubyte *blkaddr;
    GLint dstRowDiff;
@@ -197,7 +197,7 @@ _mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
    const GLchan *tempImage = NULL;
    int i, j;
    int numxpixels, numypixels;
-   const void *srcaddr;
+   const GLchan *srcaddr;
    GLubyte srcpixels[4][4];
    GLubyte *blkaddr;
    GLint dstRowDiff;
-- 
cgit v1.2.3


From 5f44fab5a6ba99c287da8d01fa584763bff2565b Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Mon, 28 Feb 2011 17:21:26 -0500
Subject: r600g: add missing evergreen INT_TO_FLT to r600_bc_get_num_operands

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
---
 src/gallium/drivers/r600/r600_asm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index a3197475933..ea1cf7acecf 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -133,6 +133,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
+		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
 			return 1;
-- 
cgit v1.2.3