21 files changed, 1414 insertions, 32 deletions
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index ea04fb1a0ee..90fec124af9 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -106,6 +106,7 @@ main_sources = [
     'main/stencil.c',
     'main/syncobj.c',
     'main/texcompress.c',
+    'main/texcompress_rgtc.c',
     'main/texcompress_s3tc.c',
     'main/texcompress_fxt1.c',
     'main/texenv.c',
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index a413c02b573..5496b4fdd3b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -679,6 +679,8 @@
 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
 #define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
 #define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2
 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 30e3bd54469..9bdcda780ef 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -213,6 +213,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
       return 2;
    case FS_OPCODE_TEX:
    case FS_OPCODE_TXB:
+   case FS_OPCODE_TXD:
    case FS_OPCODE_TXL:
       return 1;
    case FS_OPCODE_FB_WRITE:
@@ -1200,6 +1201,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
       }
       /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
       mlen += 3;
+   } else if (ir->op == ir_txd) {
+      assert(!"TXD isn't supported on gen4 yet.");
    } else {
       /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
        * instructions.  We'll need to do SIMD16 here.
@@ -1253,6 +1256,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
       inst = emit(fs_inst(FS_OPCODE_TXL, dst));
       break;
    case ir_txd:
+      inst = emit(fs_inst(FS_OPCODE_TXD, dst));
+      break;
    case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
@@ -2308,6 +2313,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
 	 }
 	 break;
+      case FS_OPCODE_TXL:
+	 if (inst->shadow_compare) {
+	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5;
+	 } else {
+	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5;
+	 }
+	 break;
+      case FS_OPCODE_TXD:
+	 assert(!"TXD isn't supported on gen5+ yet.");
+	 break;
       }
    } else {
       switch (inst->opcode) {
@@ -2325,13 +2340,26 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
       case FS_OPCODE_TXB:
 	 if (inst->shadow_compare) {
 	    assert(inst->mlen == 6);
-	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
 	 } else {
 	    assert(inst->mlen == 9);
 	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
 	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
 	 }
 	 break;
+      case FS_OPCODE_TXL:
+	 if (inst->shadow_compare) {
+	    assert(inst->mlen == 6);
+	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
+	 } else {
+	    assert(inst->mlen == 9);
+	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
+	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+	 }
+	 break;
+      case FS_OPCODE_TXD:
+	 assert(!"TXD isn't supported on gen4 yet.");
+	 break;
       }
    }
    assert(msg_type != -1);
@@ -3607,6 +3635,7 @@ fs_visitor::generate_code()
 	 break;
       case FS_OPCODE_TEX:
       case FS_OPCODE_TXB:
+      case FS_OPCODE_TXD:
       case FS_OPCODE_TXL:
 	 generate_tex(inst, dst, src[0]);
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8352760acf7..dc030ae5b50 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -71,6 +71,7 @@ enum fs_opcodes {
    FS_OPCODE_LINTERP,
    FS_OPCODE_TEX,
    FS_OPCODE_TXB,
+   FS_OPCODE_TXD,
    FS_OPCODE_TXL,
    FS_OPCODE_DISCARD_NOT,
    FS_OPCODE_DISCARD_AND,
@@ -309,6 +310,7 @@ public:
    {
       return (opcode == FS_OPCODE_TEX ||
 	      opcode == FS_OPCODE_TXB ||
+	      opcode == FS_OPCODE_TXD ||
 	      opcode == FS_OPCODE_TXL);
    }
 
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 38c98f30efb..d6c1f1c893d 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -68,7 +68,7 @@ upload_clip_state(struct brw_context *brw)
 	     depth_clamp |
 	     provoking);
    OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
-             U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
+             U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
              GEN6_CLIP_FORCE_ZERO_RTAINDEX);
    ADVANCE_BATCH();
 }
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 356d5f72d89..746da462ee2 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -104,7 +104,8 @@ static const __DRItexBufferExtension intelTexBufferExtension = {
 static void
 intelDRI2Flush(__DRIdrawable *drawable)
 {
-   struct intel_context *intel = drawable->driContextPriv->driverPrivate;
+   GET_CURRENT_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
 
    if (intel->gen < 4)
       INTEL_FIREVERTICES(intel);
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 7504b8a85db..b8bb2555acd 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -241,8 +241,7 @@ static const struct extension extension_table[] = {
    { "GL_OES_stencil4",                            o(dummy_false),                     DISABLE                },
    { "GL_OES_stencil8",                            o(EXT_framebuffer_object),                       ES1 | ES2 },
    { "GL_OES_stencil_wrap",                        o(EXT_stencil_wrap),                             ES1       },
-   /* GL_OES_texture_3D is disabled due to missing GLSL support. */
-   { "GL_OES_texture_3D",                          o(EXT_texture3D),                   DISABLE                },
+   { "GL_OES_texture_3D",                          o(EXT_texture3D),                                      ES2 },
    { "GL_OES_texture_cube_map",                    o(ARB_texture_cube_map),                         ES1       },
    { "GL_OES_texture_env_crossbar",                o(ARB_texture_env_crossbar),                     ES1       },
    { "GL_OES_texture_mirrored_repeat",             o(ARB_texture_mirrored_repeat),                  ES1       },
@@ -428,6 +427,7 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)
    ctx->Extensions.ARB_texture_mirrored_repeat = GL_TRUE;
    ctx->Extensions.ARB_texture_non_power_of_two = GL_TRUE;
    ctx->Extensions.ARB_texture_rg = GL_TRUE;
+   ctx->Extensions.ARB_texture_compression_rgtc = GL_TRUE;
    ctx->Extensions.ARB_vertex_array_object = GL_TRUE;
 #if FEATURE_ARB_vertex_program
    ctx->Extensions.ARB_vertex_program = GL_TRUE;
diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 1e395363475..947db84a69e 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -890,7 +890,43 @@ static struct gl_format_info format_info[MESA_FORMAT_COUNT] =
       16, 16, 16, 16,
       0, 0, 0, 0, 0,
       1, 1, 8
-   }
+   },
+   {
+     MESA_FORMAT_RED_RGTC1,
+     "MESA_FORMAT_RED_RGTC1",
+     GL_RED,
+     GL_UNSIGNED_NORMALIZED,
+     4, 0, 0, 0,
+     0, 0, 0, 0, 0,
+     4, 4, 8                     /* 8 bytes per 4x4 block */
+   },
+   {
+     MESA_FORMAT_SIGNED_RED_RGTC1,
+     "MESA_FORMAT_SIGNED_RED_RGTC1",
+     GL_RED,
+     GL_SIGNED_NORMALIZED,
+     4, 0, 0, 0,
+     0, 0, 0, 0, 0,
+     4, 4, 8                     /* 8 bytes per 4x4 block */
+   },
+   {
+     MESA_FORMAT_RG_RGTC2,
+     "MESA_FORMAT_RG_RGTC2",
+     GL_RG,
+     GL_UNSIGNED_NORMALIZED,
+     4, 4, 0, 0,
+     0, 0, 0, 0, 0,
+     4, 4, 16                     /* 16 bytes per 4x4 block */
+   },
+   {
+     MESA_FORMAT_SIGNED_RG_RGTC2,
+     "MESA_FORMAT_SIGNED_RG_RGTC2",
+     GL_RG,
+     GL_SIGNED_NORMALIZED,
+     4, 4, 0, 0,
+     0, 0, 0, 0, 0,
+     4, 4, 16                     /* 16 bytes per 4x4 block */
+   },
 };
 
 
@@ -1530,6 +1566,10 @@ _mesa_format_to_type_and_comps(gl_format format,
    case MESA_FORMAT_SRGBA_DXT5:
 #endif
 #endif
+   case MESA_FORMAT_RED_RGTC1:
+   case MESA_FORMAT_SIGNED_RED_RGTC1:
+   case MESA_FORMAT_RG_RGTC2:
+   case MESA_FORMAT_SIGNED_RG_RGTC2:
       /* XXX generate error instead? */
       *datatype = GL_UNSIGNED_BYTE;
       *comps = 0;
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index 9a5cef37788..e21967e2b0c 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -179,6 +179,12 @@ typedef enum
    MESA_FORMAT_RGBA_16,           /* ... */
    /*@}*/
 
+   /*@{*/
+   MESA_FORMAT_RED_RGTC1,
+   MESA_FORMAT_SIGNED_RED_RGTC1,
+   MESA_FORMAT_RG_RGTC2,
+   MESA_FORMAT_SIGNED_RG_RGTC2,
+   /*@}*/
    MESA_FORMAT_COUNT
 } gl_format;
 
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 7a0b522a2d8..82d02ed0ecf 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -64,6 +64,7 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a
          n += 2;
       }
    }
+   /* don't return RGTC - ARB_texture_compression_rgtc query 19 */
    if (ctx->Extensions.EXT_texture_compression_s3tc) {
       if (formats) {
          formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
@@ -163,6 +164,15 @@ _mesa_glenum_to_compressed_format(GLenum format)
    case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
       return MESA_FORMAT_SRGBA_DXT5;
 
+   case GL_COMPRESSED_RED_RGTC1:
+      return MESA_FORMAT_RED_RGTC1;
+   case GL_COMPRESSED_SIGNED_RED_RGTC1:
+      return MESA_FORMAT_SIGNED_RED_RGTC1;
+   case GL_COMPRESSED_RG_RGTC2:
+      return MESA_FORMAT_RG_RGTC2;
+   case GL_COMPRESSED_SIGNED_RG_RGTC2:
+      return MESA_FORMAT_SIGNED_RG_RGTC2;
+
    default:
       return MESA_FORMAT_NONE;
    }
@@ -209,6 +219,16 @@ _mesa_compressed_format_to_glenum(struct gl_context *ctx, GLuint mesaFormat)
       return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT;
 #endif
 #endif
+
+   case MESA_FORMAT_RED_RGTC1:
+      return GL_COMPRESSED_RED_RGTC1;
+   case MESA_FORMAT_SIGNED_RED_RGTC1:
+      return GL_COMPRESSED_SIGNED_RED_RGTC1;
+   case MESA_FORMAT_RG_RGTC2:
+      return GL_COMPRESSED_RG_RGTC2;
+   case MESA_FORMAT_SIGNED_RG_RGTC2:
+      return GL_COMPRESSED_SIGNED_RG_RGTC2;
+
    default:
       _mesa_problem(ctx, "Unexpected mesa texture format in"
                     " _mesa_compressed_format_to_glenum()");
diff --git a/src/mesa/main/texcompress_rgtc.c b/src/mesa/main/texcompress_rgtc.c
new file mode 100644
index 00000000000..1a01755f14d
--- /dev/null
+++ b/src/mesa/main/texcompress_rgtc.c
@@ -0,0 +1,1122 @@
+/*
+ * Copyright (C) 2011 Red Hat Inc.
+ * 
+ * block compression parts are:
+ * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:
+ *    Dave Airlie
+ */
+
+/**
+ * \file texcompress_rgtc.c
+ * GL_EXT_texture_compression_rgtc support.
+ */
+
+
+#include "glheader.h"
+#include "imports.h"
+#include "colormac.h"
+#include "image.h"
+#include "macros.h"
+#include "mfeatures.h"
+#include "mipmap.h"
+#include "texcompress.h"
+#include "texcompress_rgtc.h"
+#include "texstore.h"
+
+#define RGTC_DEBUG 0
+
+static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4],
+			     GLint numxpixels, GLint numypixels);
+static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4],
+			     GLint numxpixels, GLint numypixels);
+
+static void extractsrc_u( GLubyte srcpixels[4][4], const GLchan *srcaddr,
+			  GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
+{
+   GLubyte i, j;
+   const GLchan *curaddr;
+   for (j = 0; j < numypixels; j++) {
+      curaddr = srcaddr + j * srcRowStride * comps;
+      for (i = 0; i < numxpixels; i++) {
+	 srcpixels[j][i] = *curaddr / (CHAN_MAX / 255);
+	 curaddr += comps;
+      }
+   }
+}
+
+static void extractsrc_s( GLbyte srcpixels[4][4], const GLfloat *srcaddr,
+			  GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
+{
+   GLubyte i, j;
+   const GLfloat *curaddr;
+   for (j = 0; j < numypixels; j++) {
+      curaddr = srcaddr + j * srcRowStride * comps;
+      for (i = 0; i < numxpixels; i++) {
+	 srcpixels[j][i] = FLOAT_TO_BYTE_TEX(*curaddr);
+	 curaddr += comps;
+      }
+   }
+}
+
+
+GLboolean
+_mesa_texstore_red_rgtc1(TEXSTORE_PARAMS)
+{
+   GLubyte *dst;
+   const GLint texWidth = dstRowStride * 4 / 8; /* a bit of a hack */
+   const GLchan *tempImage = NULL;
+   int i, j;
+   int numxpixels, numypixels;
+   const GLchan *srcaddr;
+   GLubyte srcpixels[4][4];
+   GLubyte *blkaddr;
+   GLint dstRowDiff;
+   ASSERT(dstFormat == MESA_FORMAT_RED_RGTC1);
+   ASSERT(dstXoffset % 4 == 0);
+   ASSERT(dstYoffset % 4 == 0);
+   ASSERT(dstZoffset % 4 == 0);
+   (void) dstZoffset;
+   (void) dstImageOffsets;
+
+
+   tempImage = _mesa_make_temp_chan_image(ctx, dims,
+					  baseInternalFormat,
+					  _mesa_get_format_base_format(dstFormat),
+					  srcWidth, srcHeight, srcDepth,
+					  srcFormat, srcType, srcAddr,
+					  srcPacking);
+   if (!tempImage)
+      return GL_FALSE; /* out of memory */
+
+   dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+                                        dstFormat,
+                                        texWidth, (GLubyte *) dstAddr);
+
+   blkaddr = dst;
+   dstRowDiff = dstRowStride >= (srcWidth * 4) ? dstRowStride - (((srcWidth + 3) & ~3) * 4) : 0;
+   for (j = 0; j < srcHeight; j+=4) {
+      if (srcHeight > j + 3) numypixels = 4;
+      else numypixels = srcHeight - j;
+      srcaddr = tempImage + j * srcWidth;
+      for (i = 0; i < srcWidth; i += 4) {
+	 if (srcWidth > i + 3) numxpixels = 4;
+	 else numxpixels = srcWidth - i;
+	 extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1);
+	 encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+	 srcaddr += numxpixels;
+	 blkaddr += 8;
+      }
+      blkaddr += dstRowDiff;
+   }
+   if (tempImage)
+      free((void *) tempImage);
+
+   return GL_TRUE;
+}
+
+GLboolean
+_mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS)
+{
+   GLbyte *dst;
+   const GLint texWidth = dstRowStride * 4 / 8; /* a bit of a hack */
+   const GLfloat *tempImage = NULL;
+   int i, j;
+   int numxpixels, numypixels;
+   const GLfloat *srcaddr;
+   GLbyte srcpixels[4][4];
+   GLbyte *blkaddr;
+   GLint dstRowDiff;
+   ASSERT(dstFormat == MESA_FORMAT_SIGNED_RED_RGTC1);
+   ASSERT(dstXoffset % 4 == 0);
+   ASSERT(dstYoffset % 4 == 0);
+   ASSERT(dstZoffset % 4 == 0);
+   (void) dstZoffset;
+   (void) dstImageOffsets;
+
+   tempImage = _mesa_make_temp_float_image(ctx, dims,
+					   baseInternalFormat,
+					   _mesa_get_format_base_format(dstFormat),
+					   srcWidth, srcHeight, srcDepth,
+					   srcFormat, srcType, srcAddr,
+					   srcPacking, 0x0);
+   if (!tempImage)
+      return GL_FALSE; /* out of memory */
+
+   dst = (GLbyte *)_mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+						  dstFormat,
+						  texWidth, (GLubyte *) dstAddr);
+
+   blkaddr = dst;
+   dstRowDiff = dstRowStride >= (srcWidth * 4) ? dstRowStride - (((srcWidth + 3) & ~3) * 4) : 0;
+   for (j = 0; j < srcHeight; j+=4) {
+      if (srcHeight > j + 3) numypixels = 4;
+      else numypixels = srcHeight - j;
+      srcaddr = tempImage + j * srcWidth;
+      for (i = 0; i < srcWidth; i += 4) {
+	 if (srcWidth > i + 3) numxpixels = 4;
+	 else numxpixels = srcWidth - i;
+	 extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1);
+	 encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+	 srcaddr += numxpixels;
+	 blkaddr += 8;
+      }
+      blkaddr += dstRowDiff;
+   }
+   if (tempImage)
+      free((void *) tempImage);
+
+   return GL_TRUE;
+}
+
+GLboolean
+_mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
+{
+   GLubyte *dst;
+   const GLint texWidth = dstRowStride * 4 / 16; /* a bit of a hack */
+   const GLchan *tempImage = NULL;
+   int i, j;
+   int numxpixels, numypixels;
+   const GLchan *srcaddr;
+   GLubyte srcpixels[4][4];
+   GLubyte *blkaddr;
+   GLint dstRowDiff;
+
+   ASSERT(dstFormat == MESA_FORMAT_RG_RGTC2);
+   ASSERT(dstXoffset % 4 == 0);
+   ASSERT(dstYoffset % 4 == 0);
+   ASSERT(dstZoffset % 4 == 0);
+   (void) dstZoffset;
+   (void) dstImageOffsets;
+
+   tempImage = _mesa_make_temp_chan_image(ctx, dims,
+					  baseInternalFormat,
+					  _mesa_get_format_base_format(dstFormat),
+					  srcWidth, srcHeight, srcDepth,
+					  srcFormat, srcType, srcAddr,
+					  srcPacking);
+   if (!tempImage)
+      return GL_FALSE; /* out of memory */
+
+   dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+                                        dstFormat,
+                                        texWidth, (GLubyte *) dstAddr);
+
+   blkaddr = dst;
+   dstRowDiff = dstRowStride >= (srcWidth * 8) ? dstRowStride - (((srcWidth + 7) & ~7) * 8) : 0;
+   for (j = 0; j < srcHeight; j+=4) {
+      if (srcHeight > j + 3) numypixels = 4;
+      else numypixels = srcHeight - j;
+      srcaddr = tempImage + j * srcWidth * 2;
+      for (i = 0; i < srcWidth; i += 4) {
+	 if (srcWidth > i + 3) numxpixels = 4;
+	 else numxpixels = srcWidth - i;
+	 extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2);
+	 encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+
+	 blkaddr += 8;
+	 extractsrc_u(srcpixels, (GLchan *)srcaddr + 1, srcWidth, numxpixels, numypixels, 2);
+	 encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+
+	 blkaddr += 8;
+
+	 srcaddr += numxpixels * 2;
+      }
+      blkaddr += dstRowDiff;
+   }
+   if (tempImage)
+      free((void *) tempImage);
+
+   return GL_TRUE;
+}
+
+GLboolean
+_mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS)
+{
+   GLbyte *dst;
+   const GLint texWidth = dstRowStride * 4 / 16; /* a bit of a hack */
+   const GLfloat *tempImage = NULL;
+   int i, j;
+   int numxpixels, numypixels;
+   const GLfloat *srcaddr;
+   GLbyte srcpixels[4][4];
+   GLbyte *blkaddr;
+   GLint dstRowDiff;
+
+   ASSERT(dstFormat == MESA_FORMAT_SIGNED_RG_RGTC2);
+   ASSERT(dstXoffset % 4 == 0);
+   ASSERT(dstYoffset % 4 == 0);
+   ASSERT(dstZoffset % 4 == 0);
+   (void) dstZoffset;
+   (void) dstImageOffsets;
+
+   tempImage = _mesa_make_temp_float_image(ctx, dims,
+					   baseInternalFormat,
+					   _mesa_get_format_base_format(dstFormat),
+					   srcWidth, srcHeight, srcDepth,
+					   srcFormat, srcType, srcAddr,
+					   srcPacking, 0x0);
+   if (!tempImage)
+      return GL_FALSE; /* out of memory */
+
+   dst = (GLbyte *)_mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+						  dstFormat,
+						  texWidth, (GLubyte *) dstAddr);
+
+   blkaddr = dst;
+   dstRowDiff = dstRowStride >= (srcWidth * 8) ? dstRowStride - (((srcWidth + 7) & ~7) * 8) : 0;
+   for (j = 0; j < srcHeight; j += 4) {
+      if (srcHeight > j + 3) numypixels = 4;
+      else numypixels = srcHeight - j;
+      srcaddr = tempImage + j * srcWidth * 2;
+      for (i = 0; i < srcWidth; i += 4) {
+	 if (srcWidth > i + 3) numxpixels = 4;
+	 else numxpixels = srcWidth - i;
+
+	 extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2);
+	 encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+	 blkaddr += 8;
+
+	 extractsrc_s(srcpixels, srcaddr + 1, srcWidth, numxpixels, numypixels, 2);
+	 encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+	 blkaddr += 8;
+
+	 srcaddr += numxpixels * 2;
+
+      }
+      blkaddr += dstRowDiff;
+   }
+   if (tempImage)
+      free((void *) tempImage);
+
+   return GL_TRUE;
+}
+
+static void _fetch_texel_rgtc_u(GLint srcRowStride, const GLubyte *pixdata,
+				GLint i, GLint j, GLchan *value, int comps)
+{
+   GLchan decode;
+   const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps);
+   const GLubyte alpha0 = blksrc[0];
+   const GLubyte alpha1 = blksrc[1];
+   const GLubyte bit_pos = ((j&3) * 4 + (i&3)) * 3;
+   const GLubyte acodelow = blksrc[2 + bit_pos / 8];
+   const GLubyte acodehigh = blksrc[3 + bit_pos / 8];
+   const GLubyte code = (acodelow >> (bit_pos & 0x7) |
+      (acodehigh  << (8 - (bit_pos & 0x7)))) & 0x7;
+
+   if (code == 0)
+      decode = UBYTE_TO_CHAN( alpha0 );
+   else if (code == 1)
+      decode = UBYTE_TO_CHAN( alpha1 );
+   else if (alpha0 > alpha1)
+      decode = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
+   else if (code < 6)
+      decode = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
+   else if (code == 6)
+      decode = 0;
+   else
+      decode = CHAN_MAX;
+
+   *value = decode;
+}
+
+
+static void _fetch_texel_rgtc_s(GLint srcRowStride, const GLbyte *pixdata,
+				GLint i, GLint j, GLbyte *value, int comps)
+{
+   GLbyte decode;
+   const GLbyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps);
+   const GLbyte alpha0 = blksrc[0];
+   const GLbyte alpha1 = blksrc[1];
+   const GLbyte bit_pos = ((j&3) * 4 + (i&3)) * 3;
+   const GLbyte acodelow = blksrc[2 + bit_pos / 8];
+   const GLbyte acodehigh = blksrc[3 + bit_pos / 8];
+   const GLbyte code = (acodelow >> (bit_pos & 0x7) |
+      (acodehigh  << (8 - (bit_pos & 0x7)))) & 0x7;
+
+   if (code == 0)
+      decode = alpha0;
+   else if (code == 1)
+      decode = alpha1;
+   else if (alpha0 > alpha1)
+      decode = ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7);
+   else if (code < 6)
+      decode = ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5);
+   else if (code == 6)
+      decode = -128;
+   else
+      decode = 127;
+
+   *value = decode;
+}
+
+void
+_mesa_fetch_texel_2d_f_red_rgtc1(const struct gl_texture_image *texImage,
+				 GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   GLchan red;
+   _fetch_texel_rgtc_u(texImage->RowStride, (GLubyte *)(texImage->Data),
+		       i, j, &red, 1);
+   texel[RCOMP] = CHAN_TO_FLOAT(red);
+   texel[GCOMP] = 0.0;
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+void
+_mesa_fetch_texel_2d_f_signed_red_rgtc1(const struct gl_texture_image *texImage,
+					GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   GLbyte red;
+   _fetch_texel_rgtc_s(texImage->RowStride, (GLbyte *)(texImage->Data),
+		       i, j, &red, 1);
+   texel[RCOMP] = BYTE_TO_FLOAT_TEX(red);
+   texel[GCOMP] = 0.0;
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+void
+_mesa_fetch_texel_2d_f_rg_rgtc2(const struct gl_texture_image *texImage,
+				 GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   GLchan red, green;
+   _fetch_texel_rgtc_u(texImage->RowStride, (GLubyte *)(texImage->Data),
+		     i, j, &red, 2);
+   _fetch_texel_rgtc_u(texImage->RowStride, (GLubyte *)(texImage->Data) + 8,
+		     i, j, &green, 2);
+   texel[RCOMP] = CHAN_TO_FLOAT(red);
+   texel[GCOMP] = CHAN_TO_FLOAT(green);
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+void
+_mesa_fetch_texel_2d_f_signed_rg_rgtc2(const struct gl_texture_image *texImage,
+				       GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   GLbyte red, green;
+   _fetch_texel_rgtc_s(texImage->RowStride, (GLbyte *)(texImage->Data),
+		     i, j, &red, 2);
+   _fetch_texel_rgtc_s(texImage->RowStride, (GLbyte *)(texImage->Data) + 8,
+		     i, j, &green, 2);
+   texel[RCOMP] = BYTE_TO_FLOAT_TEX(red);
+   texel[GCOMP] = BYTE_TO_FLOAT_TEX(green);
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+static void write_rgtc_encoded_channel(GLubyte *blkaddr,
+				       GLubyte alphabase1,
+				       GLubyte alphabase2,
+				       GLubyte alphaenc[16])
+{
+   *blkaddr++ = alphabase1;
+   *blkaddr++ = alphabase2;
+   *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
+   *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
+   *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
+   *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
+   *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
+   *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
+}
+
+static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4],
+			     GLint numxpixels, GLint numypixels)
+{
+   GLubyte alphabase[2], alphause[2];
+   GLshort alphatest[2] = { 0 };
+   GLuint alphablockerror1, alphablockerror2, alphablockerror3;
+   GLubyte i, j, aindex, acutValues[7];
+   GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
+   GLboolean alphaabsmin = GL_FALSE;
+   GLboolean alphaabsmax = GL_FALSE;
+   GLshort alphadist;
+
+   /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
+   alphabase[0] = 0xff; alphabase[1] = 0x0;
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         if (srccolors[j][i] == 0)
+            alphaabsmin = GL_TRUE;
+         else if (srccolors[j][i] == 255)
+            alphaabsmax = GL_TRUE;
+         else {
+            if (srccolors[j][i] > alphabase[1])
+               alphabase[1] = srccolors[j][i];
+            if (srccolors[j][i] < alphabase[0])
+               alphabase[0] = srccolors[j][i];
+         }
+      }
+   }
+
+
+   if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
+      /* shortcut here since it is a very common case (and also avoids later problems) */
+      /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
+      /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
+
+      *blkaddr++ = srccolors[0][0];
+      blkaddr++;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+#if RGTC_DEBUG
+      fprintf(stderr, "enc0 used\n");
+#endif
+      return;
+   }
+
+   /* find best encoding for alpha0 > alpha1 */
+   /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
+   alphablockerror1 = 0x0;
+   alphablockerror2 = 0xffffffff;
+   alphablockerror3 = 0xffffffff;
+   if (alphaabsmin) alphause[0] = 0;
+   else alphause[0] = alphabase[0];
+   if (alphaabsmax) alphause[1] = 255;
+   else alphause[1] = alphabase[1];
+   /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
+   for (aindex = 0; aindex < 7; aindex++) {
+      /* don't forget here is always rounded down */
+      acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
+   }
+
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         /* maybe it's overkill to have the most complicated calculation just for the error
+            calculation which we only need to figure out if encoding1 or encoding2 is better... */
+         if (srccolors[j][i] > acutValues[0]) {
+            alphaenc1[4*j + i] = 0;
+            alphadist = srccolors[j][i] - alphause[1];
+         }
+         else if (srccolors[j][i] > acutValues[1]) {
+            alphaenc1[4*j + i] = 2;
+            alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[2]) {
+            alphaenc1[4*j + i] = 3;
+            alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[3]) {
+            alphaenc1[4*j + i] = 4;
+            alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[4]) {
+            alphaenc1[4*j + i] = 5;
+            alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[5]) {
+            alphaenc1[4*j + i] = 6;
+            alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[6]) {
+            alphaenc1[4*j + i] = 7;
+            alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
+         }
+         else {
+            alphaenc1[4*j + i] = 1;
+            alphadist = srccolors[j][i] - alphause[0];
+         }
+         alphablockerror1 += alphadist * alphadist;
+      }
+   }
+
+#if RGTC_DEBUG
+   for (i = 0; i < 16; i++) {
+      fprintf(stderr, "%d ", alphaenc1[i]);
+   }
+   fprintf(stderr, "cutVals ");
+   for (i = 0; i < 8; i++) {
+      fprintf(stderr, "%d ", acutValues[i]);
+   }
+   fprintf(stderr, "srcVals ");
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+	 fprintf(stderr, "%d ", srccolors[j][i]);
+      }
+   }
+   fprintf(stderr, "\n");
+#endif
+
+   /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
+      are false but try it anyway */
+   if (alphablockerror1 >= 32) {
+
+      /* don't bother if encoding is already very good, this condition should also imply
+      we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
+      alphablockerror2 = 0;
+      for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+         acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
+      }
+      for (j = 0; j < numypixels; j++) {
+         for (i = 0; i < numxpixels; i++) {
+             /* maybe it's overkill to have the most complicated calculation just for the error
+               calculation which we only need to figure out if encoding1 or encoding2 is better... */
+            if (srccolors[j][i] == 0) {
+               alphaenc2[4*j + i] = 6;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] == 255) {
+               alphaenc2[4*j + i] = 7;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] <= acutValues[0]) {
+               alphaenc2[4*j + i] = 0;
+               alphadist = srccolors[j][i] - alphabase[0];
+            }
+            else if (srccolors[j][i] <= acutValues[1]) {
+               alphaenc2[4*j + i] = 2;
+               alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[2]) {
+               alphaenc2[4*j + i] = 3;
+               alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[3]) {
+               alphaenc2[4*j + i] = 4;
+               alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[4]) {
+               alphaenc2[4*j + i] = 5;
+               alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
+            }
+            else {
+               alphaenc2[4*j + i] = 1;
+               alphadist = srccolors[j][i] - alphabase[1];
+            }
+            alphablockerror2 += alphadist * alphadist;
+         }
+      }
+
+
+      /* skip this if the error is already very small
+         this encoding is MUCH better on average than #2 though, but expensive! */
+      if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
+         GLshort blockerrlin1 = 0;
+         GLshort blockerrlin2 = 0;
+         GLubyte nralphainrangelow = 0;
+         GLubyte nralphainrangehigh = 0;
+         alphatest[0] = 0xff;
+         alphatest[1] = 0x0;
+         /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (255 -(alphabase[1] - alphabase[0]) / 28)))
+                  alphatest[1] = srccolors[j][i];
+               if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28))
+                  alphatest[0] = srccolors[j][i];
+            }
+         }
+          /* shouldn't happen too often, don't really care about those degenerated cases */
+          if (alphatest[1] <= alphatest[0]) {
+             alphatest[0] = 1;
+             alphatest[1] = 254;
+         }
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+
+         /* find the "average" difference between the alpha values and the next encoded value.
+            This is then used to calculate new base values.
+            Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
+            since they will see more improvement, and also because the values in the middle are somewhat
+            likely to get no improvement at all (because the base values might move in different directions)?
+            OTOH it would mean the values in the middle are even less likely to get an improvement
+         */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if (srccolors[j][i] <= alphatest[0] / 2) {
+               }
+               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  blockerrlin1 += (srccolors[j][i] - alphatest[0]);
+                  nralphainrangelow += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+                  }
+               else {
+                  blockerrlin2 += (srccolors[j][i] - alphatest[1]);
+                  nralphainrangehigh += 1;
+               }
+            }
+         }
+         /* shouldn't happen often, needed to avoid div by zero */
+         if (nralphainrangelow == 0) nralphainrangelow = 1;
+         if (nralphainrangehigh == 0) nralphainrangehigh = 1;
+         alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
+#if RGTC_DEBUG
+         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
+         fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
+#endif
+         /* again shouldn't really happen often... */
+         if (alphatest[0] < 0) {
+            alphatest[0] = 0;
+         }
+         alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
+         if (alphatest[1] > 255) {
+            alphatest[1] = 255;
+         }
+
+         alphablockerror3 = 0;
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+                /* maybe it's overkill to have the most complicated calculation just for the error
+                  calculation which we only need to figure out if encoding1 or encoding2 is better... */
+               if (srccolors[j][i] <= alphatest[0] / 2) {
+                  alphaenc3[4*j + i] = 6;
+                  alphadist = srccolors[j][i];
+               }
+               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+                  alphaenc3[4*j + i] = 7;
+                  alphadist = 255 - srccolors[j][i];
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  alphaenc3[4*j + i] = 0;
+                  alphadist = srccolors[j][i] - alphatest[0];
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                 alphaenc3[4*j + i] = 2;
+                 alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  alphaenc3[4*j + i] = 3;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  alphaenc3[4*j + i] = 4;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  alphaenc3[4*j + i] = 5;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
+               }
+               else {
+                  alphaenc3[4*j + i] = 1;
+                  alphadist = srccolors[j][i] - alphatest[1];
+               }
+               alphablockerror3 += alphadist * alphadist;
+            }
+         }
+      }
+   }
+  /* write the alpha values and encoding back. */
+   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
+#if RGTC_DEBUG
+      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
+#endif
+      write_rgtc_encoded_channel( blkaddr, alphause[1], alphause[0], alphaenc1 );
+   }
+   else if (alphablockerror2 <= alphablockerror3) {
+#if RGTC_DEBUG
+      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
+#endif
+      write_rgtc_encoded_channel( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
+   }
+   else {
+#if RGTC_DEBUG
+      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
+#endif
+      write_rgtc_encoded_channel( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
+   }
+}
+
+
+static void write_rgtc_encoded_channel_s(GLbyte *blkaddr,
+					 GLbyte alphabase1,
+					 GLbyte alphabase2,
+					 GLbyte alphaenc[16])
+{
+   *blkaddr++ = alphabase1;
+   *blkaddr++ = alphabase2;
+   *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
+   *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
+   *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
+   *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
+   *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
+   *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
+}
+
+static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4],
+			       GLint numxpixels, GLint numypixels)
+{
+   GLbyte alphabase[2], alphause[2];
+   GLshort alphatest[2] = { 0 };
+   GLuint alphablockerror1, alphablockerror2, alphablockerror3;
+   GLbyte i, j, aindex, acutValues[7];
+   GLbyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
+   GLboolean alphaabsmin = GL_FALSE;
+   GLboolean alphaabsmax = GL_FALSE;
+   GLshort alphadist;
+
+   /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
+   alphabase[0] = 0xff; alphabase[1] = 0x0;
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         if (srccolors[j][i] == 0)
+            alphaabsmin = GL_TRUE;
+         else if (srccolors[j][i] == 255)
+            alphaabsmax = GL_TRUE;
+         else {
+            if (srccolors[j][i] > alphabase[1])
+               alphabase[1] = srccolors[j][i];
+            if (srccolors[j][i] < alphabase[0])
+               alphabase[0] = srccolors[j][i];
+         }
+      }
+   }
+
+
+   if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
+      /* shortcut here since it is a very common case (and also avoids later problems) */
+      /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
+      /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
+
+      *blkaddr++ = srccolors[0][0];
+      blkaddr++;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+      *blkaddr++ = 0;
+#if RGTC_DEBUG
+      fprintf(stderr, "enc0 used\n");
+#endif
+      return;
+   }
+
+   /* find best encoding for alpha0 > alpha1 */
+   /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
+   alphablockerror1 = 0x0;
+   alphablockerror2 = 0xffffffff;
+   alphablockerror3 = 0xffffffff;
+   if (alphaabsmin) alphause[0] = 0;
+   else alphause[0] = alphabase[0];
+   if (alphaabsmax) alphause[1] = 255;
+   else alphause[1] = alphabase[1];
+   /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
+   for (aindex = 0; aindex < 7; aindex++) {
+      /* don't forget here is always rounded down */
+      acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
+   }
+
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         /* maybe it's overkill to have the most complicated calculation just for the error
+            calculation which we only need to figure out if encoding1 or encoding2 is better... */
+         if (srccolors[j][i] > acutValues[0]) {
+            alphaenc1[4*j + i] = 0;
+            alphadist = srccolors[j][i] - alphause[1];
+         }
+         else if (srccolors[j][i] > acutValues[1]) {
+            alphaenc1[4*j + i] = 2;
+            alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[2]) {
+            alphaenc1[4*j + i] = 3;
+            alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[3]) {
+            alphaenc1[4*j + i] = 4;
+            alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[4]) {
+            alphaenc1[4*j + i] = 5;
+            alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[5]) {
+            alphaenc1[4*j + i] = 6;
+            alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
+         }
+         else if (srccolors[j][i] > acutValues[6]) {
+            alphaenc1[4*j + i] = 7;
+            alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
+         }
+         else {
+            alphaenc1[4*j + i] = 1;
+            alphadist = srccolors[j][i] - alphause[0];
+         }
+         alphablockerror1 += alphadist * alphadist;
+      }
+   }
+#if RGTC_DEBUG
+   for (i = 0; i < 16; i++) {
+      fprintf(stderr, "%d ", alphaenc1[i]);
+   }
+   fprintf(stderr, "cutVals ");
+   for (i = 0; i < 8; i++) {
+      fprintf(stderr, "%d ", acutValues[i]);
+   }
+   fprintf(stderr, "srcVals ");
+   for (j = 0; j < numypixels; j++)
+      for (i = 0; i < numxpixels; i++) {
+	 fprintf(stderr, "%d ", srccolors[j][i]);
+      }
+   
+   fprintf(stderr, "\n");
+#endif
+
+   /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
+      are false but try it anyway */
+   if (alphablockerror1 >= 32) {
+
+      /* don't bother if encoding is already very good, this condition should also imply
+      we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
+      alphablockerror2 = 0;
+      for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+         acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
+      }
+      for (j = 0; j < numypixels; j++) {
+         for (i = 0; i < numxpixels; i++) {
+             /* maybe it's overkill to have the most complicated calculation just for the error
+               calculation which we only need to figure out if encoding1 or encoding2 is better... */
+            if (srccolors[j][i] == 0) {
+               alphaenc2[4*j + i] = 6;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] == 255) {
+               alphaenc2[4*j + i] = 7;
+               alphadist = 0;
+            }
+            else if (srccolors[j][i] <= acutValues[0]) {
+               alphaenc2[4*j + i] = 0;
+               alphadist = srccolors[j][i] - alphabase[0];
+            }
+            else if (srccolors[j][i] <= acutValues[1]) {
+               alphaenc2[4*j + i] = 2;
+               alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[2]) {
+               alphaenc2[4*j + i] = 3;
+               alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[3]) {
+               alphaenc2[4*j + i] = 4;
+               alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
+            }
+            else if (srccolors[j][i] <= acutValues[4]) {
+               alphaenc2[4*j + i] = 5;
+               alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
+            }
+            else {
+               alphaenc2[4*j + i] = 1;
+               alphadist = srccolors[j][i] - alphabase[1];
+            }
+            alphablockerror2 += alphadist * alphadist;
+         }
+      }
+
+
+      /* skip this if the error is already very small
+         this encoding is MUCH better on average than #2 though, but expensive! */
+      if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
+         GLshort blockerrlin1 = 0;
+         GLshort blockerrlin2 = 0;
+         GLubyte nralphainrangelow = 0;
+         GLubyte nralphainrangehigh = 0;
+         alphatest[0] = 0xff;
+         alphatest[1] = 0x0;
+         /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (255 -(alphabase[1] - alphabase[0]) / 28)))
+                  alphatest[1] = srccolors[j][i];
+               if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28))
+                  alphatest[0] = srccolors[j][i];
+            }
+         }
+          /* shouldn't happen too often, don't really care about those degenerated cases */
+          if (alphatest[1] <= alphatest[0]) {
+             alphatest[0] = 1;
+             alphatest[1] = 254;
+         }
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+
+         /* find the "average" difference between the alpha values and the next encoded value.
+            This is then used to calculate new base values.
+            Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
+            since they will see more improvement, and also because the values in the middle are somewhat
+            likely to get no improvement at all (because the base values might move in different directions)?
+            OTOH it would mean the values in the middle are even less likely to get an improvement
+         */
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+               if (srccolors[j][i] <= alphatest[0] / 2) {
+               }
+               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  blockerrlin1 += (srccolors[j][i] - alphatest[0]);
+                  nralphainrangelow += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+                  nralphainrangelow += 1;
+                  nralphainrangehigh += 1;
+                  }
+               else {
+                  blockerrlin2 += (srccolors[j][i] - alphatest[1]);
+                  nralphainrangehigh += 1;
+               }
+            }
+         }
+         /* shouldn't happen often, needed to avoid div by zero */
+         if (nralphainrangelow == 0) nralphainrangelow = 1;
+         if (nralphainrangehigh == 0) nralphainrangehigh = 1;
+         alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
+#if RGTC_DEBUG
+	 fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
+         fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
+#endif
+         /* again shouldn't really happen often... */
+         if (alphatest[0] < 0) {
+            alphatest[0] = 0;
+         }
+         alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
+         if (alphatest[1] > 255) {
+            alphatest[1] = 255;
+         }
+
+         alphablockerror3 = 0;
+         for (aindex = 0; aindex < 5; aindex++) {
+         /* don't forget here is always rounded down */
+            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+         }
+         for (j = 0; j < numypixels; j++) {
+            for (i = 0; i < numxpixels; i++) {
+                /* maybe it's overkill to have the most complicated calculation just for the error
+                  calculation which we only need to figure out if encoding1 or encoding2 is better... */
+               if (srccolors[j][i] <= alphatest[0] / 2) {
+                  alphaenc3[4*j + i] = 6;
+                  alphadist = srccolors[j][i];
+               }
+               else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+                  alphaenc3[4*j + i] = 7;
+                  alphadist = 255 - srccolors[j][i];
+               }
+               else if (srccolors[j][i] <= acutValues[0]) {
+                  alphaenc3[4*j + i] = 0;
+                  alphadist = srccolors[j][i] - alphatest[0];
+               }
+               else if (srccolors[j][i] <= acutValues[1]) {
+                 alphaenc3[4*j + i] = 2;
+                 alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[2]) {
+                  alphaenc3[4*j + i] = 3;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[3]) {
+                  alphaenc3[4*j + i] = 4;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
+               }
+               else if (srccolors[j][i] <= acutValues[4]) {
+                  alphaenc3[4*j + i] = 5;
+                  alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
+               }
+               else {
+                  alphaenc3[4*j + i] = 1;
+                  alphadist = srccolors[j][i] - alphatest[1];
+               }
+               alphablockerror3 += alphadist * alphadist;
+            }
+         }
+      }
+   }
+  /* write the alpha values and encoding back. */
+   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
+#if RGTC_DEBUG
+      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
+#endif
+      write_rgtc_encoded_channel_s( blkaddr, alphause[1], alphause[0], alphaenc1 );
+   }
+   else if (alphablockerror2 <= alphablockerror3) {
+#if RGTC_DEBUG
+      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
+#endif
+      write_rgtc_encoded_channel_s( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
+   }
+   else {
+#if RGTC_DEBUG
+      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
+#endif
+      write_rgtc_encoded_channel_s( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
+   }
+}
diff --git a/src/mesa/main/texcompress_rgtc.h b/src/mesa/main/texcompress_rgtc.h
new file mode 100644
index 00000000000..424edc4581c
--- /dev/null
+++ b/src/mesa/main/texcompress_rgtc.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TEXCOMPRESS_RGTC_H
+#define TEXCOMPRESS_RGTC_H
+
+#include "glheader.h"
+#include "mfeatures.h"
+#include "texstore.h"
+
+struct gl_texture_image;
+
+extern GLboolean
+_mesa_texstore_red_rgtc1(TEXSTORE_PARAMS);
+
+extern GLboolean
+_mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS);
+
+extern GLboolean
+_mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS);
+
+extern GLboolean
+_mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS);
+
+extern void
+_mesa_fetch_texel_2d_f_red_rgtc1(const struct gl_texture_image *texImage,
+				 GLint i, GLint j, GLint k, GLfloat *texel);
+
+extern void
+_mesa_fetch_texel_2d_f_signed_red_rgtc1(const struct gl_texture_image *texImage,
+					GLint i, GLint j, GLint k, GLfloat *texel);
+
+extern void
+_mesa_fetch_texel_2d_f_rg_rgtc2(const struct gl_texture_image *texImage,
+				 GLint i, GLint j, GLint k, GLfloat *texel);
+
+extern void
+_mesa_fetch_texel_2d_f_signed_rg_rgtc2(const struct gl_texture_image *texImage,
+				       GLint i, GLint j, GLint k, GLfloat *texel);
+#endif
diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c
index 8aa1e4970d5..550597e1cdf 100644
--- a/src/mesa/main/texfetch.c
+++ b/src/mesa/main/texfetch.c
@@ -38,6 +38,7 @@
 #include "texcompress.h"
 #include "texcompress_fxt1.h"
 #include "texcompress_s3tc.h"
+#include "texcompress_rgtc.h"
 #include "texfetch.h"
 #include "teximage.h"
 
@@ -756,7 +757,35 @@ texfetch_funcs[MESA_FORMAT_COUNT] =
       fetch_texel_2d_rgba_16,
       fetch_texel_3d_rgba_16,
       store_texel_rgba_16
-   }
+   },
+   {
+      MESA_FORMAT_RED_RGTC1,
+      NULL,
+      _mesa_fetch_texel_2d_f_red_rgtc1,
+      NULL,
+      NULL
+   },
+   {
+      MESA_FORMAT_SIGNED_RED_RGTC1,
+      NULL,
+      _mesa_fetch_texel_2d_f_signed_red_rgtc1,
+      NULL,
+      NULL
+   },
+   {
+      MESA_FORMAT_RG_RGTC2,
+      NULL,
+      _mesa_fetch_texel_2d_f_rg_rgtc2,
+      NULL,
+      NULL
+   },
+   {
+      MESA_FORMAT_SIGNED_RG_RGTC2,
+      NULL,
+      _mesa_fetch_texel_2d_f_signed_rg_rgtc2,
+      NULL,
+      NULL
+   },
 };
 
 
diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
index 2542cea856b..72025cf828e 100644
--- a/src/mesa/main/texformat.c
+++ b/src/mesa/main/texformat.c
@@ -602,6 +602,25 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat,
       }
    }
 
+   if (ctx->Extensions.ARB_texture_compression_rgtc) {
+      switch (internalFormat) {
+         case GL_COMPRESSED_RED_RGTC1:
+	    RETURN_IF_SUPPORTED(MESA_FORMAT_RED_RGTC1);
+	    break;
+         case GL_COMPRESSED_SIGNED_RED_RGTC1:
+	    RETURN_IF_SUPPORTED(MESA_FORMAT_SIGNED_RED_RGTC1);
+	    break;
+         case GL_COMPRESSED_RG_RGTC2:
+	    RETURN_IF_SUPPORTED(MESA_FORMAT_RG_RGTC2);
+	    break;
+         case GL_COMPRESSED_SIGNED_RG_RGTC2:
+	    RETURN_IF_SUPPORTED(MESA_FORMAT_SIGNED_RG_RGTC2);
+	    break;
+         default:
+            ; /* fallthrough */
+      }
+   }
+
    _mesa_problem(ctx, "unexpected format in _mesa_choose_tex_format()");
    return MESA_FORMAT_NONE;
 }
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 7dd4a1fa650..8a3e5f77979 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -65,6 +65,7 @@
 #include "pack.h"
 #include "texcompress.h"
 #include "texcompress_fxt1.h"
+#include "texcompress_rgtc.h"
 #include "texcompress_s3tc.h"
 #include "teximage.h"
 #include "texstore.h"
@@ -310,15 +311,15 @@ compute_component_mapping(GLenum inFormat, GLenum outFormat,
  * \param srcPacking  source image pixel packing
  * \return resulting image with format = textureBaseFormat and type = GLfloat.
  */
-static GLfloat *
-make_temp_float_image(struct gl_context *ctx, GLuint dims,
-                      GLenum logicalBaseFormat,
-                      GLenum textureBaseFormat,
-                      GLint srcWidth, GLint srcHeight, GLint srcDepth,
-                      GLenum srcFormat, GLenum srcType,
-                      const GLvoid *srcAddr,
-                      const struct gl_pixelstore_attrib *srcPacking,
-                      GLbitfield transferOps)
+GLfloat *
+_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims,
+			    GLenum logicalBaseFormat,
+			    GLenum textureBaseFormat,
+			    GLint srcWidth, GLint srcHeight, GLint srcDepth,
+			    GLenum srcFormat, GLenum srcType,
+			    const GLvoid *srcAddr,
+			    const struct gl_pixelstore_attrib *srcPacking,
+			    GLbitfield transferOps)
 {
    GLfloat *tempImage;
    const GLint components = _mesa_components_in_format(logicalBaseFormat);
@@ -2065,7 +2066,7 @@ _mesa_texstore_argb2101010(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2317,7 +2318,7 @@ _mesa_texstore_unorm1616(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2394,7 +2395,7 @@ _mesa_texstore_unorm16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2452,7 +2453,7 @@ _mesa_texstore_rgba_16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2519,7 +2520,7 @@ _mesa_texstore_signed_rgba_16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2901,7 +2902,7 @@ _mesa_texstore_signed_r8(TEXSTORE_PARAMS)
    /* XXX look at adding optimized paths */
    {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2946,7 +2947,7 @@ _mesa_texstore_signed_rg88(TEXSTORE_PARAMS)
    /* XXX look at adding optimized paths */
    {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -2991,7 +2992,7 @@ _mesa_texstore_signed_rgbx8888(TEXSTORE_PARAMS)
 
    {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3104,7 +3105,7 @@ _mesa_texstore_signed_rgba8888(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3413,7 +3414,7 @@ _mesa_texstore_rgba_float32(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3483,7 +3484,7 @@ _mesa_texstore_rgba_float16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3549,7 +3550,7 @@ _mesa_texstore_rgba_int8(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3614,7 +3615,7 @@ _mesa_texstore_rgba_int16(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -3679,7 +3680,7 @@ _mesa_texstore_rgba_int32(TEXSTORE_PARAMS)
    }
    else {
       /* general path */
-      const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
                                                  baseInternalFormat,
                                                  baseFormat,
                                                  srcWidth, srcHeight, srcDepth,
@@ -4128,7 +4129,12 @@ texstore_funcs[MESA_FORMAT_COUNT] =
    { MESA_FORMAT_SIGNED_RG_16, _mesa_texstore_signed_rgba_16 },
    { MESA_FORMAT_SIGNED_RGB_16, _mesa_texstore_signed_rgba_16 },
    { MESA_FORMAT_SIGNED_RGBA_16, _mesa_texstore_signed_rgba_16 },
-   { MESA_FORMAT_RGBA_16, _mesa_texstore_rgba_16 }
+   { MESA_FORMAT_RGBA_16, _mesa_texstore_rgba_16 },
+
+   { MESA_FORMAT_RED_RGTC1, _mesa_texstore_red_rgtc1 },
+   { MESA_FORMAT_SIGNED_RED_RGTC1, _mesa_texstore_signed_red_rgtc1 },
+   { MESA_FORMAT_RG_RGTC2, _mesa_texstore_rg_rgtc2 },
+   { MESA_FORMAT_SIGNED_RG_RGTC2, _mesa_texstore_signed_rg_rgtc2 }
 };
 
 
diff --git a/src/mesa/main/texstore.h b/src/mesa/main/texstore.h
index 177ede423f5..2f3c4e821fc 100644
--- a/src/mesa/main/texstore.h
+++ b/src/mesa/main/texstore.h
@@ -81,6 +81,15 @@ _mesa_make_temp_chan_image(struct gl_context *ctx, GLuint dims,
                            const GLvoid *srcAddr,
                            const struct gl_pixelstore_attrib *srcPacking);
 
+GLfloat *
+_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims,
+			    GLenum logicalBaseFormat,
+			    GLenum textureBaseFormat,
+			    GLint srcWidth, GLint srcHeight, GLint srcDepth,
+			    GLenum srcFormat, GLenum srcType,
+			    const GLvoid *srcAddr,
+			    const struct gl_pixelstore_attrib *srcPacking,
+			    GLbitfield transferOps);
 
 extern void
 _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level,
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index 9a78a23aa7e..bdf4126cf58 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -78,6 +78,7 @@ MAIN_SOURCES = \
 	main/stencil.c \
 	main/syncobj.c \
 	main/texcompress.c \
+	main/texcompress_rgtc.c \
 	main/texcompress_s3tc.c \
 	main/texcompress_fxt1.c \
 	main/texenv.c \
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 6530a06ade4..c99eafbadf3 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -579,6 +579,7 @@ st_validate_varrays(struct gl_context *ctx,
    if (is_interleaved_arrays(vp, vpv, arrays)) {
       setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
                                 max_index);
+
       num_vbuffers = 1;
       num_velements = vpv->num_inputs;
       if (num_velements == 0)
@@ -645,6 +646,7 @@ st_draw_vbo(struct gl_context *ctx,
       for (i = 0; i < nr_prims; i++) {
          min_index = MIN2(min_index, prims[i].start);
          max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
+         max_index = MAX2(max_index, prims[i].num_instances);
       }
    }
 
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 2f45f470334..d2098987d1d 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -416,6 +416,22 @@ void st_init_extensions(struct st_context *st)
       ctx->Extensions.S3_s3tc = GL_TRUE;
    }
 
+   if (screen->is_format_supported(screen, PIPE_FORMAT_RGTC1_UNORM,
+                                   PIPE_TEXTURE_2D, 0,
+                                   PIPE_BIND_SAMPLER_VIEW, 0) &&
+       screen->is_format_supported(screen, PIPE_FORMAT_RGTC1_SNORM,
+				   PIPE_TEXTURE_2D, 0,
+                                   PIPE_BIND_SAMPLER_VIEW, 0) &&
+       screen->is_format_supported(screen, PIPE_FORMAT_RGTC2_UNORM,
+                                   PIPE_TEXTURE_2D, 0,
+                                   PIPE_BIND_SAMPLER_VIEW, 0) &&
+       screen->is_format_supported(screen, PIPE_FORMAT_RGTC2_SNORM,
+                                   PIPE_TEXTURE_2D, 0,
+                                   PIPE_BIND_SAMPLER_VIEW, 0)
+       ) {
+     ctx->Extensions.ARB_texture_compression_rgtc = GL_TRUE;
+   }
+
    /* ycbcr support */
    if (screen->is_format_supported(screen, PIPE_FORMAT_UYVY, 
                                    PIPE_TEXTURE_2D, 0,
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 577ee6189bd..c58ec9267dc 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -241,6 +241,14 @@ st_mesa_format_to_pipe_format(gl_format mesaFormat)
    case MESA_FORMAT_RGBA_UINT32:
       return PIPE_FORMAT_R32G32B32A32_USCALED;
 
+   case MESA_FORMAT_RED_RGTC1:
+      return PIPE_FORMAT_RGTC1_UNORM;
+   case MESA_FORMAT_SIGNED_RED_RGTC1:
+      return PIPE_FORMAT_RGTC1_SNORM;
+   case MESA_FORMAT_RG_RGTC2:
+      return PIPE_FORMAT_RGTC2_UNORM;
+   case MESA_FORMAT_SIGNED_RG_RGTC2:
+      return PIPE_FORMAT_RGTC2_SNORM;
    default:
       assert(0);
       return PIPE_FORMAT_NONE;
@@ -380,6 +388,15 @@ st_pipe_format_to_mesa_format(enum pipe_format format)
    case PIPE_FORMAT_R32G32B32A32_USCALED:
       return MESA_FORMAT_RGBA_UINT32;
 
+   case PIPE_FORMAT_RGTC1_UNORM:
+      return MESA_FORMAT_RED_RGTC1;
+   case PIPE_FORMAT_RGTC1_SNORM:
+      return MESA_FORMAT_SIGNED_RED_RGTC1;
+   case PIPE_FORMAT_RGTC2_UNORM:
+      return MESA_FORMAT_RG_RGTC2;
+   case PIPE_FORMAT_RGTC2_SNORM:
+      return MESA_FORMAT_SIGNED_RG_RGTC2;
+
    default:
       assert(0);
       return MESA_FORMAT_NONE;
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 5c68fd78c30..c07739f9d53 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -224,9 +224,9 @@ src_register( struct st_translate *t,
 
    case PROGRAM_TEMPORARY:
       assert(index >= 0);
+      assert(index < Elements(t->temps));
       if (ureg_dst_is_undef(t->temps[index]))
          t->temps[index] = ureg_DECL_temporary( t->ureg );
-      assert(index < Elements(t->temps));
       return ureg_src(t->temps[index]);
 
    case PROGRAM_NAMED_PARAM: