29 files changed, 674 insertions, 695 deletions
diff --git a/progs/tests/fbotest2.c b/progs/tests/fbotest2.c
index c3117b0f767..18f28972b6e 100644
--- a/progs/tests/fbotest2.c
+++ b/progs/tests/fbotest2.c
@@ -13,6 +13,7 @@
 #include <math.h>
 #include <GL/glut.h>
 
+static int Win = 0;
 static int Width = 400, Height = 400;
 static GLuint MyFB, ColorRb, DepthRb;
 static GLboolean Animate = GL_TRUE;
@@ -110,6 +111,7 @@ CleanUp(void)
    assert(!glIsFramebufferEXT(MyFB));
    assert(!glIsRenderbufferEXT(ColorRb));
    assert(!glIsRenderbufferEXT(DepthRb));
+   glutDestroyWindow(Win);
    exit(0);
 }
 
@@ -187,7 +189,7 @@ main( int argc, char *argv[] )
    glutInitWindowPosition( 0, 0 );
    glutInitWindowSize(Width, Height);
    glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
-   glutCreateWindow(argv[0]);
+   Win = glutCreateWindow(argv[0]);
    glutReshapeFunc( Reshape );
    glutKeyboardFunc( Key );
    glutDisplayFunc( Display );
diff --git a/src/mesa/drivers/dri/i915tex/intel_fbo.c b/src/mesa/drivers/dri/i915tex/intel_fbo.c
index 8d430553822..9b84faaeb7d 100644
--- a/src/mesa/drivers/dri/i915tex/intel_fbo.c
+++ b/src/mesa/drivers/dri/i915tex/intel_fbo.c
@@ -80,21 +80,33 @@ intel_flip_renderbuffers(struct intel_framebuffer *intel_fb)
 {
    int current_page = intel_fb->pf_current_page;
    int next_page = (current_page + 1) % intel_fb->pf_num_pages;
+   struct gl_renderbuffer *tmp_rb;
 
+   /* Exchange renderbuffers if necessary but make sure their reference counts
+    * are preserved.
+    */
    if (intel_fb->color_rb[current_page] &&
        intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer !=
        &intel_fb->color_rb[current_page]->Base) {
-      _mesa_remove_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT);
-      _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT,
-			     &intel_fb->color_rb[current_page]->Base);
+      tmp_rb = NULL;
+      _mesa_reference_renderbuffer(&tmp_rb,
+	 intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+      tmp_rb = &intel_fb->color_rb[current_page]->Base;
+      _mesa_reference_renderbuffer(
+	 &intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer, tmp_rb);
+      _mesa_reference_renderbuffer(&tmp_rb, NULL);
    }
 
    if (intel_fb->color_rb[next_page] &&
        intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer !=
        &intel_fb->color_rb[next_page]->Base) {
-      _mesa_remove_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT);
-      _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT,
-			     &intel_fb->color_rb[next_page]->Base);
+      tmp_rb = NULL;
+      _mesa_reference_renderbuffer(&tmp_rb,
+	 intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+      tmp_rb = &intel_fb->color_rb[next_page]->Base;
+      _mesa_reference_renderbuffer(
+	 &intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer, tmp_rb);
+      _mesa_reference_renderbuffer(&tmp_rb, NULL);
    }
 }
 
diff --git a/src/mesa/drivers/dri/i915tex/intel_screen.c b/src/mesa/drivers/dri/i915tex/intel_screen.c
index a6342046b5f..9034ee1b223 100644
--- a/src/mesa/drivers/dri/i915tex/intel_screen.c
+++ b/src/mesa/drivers/dri/i915tex/intel_screen.c
@@ -613,6 +613,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
 				&intel_fb->color_rb[1]->Base);
 
 	 if (screen->third.handle) {
+	    struct gl_renderbuffer *tmp_rb = NULL;
+
 	    intel_fb->color_rb[2]
 	       = intel_create_renderbuffer(rgbFormat,
 					   screen->width, screen->height,
@@ -621,6 +623,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
 					   screen->cpp,
 					   screen->third.map);
 	    intel_set_span_functions(&intel_fb->color_rb[2]->Base);
+	    _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base);
 	 }
       }
 
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 1ae065b10dd..5d98176dced 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -117,7 +117,7 @@ static void brwProgramStringNotify( GLcontext *ctx,
       if (p == fp)
 	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
       p->id = brw->program_id++;      
-      p->param_state = brw_parameter_list_state_flags(p->program.Base.Parameters); 
+      p->param_state = p->program.Base.Parameters->StateFlags;
    }
    else if (target == GL_VERTEX_PROGRAM_ARB) {
       struct brw_context *brw = brw_context(ctx);
@@ -126,7 +126,7 @@ static void brwProgramStringNotify( GLcontext *ctx,
       if (p == vp)
 	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
       p->id = brw->program_id++;      
-      p->param_state = brw_parameter_list_state_flags(p->program.Base.Parameters); 
+      p->param_state = p->program.Base.Parameters->StateFlags;
 
       /* Also tell tnl about it:
        */
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index 79ff2b2d4db..64f5904ac68 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -131,7 +131,7 @@ static void recalculate_urb_fence( struct brw_context *brw )
 
 	 brw->urb.constrained = 1;
 	 
-	 if (check_urb_layout(brw)) {
+	 if (!check_urb_layout(brw)) {
 	    /* This is impossible, given the maximal sizes of urb
 	     * entries and the values for minimum nr of entries
 	     * provided above.
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 47ddcd0f05e..ff97d87dc45 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -397,7 +397,7 @@ static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
 
    /* Recalculate state dependency: 
     */
-   c->fp->param_state = brw_parameter_list_state_flags( paramList );
+   c->fp->param_state = paramList->StateFlags;
 
    return src_reg(PROGRAM_STATE_VAR, idx);
 }
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_screen.c b/src/mesa/drivers/dri/nouveau/nouveau_screen.c
index 65bde99671d..7a4b9f1cd00 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_screen.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_screen.c
@@ -328,7 +328,7 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIsc
 	static const __DRIversion ddx_expected = { 1, 2, 0 };
 	static const __DRIversion dri_expected = { 4, 0, 0 };
 	static const __DRIversion drm_expected = { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL };
-#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 5
+#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 6
 #error nouveau_drm.h version doesn't match expected version
 #endif
 	dri_interface = interface;
diff --git a/src/mesa/drivers/dri/nouveau/nv10_swtcl.c b/src/mesa/drivers/dri/nouveau/nv10_swtcl.c
index 32da40661b4..3bc84d862d3 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_swtcl.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_swtcl.c
@@ -315,6 +315,9 @@ static inline void nv10_render_line(GLcontext *ctx,GLuint v1,GLuint v2)
 	GLuint vertsize = nmesa->vertex_size;
 	GLuint size_dword = vertsize*(2)/4;
 
+	/* OUT_RINGp wants size in DWORDS */
+	vertsize >>= 2;
+
 	nv10ExtendPrimitive(nmesa, size_dword);
 	nv10StartPrimitive(nmesa,GL_LINES+1,size_dword);
 	OUT_RINGp((nouveauVertex*)(vertptr+(v1*vertsize)),vertsize);
@@ -329,6 +332,9 @@ static inline void nv10_render_triangle(GLcontext *ctx,GLuint v1,GLuint v2,GLuin
 	GLuint vertsize = nmesa->vertex_size;
 	GLuint size_dword = vertsize*(3)/4;
 
+	/* OUT_RINGp wants size in DWORDS */
+	vertsize >>= 2;
+
 	nv10ExtendPrimitive(nmesa, size_dword);
 	nv10StartPrimitive(nmesa,GL_TRIANGLES+1,size_dword);
 	OUT_RINGp((nouveauVertex*)(vertptr+(v1*vertsize)),vertsize);
@@ -344,6 +350,9 @@ static inline void nv10_render_quad(GLcontext *ctx,GLuint v1,GLuint v2,GLuint v3
 	GLuint vertsize = nmesa->vertex_size;
 	GLuint size_dword = vertsize*(4)/4;
 
+	/* OUT_RINGp wants size in DWORDS */
+	vertsize >>= 2;
+
 	nv10ExtendPrimitive(nmesa, size_dword);
 	nv10StartPrimitive(nmesa,GL_QUADS+1,size_dword);
 	OUT_RINGp((nouveauVertex*)(vertptr+(v1*vertsize)),vertsize);
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state.c b/src/mesa/drivers/dri/nouveau/nv20_state.c
index 030713c0dbb..3d8d83a865a 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state.c
@@ -78,7 +78,18 @@ static void nv20BlendFuncSeparate(GLcontext *ctx, GLenum sfactorRGB, GLenum dfac
 
 static void nv20Clear(GLcontext *ctx, GLbitfield mask)
 {
-	/* TODO */
+	nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+	GLuint hw_bufs = 0;
+	/* Either color buffer selects 0xf0, depth selects 0x03; other mask bits are ignored. */
+	if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT))
+		hw_bufs |= 0xf0;
+	if (mask & (BUFFER_BIT_DEPTH))
+		hw_bufs |= 0x03;
+	/* NOTE(review): NV30_* method name used in the nv20 driver -- confirm it applies to NV20. */
+	if (hw_bufs) {
+		BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_CLEAR_WHICH_BUFFERS, 1);
+		OUT_RING_CACHE(hw_bufs);
+	}
 }
 
 static void nv20ClearColor(GLcontext *ctx, const GLfloat color[4])
@@ -557,9 +568,11 @@ static void nv20Scissor(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
 	   y += nmesa->drawY;
 	}
 
-        BEGIN_RING_CACHE(NvSub3D, NV20_TCL_PRIMITIVE_3D_SCISSOR_X2_X1, 2);
-        OUT_RING_CACHE(((x+w-1) << 16) | x);
-        OUT_RING_CACHE(((y+h-1) << 16) | y);
+        BEGIN_RING_CACHE(NvSub3D, NV20_TCL_PRIMITIVE_3D_SCISSOR_X2_X1, 1);
+        OUT_RING_CACHE((w << 16) | x );
+        BEGIN_RING_CACHE(NvSub3D, NV20_TCL_PRIMITIVE_3D_SCISSOR_Y2_Y1, 1);
+        OUT_RING_CACHE((h << 16) | y );
+
 }
 
 /** Select flat or smooth shading */
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 8aaf50b6d81..1f8d95078fb 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -133,7 +133,6 @@ const struct dri_extension stencil_two_side[] = {
 
 extern struct tnl_pipeline_stage _r300_render_stage;
 extern const struct tnl_pipeline_stage _r300_tcl_stage;
-extern const struct tnl_pipeline_stage _r300_texrect_stage;
 
 static const struct tnl_pipeline_stage *r300_pipeline[] = {
 
@@ -164,8 +163,6 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
 
 	/* Else do them here.
 	 */
-	/* scale texture rectangle to 0..1. */
-	&_r300_texrect_stage,
 	&_r300_render_stage,
 	&_tnl_render_stage,	/* FALLBACK  */
 	0,
@@ -204,7 +201,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 						     "def_max_anisotropy");
 
 	//r300->texmicrotile = GL_TRUE;
-	
+
 	/* Init default driver functions then plug in our R300-specific functions
 	 * (the texture functions are especially important)
 	 */
@@ -213,7 +210,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	r300InitStateFuncs(&functions);
 	r300InitTextureFuncs(&functions);
 	r300InitShaderFuncs(&functions);
-	
+
 #ifdef USER_BUFFERS
 	radeon_mm_init(r300);
 #endif
@@ -221,7 +218,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	if (hw_tcl_on) {
 		r300_init_vbo_funcs(&functions);
 	}
-#endif	
+#endif
 	if (!radeonInitContext(&r300->radeon, &functions,
 			       glVisual, driContextPriv, sharedContextPrivate)) {
 		FREE(r300);
@@ -264,8 +261,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	 * texturable memory at once.
 	 */
 
-	ctx = r300->radeon.glCtx; 
-	
+	ctx = r300->radeon.glCtx;
+
 	ctx->Const.MaxTextureImageUnits = driQueryOptioni(&r300->radeon.optionCache,
 						     "texture_image_units");
 	ctx->Const.MaxTextureCoordUnits = driQueryOptioni(&r300->radeon.optionCache,
@@ -283,7 +280,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	ctx->Const.MinLineWidthAA = 1.0;
 	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
 	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
-	
+
 #ifdef USER_BUFFERS
 	/* Needs further modifications */
 #if 0
@@ -337,10 +334,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
 
 	driInitExtensions(ctx, card_extensions, GL_TRUE);
-	
+
 	if (driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side") == 0)
 		driInitSingleExtension(ctx, stencil_two_side);
-	
+
 	if (r300->radeon.glCtx->Mesa_DXTn && !driQueryOptionb (&r300->radeon.optionCache, "disable_s3tc")) {
 	  _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
 	  _mesa_enable_extension( ctx, "GL_S3_s3tc" );
@@ -354,7 +351,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	radeonInitSpanFuncs(ctx);
 	r300InitCmdBuf(r300);
 	r300InitState(r300);
-	
+
 #ifdef RADEON_VTXFMT_A
 	radeon_init_vtxfmt_a(r300);
 #endif
@@ -407,9 +404,9 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
 	/* Cannot flush/lock if no context exists. */
 	if (in_use)
 		r300FlushCmdBuf(r300, __FUNCTION__);
-	
+
 	done_age = radeonGetAge((radeonContextPtr)r300);
-	
+
 	for (i = r300->rmm->u_last; i > 0; i--) {
 		if (r300->rmm->u_list[i].ptr == NULL) {
 			continue;
@@ -421,7 +418,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
 		}
 
 		assert(r300->rmm->u_list[i].h_pending == 0);
-		
+
 		tries = 0;
 		while(r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
 			usleep(10);
@@ -430,10 +427,10 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
 		if (tries >= 1000) {
 			WARN_ONCE("Failed to idle region!");
 		}
-		
+
 		memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
 			(char *)r300->radeon.radeonScreen->gartTextures.map;
-		
+
 		ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
 				DRM_RADEON_FREE, &memfree, sizeof(memfree));
 		if (ret) {
@@ -442,7 +439,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
 		} else {
 			if (i == r300->rmm->u_last)
 				r300->rmm->u_last--;
-			
+
 			r300->rmm->u_list[i].pending = 0;
 			r300->rmm->u_list[i].ptr = NULL;
 			if (r300->rmm->u_list[i].fb) {
@@ -490,12 +487,12 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
 		_tnl_DestroyContext(r300->radeon.glCtx);
 		_vbo_DestroyContext(r300->radeon.glCtx);
 		_swrast_DestroyContext(r300->radeon.glCtx);
-		
+
 		if (r300->dma.current.buf) {
 			r300ReleaseDmaRegion(r300, &r300->dma.current, __FUNCTION__ );
 #ifndef USER_BUFFERS
 			r300FlushCmdBuf(r300, __FUNCTION__);
-#endif  
+#endif
 		}
 		r300FreeGartAllocations(r300);
 		r300DestroyCmdBuf(r300);
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index fe261dbbc6e..c8b81d98497 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -553,6 +553,7 @@ struct r300_stencilbuffer_state {
 #define VSF_MAX_FRAGMENT_TEMPS (14)
 
 #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
 
 struct r300_vertex_shader_fragment {
 	int length;
@@ -602,12 +603,6 @@ extern int hw_tcl_on;
 /* Should but doesnt work */
 //#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp)
 
-//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && (OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit)))) :
-//	(r300->state.render_inputs & (_TNL_BIT_TEX0<<(unit))))
-//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit))) :
-//	ctx->Texture.Unit[unit]._ReallyEnabled && r300->state.render_inputs & (_TNL_BIT_TEX0<<(unit)))
-
-#define TMU_ENABLED(ctx, unit) (ctx->Texture.Unit[unit]._ReallyEnabled)
 
 /* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday.
  * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly.
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index c407dfb5b0b..77f95605e4d 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -930,13 +930,47 @@ static void emit_tex(struct r300_fragment_program *rp,
 	COMPILE_STATE;
 	GLuint coord = t_src(rp, fpi->SrcReg[0]);
 	GLuint dest = undef, rdest = undef;
-	GLuint din = cs->dest_in_node, uin = cs->used_in_node;
+	GLuint din, uin;
 	int unit = fpi->TexSrcUnit;
 	int hwsrc, hwdest;
+	GLuint tempreg = 0;
+
+	uin = cs->used_in_node;
+	din = cs->dest_in_node;
 
 	/* Resolve source/dest to hardware registers */
-	hwsrc = t_hw_src(rp, coord, GL_TRUE);
 	if (opcode != R300_FPITX_OP_KIL) {
+		if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
+			/**
+			 * Hardware uses [0..1]x[0..1] range for rectangle textures
+			 * instead of [0..Width]x[0..Height].
+			 * Add a scaling instruction.
+			 *
+			 * \todo Refactor this once we have proper rewriting/optimization
+			 * support for programs.
+			 */
+			GLint tokens[6] = { STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, 0, 0 };
+			int factor_index;
+			GLuint factorreg;
+
+			tokens[2] = unit;
+			factor_index = _mesa_add_state_reference(rp->mesa_program.Base.Parameters, tokens);
+			factorreg = emit_const4fv(rp,
+					rp->mesa_program.Base.Parameters->ParameterValues[factor_index]);
+			tempreg = keep(get_temp_reg(rp));
+
+			emit_arith(rp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
+			           coord, factorreg, pfs_zero, 0);
+
+			/* Ensure correct node indirection */
+			uin = cs->used_in_node;
+			din = cs->dest_in_node;
+
+			hwsrc = t_hw_src(rp, tempreg, GL_TRUE);
+		} else {
+			hwsrc = t_hw_src(rp, coord, GL_TRUE);
+		}
+
 		dest = t_dst(rp, fpi->DstReg);
 
 		/* r300 doesn't seem to be able to do TEX->output reg */
@@ -957,8 +991,10 @@ static void emit_tex(struct r300_fragment_program *rp,
 	} else {
 		hwdest = 0;
 		unit = 0;
+		hwsrc = t_hw_src(rp, coord, GL_TRUE);
 	}
 
+
 	/* Indirection if source has been written in this node, or if the
 	 * dest has been read/written in this node
 	 */
@@ -1010,6 +1046,10 @@ static void emit_tex(struct r300_fragment_program *rp,
 			   pfs_one, pfs_zero, 0);
 		free_temp(rp, dest);
 	}
+
+	/* Free temp register */
+	if (tempreg != 0)
+		free_temp(rp, tempreg);
 }
 
 
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index 1f4a2d2e647..1f65f9a7e7e 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -325,7 +325,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Most likely this is used to ignore rest of the program in cases
  * where group of verts arent visible. For some reason this "section"
  * is sometimes accepted other instruction that have no relationship with
- *position calculations. 
+ *position calculations.
  */
 #define R300_VAP_PVS_CNTL_1                 0x22D0
 #       define R300_PVS_CNTL_1_PROGRAM_START_SHIFT   0
@@ -566,8 +566,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define R300_RE_FOG_SCALE                     0x4294
 #define R300_RE_FOG_START                     0x4298
 
-/* Not sure why there are duplicate of factor and constant values. 
- * My best guess so far is that there are seperate zbiases for test and write. 
+/* Not sure why there are duplicates of the factor and constant values.
+ * My best guess so far is that there are separate zbiases for test and write.
  * Ordering might be wrong.
  * Some of the tests indicate that fgl has a fallback implementation of zbias
  * via pixel shaders.
@@ -909,7 +909,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 /* 32 bit chroma key */
 #define R300_TX_CHROMA_KEY_0                      0x4580
 /* ff00ff00 == { 0, 1.0, 0, 1.0 } */
-#define R300_TX_BORDER_COLOR_0              0x45C0 
+#define R300_TX_BORDER_COLOR_0              0x45C0
 
 /* END: Texture specification */
 
@@ -999,6 +999,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #		define R300_FPITX_OP_KIL	2
 #		define R300_FPITX_OP_TXP	3
 #		define R300_FPITX_OP_TXB	4
+#	define R300_FPITX_OPCODE_MASK           (7 << 15)
 
 /* ALU
  * The ALU instructions register blocks are enumerated according to the order
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index 0864558e8dc..63b21b9379a 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -202,23 +202,23 @@ static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_
 	unsigned long t_addr;
 	unsigned long magic_1, magic_2;
 	GLcontext *ctx;
-	ctx = rmesa->radeon.glCtx; 
-	
+	ctx = rmesa->radeon.glCtx;
+
 	assert(elt_size == 2 || elt_size == 4);
-	
+
 	if(addr & (elt_size-1)){
 		WARN_ONCE("Badly aligned buffer\n");
 		return ;
 	}
 #ifdef OPTIMIZE_ELTS
 	addr_a = 0;
-	
+
 	magic_1 = (addr % 32) / 4;
 	t_addr = addr & (~0x1d);
 	magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1;
-	
+
 	check_space(6);
-	
+
 	start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
 	if(elt_size == 4){
 		e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
@@ -234,7 +234,7 @@ static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_
 		e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2);
 		e32(t_addr);
 	}
-	
+
 	if(elt_size == 4){
 		e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
 	} else {
@@ -249,11 +249,11 @@ static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_
 #endif
 #else
 	(void)magic_2, (void)magic_1, (void)t_addr;
-	
+
 	addr_a = 0;
-	
+
 	check_space(6);
-	
+
 	start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
 	if(elt_size == 4){
 		e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
@@ -264,14 +264,14 @@ static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_
 	start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
 	e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
 	e32(addr /*& 0xffffffe3*/);
-	
+
 	if(elt_size == 4){
 		e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
 	} else {
 		e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */
 	}
 	//cp_delay(rmesa, 1);
-#endif	
+#endif
 }
 
 static void r300_render_vb_primitive(r300ContextPtr rmesa,
@@ -303,12 +303,12 @@ static void r300_render_vb_primitive(r300ContextPtr rmesa,
 		//e32(rmesa->state.Elts[start]);
 		return;
 	}
-	
+
 	if(num_verts > 65535){ /* not implemented yet */
 		WARN_ONCE("Too many elts\n");
 		return;
 	}
-	
+
 	r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size);
 	fire_EB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size);
 #endif
@@ -328,7 +328,7 @@ GLboolean r300_run_vb_render(GLcontext *ctx,
 	int cmd_written = 0;
 	drm_radeon_cmd_header_t *cmd = NULL;
 
-   
+
 	if (RADEON_DEBUG & DEBUG_PRIMS)
 		fprintf(stderr, "%s\n", __FUNCTION__);
 
@@ -336,26 +336,26 @@ GLboolean r300_run_vb_render(GLcontext *ctx,
  		TNLcontext *tnl = TNL_CONTEXT(ctx);
 		radeon_vb_to_rvb(rmesa, VB, &tnl->vb);
 	}
-	
+
 	r300UpdateShaders(rmesa);
 	if (r300EmitArrays(ctx))
 		return GL_TRUE;
 
 	r300UpdateShaderStates(rmesa);
-	
+
 	reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
 	e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
 
 	reg_start(R300_RB3D_ZCACHE_CTLSTAT,0);
 	e32(R300_RB3D_ZCACHE_UNKNOWN_03);
-	
+
 	r300EmitState(rmesa);
-	
+
 	for(i=0; i < VB->PrimitiveCount; i++){
 		GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
 		GLuint start = VB->Primitive[i].start;
 		GLuint length = VB->Primitive[i].count;
-		
+
 		r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
 	}
 
@@ -453,11 +453,6 @@ int r300Fallback(GLcontext *ctx)
 		/* GL_POINT_SPRITE_NV */
 		FALLBACK_IF(ctx->Point.PointSprite);
 
-	/* Fallback for rectangular texture */
-	for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-		if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT)
-			return R300_FALLBACK_TCL;
-
 	return R300_FALLBACK_NONE;
 }
 
@@ -494,19 +489,19 @@ static GLboolean r300_run_tcl_render(GLcontext *ctx,
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	struct r300_vertex_program *vp;
-   
+
    	hw_tcl_on=future_hw_tcl_on;
-   
+
 	if (RADEON_DEBUG & DEBUG_PRIMS)
 		fprintf(stderr, "%s\n", __FUNCTION__);
 	if(hw_tcl_on == GL_FALSE)
 		return GL_TRUE;
-	
+
 	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
 		hw_tcl_on = GL_FALSE;
 		return GL_TRUE;
 	}
-	
+
 	r300UpdateShaders(rmesa);
 
 	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
@@ -520,13 +515,13 @@ static GLboolean r300_run_tcl_render(GLcontext *ctx,
 	TNLcontext *tnl = TNL_CONTEXT(ctx);
 	struct tnl_cache *cache;
 	struct tnl_cache_item *c;
-	
+
 	cache = tnl->vp_cache;
 	c = cache->items[0xc000cc0e % cache->size];
-	
+
 	if(c && c->data == vp)
 		vp->native = GL_FALSE;
-	
+
 #endif
 #if 0
 	vp->native = GL_FALSE;
@@ -536,7 +531,7 @@ static GLboolean r300_run_tcl_render(GLcontext *ctx,
 		return GL_TRUE;
 	}
 	//r300UpdateShaderStates(rmesa);
-	
+
 	return r300_run_vb_render(ctx, stage);
 }
 
@@ -549,107 +544,3 @@ const struct tnl_pipeline_stage _r300_tcl_stage = {
 	r300_run_tcl_render	/* run */
 };
 
-/* R300 texture rectangle expects coords in 0..1 range, not 0..dimension
- * as in the extension spec.  Need to translate here.
- *
- * Note that swrast expects 0..dimension, so if a fallback is active,
- * don't do anything.  (Maybe need to configure swrast to match hw)
- */
-struct texrect_stage_data {
-   GLvector4f texcoord[MAX_TEXTURE_UNITS];
-};
-
-#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)stage->privatePtr)
-
-
-static GLboolean run_texrect_stage( GLcontext *ctx,
-				    struct tnl_pipeline_stage *stage )
-{
-   struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
-   r300ContextPtr rmesa = R300_CONTEXT(ctx);
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   struct vertex_buffer *VB = &tnl->vb;
-   GLuint i;
-
-   if (rmesa->radeon.Fallback)
-      return GL_TRUE;
-
-   for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) {
-      if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) {
-	 struct gl_texture_object *texObj = ctx->Texture.Unit[i].CurrentRect;
-	 struct gl_texture_image *texImage = texObj->Image[0][texObj->BaseLevel];
-	 const GLfloat iw = 1.0/texImage->Width;
-	 const GLfloat ih = 1.0/texImage->Height;
-	 GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data;
-	 GLint instride = VB->TexCoordPtr[i]->stride;
-	 GLfloat (*out)[4] = store->texcoord[i].data;
-	 GLint j;
-
-	 store->texcoord[i].size = VB->TexCoordPtr[i]->size;
-	 for (j = 0 ; j < VB->Count ; j++) {
-	    switch (VB->TexCoordPtr[i]->size) {
-	    case 4:
-	       out[j][3] = in[3];
-	    /* fallthrough */
-	    case 3:
-	       out[j][2] = in[2];
-	    /* fallthrough */
-	    default:
-	       out[j][0] = in[0] * iw;
-	       out[j][1] = in[1] * ih;
-	    }
-	    in = (GLfloat *)((GLubyte *)in + instride);
-	 }
-
-	 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i];
-      }
-   }
-
-   return GL_TRUE;
-}
-
-
-/* Called the first time stage->run() is invoked.
- */
-static GLboolean alloc_texrect_data( GLcontext *ctx,
-				     struct tnl_pipeline_stage *stage )
-{
-   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
-   struct texrect_stage_data *store;
-   GLuint i;
-
-   stage->privatePtr = CALLOC(sizeof(*store));
-   store = TEXRECT_STAGE_DATA(stage);
-   if (!store)
-      return GL_FALSE;
-
-   for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++)
-      _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 );
-
-   return GL_TRUE;
-}
-
-static void free_texrect_data( struct tnl_pipeline_stage *stage )
-{
-   struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
-   GLuint i;
-
-   if (store) {
-      for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++)
-	 if (store->texcoord[i].data)
-	    _mesa_vector4f_free( &store->texcoord[i] );
-      FREE( store );
-      stage->privatePtr = NULL;
-   }
-}
-
-const struct tnl_pipeline_stage _r300_texrect_stage =
-{
-   "r300 texrect stage",			/* name */
-   NULL,
-   alloc_texrect_data,
-   free_texrect_data,
-   NULL,
-   run_texrect_stage
-};
-
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index fff11653de1..ff3c51c5edf 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -359,7 +359,7 @@ static void update_alpha(GLcontext *ctx)
 	GLboolean really_enabled = ctx->Color.AlphaEnabled;
 
 	CLAMPED_FLOAT_TO_UBYTE(refByte, ctx->Color.AlphaRef);
-	
+
 	switch (ctx->Color.AlphaFunc) {
 	case GL_NEVER:
 		pp_misc |= R300_ALPHA_TEST_FAIL;
@@ -387,15 +387,15 @@ static void update_alpha(GLcontext *ctx)
 		really_enabled = GL_FALSE;
 		break;
 	}
-	
+
 	if (really_enabled) {
 		pp_misc |= R300_ALPHA_TEST_ENABLE;
 		pp_misc |= (refByte & R300_REF_ALPHA_MASK);
 	} else {
 		pp_misc = 0x0;
 	}
-	
-	
+
+
 	R300_STATECHANGE(r300, at);
 	r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc;
 	update_early_z(ctx);
@@ -438,19 +438,19 @@ static void update_depth(GLcontext* ctx)
 	R300_STATECHANGE(r300, zs);
 	r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE;
 	r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
-	
+
 	if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) {
 		if (ctx->Depth.Mask)
 			r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST_AND_WRITE;
 		else
 			r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST;
-		
+
 		r300->hw.zs.cmd[R300_ZS_CNTL_1] |= translate_func(ctx->Depth.Func) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
 	} else {
 		r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1;
 		r300->hw.zs.cmd[R300_ZS_CNTL_1] |= translate_func(GL_NEVER) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
 	}
-	
+
 	update_early_z(ctx);
 }
 
@@ -481,7 +481,7 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
 		if (state) {
 			r300->hw.fogs.cmd[R300_FOGS_STATE] |=
 			    R300_FOG_ENABLE;
-			
+
 			ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL );
 			ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
 			ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
@@ -491,7 +491,7 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
 			r300->hw.fogs.cmd[R300_FOGS_STATE] &=
 			    ~R300_FOG_ENABLE;
 		}
-		
+
 		break;
 
 	case GL_ALPHA_TEST:
@@ -520,7 +520,7 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
 		} else {
 #if R200_MERGED
 			FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state);
-#endif			
+#endif
 		}
 		break;
 
@@ -555,7 +555,7 @@ static void r300UpdatePolygonMode(GLcontext *ctx)
 	if (ctx->Polygon.FrontMode != GL_FILL ||
 	    ctx->Polygon.BackMode != GL_FILL) {
 		GLenum f, b;
-		
+
 		if (ctx->Polygon.FrontFace == GL_CCW) {
 			f = ctx->Polygon.FrontMode;
 			b = ctx->Polygon.BackMode;
@@ -673,9 +673,9 @@ static void r300Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
 {
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
 	union { int i; float f; } fogScale, fogStart;
-	
+
 	(void) param;
-	
+
 	fogScale.i = r300->hw.fogp.cmd[R300_FOGP_SCALE];
 	fogStart.i = r300->hw.fogp.cmd[R300_FOGP_START];
 
@@ -771,7 +771,7 @@ static void r300PointSize(GLcontext * ctx, GLfloat size)
 	size = ctx->Point._Size;
 
 	R300_STATECHANGE(r300, ps);
-	r300->hw.ps.cmd[R300_PS_POINTSIZE] = 
+	r300->hw.ps.cmd[R300_PS_POINTSIZE] =
 		((int)(size * 6) << R300_POINTSIZE_X_SHIFT) |
 		((int)(size * 6) << R300_POINTSIZE_Y_SHIFT);
 }
@@ -794,7 +794,7 @@ static void r300PolygonMode(GLcontext *ctx, GLenum face, GLenum mode)
 {
 	(void)face;
 	(void)mode;
-	
+
 	r300UpdatePolygonMode(ctx);
 }
 
@@ -831,7 +831,7 @@ static int translate_stencil_op(int op)
 static void r300ShadeModel(GLcontext * ctx, GLenum mode)
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-	
+
 	R300_STATECHANGE(rmesa, shade);
 	switch (mode) {
 	case GL_FLAT:
@@ -851,7 +851,7 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
 			  ((ctx->Stencil.ValueMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
-			  
+
 	GLuint flag;
 
 	R300_STATECHANGE(rmesa, zs);
@@ -859,16 +859,16 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
 	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(
 		(R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
 		| (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT));
-	
+
 	rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=  ~((R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
 						(R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
-	
+
 	flag = translate_func(ctx->Stencil.Function[0]);
 	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT);
-	
+
 	if (ctx->Stencil._TestTwoSide)
 		flag = translate_func(ctx->Stencil.Function[1]);
-	
+
 	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
 	rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
 }
@@ -890,16 +890,16 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail,
 
 	R300_STATECHANGE(rmesa, zs);
 		/* It is easier to mask what's left.. */
-	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= 
-	    (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) | 
-	    (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | 
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &=
+	    (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) |
+	    (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) |
 	    (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
 
 	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
 		 (translate_stencil_op(ctx->Stencil.FailFunc[0]) << R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT)
 		|(translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT)
 		|(translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT);
-	
+
 	if (ctx->Stencil._TestTwoSide) {
 		rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
 			 (translate_stencil_op(ctx->Stencil.FailFunc[1]) << R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
@@ -994,7 +994,7 @@ void r300UpdateViewportOffset( GLcontext *ctx )
 	R300_STATECHANGE( rmesa, vpt );
 	rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx);
 	rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty);
-      
+
 	}
 
 	radeonUpdateScissor( ctx );
@@ -1030,16 +1030,16 @@ r300UpdateDrawBuffer(GLcontext *ctx)
 
 
 	R300_STATECHANGE( rmesa, cb );
-	
+
 	r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset +
 		r300->radeon.radeonScreen->fbLocation;
 	r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch;//r300->radeon.state.color.drawPitch;
-	
+
 	if (r300->radeon.radeonScreen->cpp == 4)
 		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
 	else
 		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
-	
+
 	if (r300->radeon.sarea->tiling_enabled)
 		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
 #if 0
@@ -1050,7 +1050,7 @@ r300UpdateDrawBuffer(GLcontext *ctx)
 		= ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
 		& R200_COLOROFFSET_MASK);
 	rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
-	
+
 	if (rmesa->sarea->tiling_enabled) {
 		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
 	}
@@ -1062,23 +1062,41 @@ r300FetchStateParameter(GLcontext *ctx,
                         const gl_state_index state[STATE_LENGTH],
                         GLfloat *value)
 {
-    r300ContextPtr r300 = R300_CONTEXT(ctx);
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
 
-    switch(state[0])
-    {
-    case STATE_INTERNAL:
-    	switch(state[1])
-	{
-	case STATE_R300_WINDOW_DIMENSION:
-	    value[0] = r300->radeon.dri.drawable->w*0.5f;/* width*0.5 */
-    	    value[1] = r300->radeon.dri.drawable->h*0.5f;/* height*0.5 */
-	    value[2] = 0.5F; 				/* for moving range [-1 1] -> [0 1] */
-    	    value[3] = 1.0F; 				/* not used */
-	    break;
-	default:;
+	switch(state[0]) {
+	case STATE_INTERNAL:
+		switch(state[1]) {
+		case STATE_R300_WINDOW_DIMENSION:
+			value[0] = r300->radeon.dri.drawable->w*0.5f;/* width*0.5 */
+			value[1] = r300->radeon.dri.drawable->h*0.5f;/* height*0.5 */
+			value[2] = 0.5F; 				/* for moving range [-1 1] -> [0 1] */
+			value[3] = 1.0F; 				/* not used */
+			break;
+
+		case STATE_R300_TEXRECT_FACTOR: {
+			struct gl_texture_object* t = ctx->Texture.Unit[state[2]].CurrentRect;
+
+			if (t && t->Image[0][t->BaseLevel]) {
+				struct gl_texture_image* image = t->Image[0][t->BaseLevel];
+				value[0] = 1.0 / image->Width2;
+				value[1] = 1.0 / image->Height2;
+			} else {
+				value[0] = 1.0;
+				value[1] = 1.0;
+			}
+			value[2] = 1.0;
+			value[3] = 1.0;
+			break; }
+
+		default:
+			break;
+		}
+		break;
+
+	default:
+		break;
 	}
-    default:;
-    }
 }
 
 /**
@@ -1119,7 +1137,7 @@ static void r300PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units)
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	GLfloat constant = units;
-	
+
 	switch (ctx->Visual.depthBits) {
 	case 16:
 		constant *= 4.0;
@@ -1214,7 +1232,7 @@ void r300_setup_textures(GLcontext *ctx)
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
 	int hw_tmu=0;
 	int last_hw_tmu=-1; /* -1 translates into no setup costs for fields */
-	int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1 };
+	int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, };
 	struct r300_fragment_program *rp =
 		(struct r300_fragment_program *)
 		(char *)ctx->FragmentProgram._Current;
@@ -1228,7 +1246,7 @@ void r300_setup_textures(GLcontext *ctx)
 	R300_STATECHANGE(r300, tex.offset);
 	R300_STATECHANGE(r300, tex.chroma_key);
 	R300_STATECHANGE(r300, tex.border_color);
-	
+
 	r300->hw.txe.cmd[R300_TXE_ENABLE]=0x0;
 
 	mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
@@ -1243,24 +1261,24 @@ void r300_setup_textures(GLcontext *ctx)
 
 	/* We cannot let disabled tmu offsets pass DRM */
 	for(i=0; i < mtu; i++) {
-		if(TMU_ENABLED(ctx, i)) {
-			
+		if (ctx->Texture.Unit[i]._ReallyEnabled) {
+
 #if 0 /* Enables old behaviour */
 			hw_tmu = i;
 #endif
 			tmu_mappings[i] = hw_tmu;
-			
+
 			t=r300->state.texture.unit[i].texobj;
-			
+
 			if((t->format & 0xffffff00)==0xffffff00) {
 				WARN_ONCE("unknown texture format (entry %x) encountered. Help me !\n", t->format & 0xff);
 			}
-			
+
 			if (RADEON_DEBUG & DEBUG_STATE)
 				fprintf(stderr, "Activating texture unit %d\n", i);
-			
+
 			r300->hw.txe.cmd[R300_TXE_ENABLE] |= (1 << hw_tmu);
-			
+
 			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = gen_fixed_filter(t->filter) | (hw_tmu << 28);
 			/* Currently disabled! */
 			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80;
@@ -1268,24 +1286,24 @@ void r300_setup_textures(GLcontext *ctx)
 			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->format;
 			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pitch_reg;
 			r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->offset;
-			
+
 			if(t->offset & R300_TXO_MACRO_TILE) {
 				WARN_ONCE("macro tiling enabled!\n");
 			}
-			
+
 			if(t->offset & R300_TXO_MICRO_TILE) {
 				WARN_ONCE("micro tiling enabled!\n");
 			}
-			
+
 			r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0;
 			r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pp_border_color;
-			
+
 			last_hw_tmu = hw_tmu;
-			
+
 			hw_tmu++;
 		}
 	}
-	
+
 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER_0, last_hw_tmu + 1);
 	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
 	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
@@ -1294,31 +1312,41 @@ void r300_setup_textures(GLcontext *ctx)
 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
-	
-	
+
+
 	if (!rp)	/* should only happenen once, just after context is created */
 		return;
-	
+
 	R300_STATECHANGE(r300, fpt);
-	
+
 	for(i = 0; i < rp->tex.length; i++){
 		int unit;
+		int opcode;
 		unsigned long val;
-		
+
 		unit = rp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT;
 		unit &= 15;
-		
+
 		val = rp->tex.inst[i];
 		val &= ~R300_FPITX_IMAGE_MASK;
-		
-		assert(tmu_mappings[unit] >= 0);
-		
-		val |= tmu_mappings[unit] << R300_FPITX_IMAGE_SHIFT;
-		r300->hw.fpt.cmd[R300_FPT_INSTR_0+i] = val;
+
+		opcode = (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT;
+		if (opcode == R300_FPITX_OP_KIL) {
+			r300->hw.fpt.cmd[R300_FPT_INSTR_0+i] = val;
+		} else {
+			if (tmu_mappings[unit] >= 0) {
+				val |= tmu_mappings[unit] << R300_FPITX_IMAGE_SHIFT;
+				r300->hw.fpt.cmd[R300_FPT_INSTR_0+i] = val;
+			} else {
+				// We get here when the corresponding texture image is incomplete
+				// (e.g. incomplete mipmaps).
+				r300->hw.fpt.cmd[R300_FPT_INSTR_0+i] = val;
+			}
+		}
 	}
-	
+
 	r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, rp->tex.length);
-	
+
 	if (RADEON_DEBUG & DEBUG_STATE)
 		fprintf(stderr, "TX_ENABLE: %08x  last_hw_tmu=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu);
 }
@@ -1367,11 +1395,11 @@ void r300_setup_rs_unit(GLcontext *ctx)
 	R300_STATECHANGE(r300, ri);
 	R300_STATECHANGE(r300, rc);
 	R300_STATECHANGE(r300, rr);
-	
+
 	fp_reg = in_texcoords = col_interp_nr = high_rr = 0;
 
 	r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0;
-	
+
 	if (InputsRead & FRAG_BIT_WPOS){
 		for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
 			if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
@@ -1385,7 +1413,7 @@ void r300_setup_rs_unit(GLcontext *ctx)
 		InputsRead |= (FRAG_BIT_TEX0 << i);
 		InputsRead &= ~FRAG_BIT_WPOS;
 	}
-	
+
 	for (i=0;i<ctx->Const.MaxTextureUnits;i++) {
 		r300->hw.ri.cmd[R300_RI_INTERP_0+i] = 0
 				| R300_RS_INTERP_USED
@@ -1409,7 +1437,7 @@ void r300_setup_rs_unit(GLcontext *ctx)
 			}
 			InputsRead &= ~(FRAG_BIT_TEX0<<i);
 			fp_reg++;
-		} 
+		}
 		/* Need to count all coords enabled at vof */
 		if (R300_OUTPUTS_WRITTEN_TEST( OutputsWritten, VERT_RESULT_TEX0+i, _TNL_ATTRIB_TEX(i) ))
 			in_texcoords++;
@@ -1430,7 +1458,7 @@ void r300_setup_rs_unit(GLcontext *ctx)
 		col_interp_nr++;
 	}
 	out:
-	
+
 	if (InputsRead & FRAG_BIT_COL1) {
 		if (!R300_OUTPUTS_WRITTEN_TEST( OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1 )) {
 			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
@@ -1444,7 +1472,7 @@ void r300_setup_rs_unit(GLcontext *ctx)
 		if (high_rr < 1) high_rr = 1;
 		col_interp_nr++;
 	}
-	
+
 	/* Need at least one. This might still lock as the values are undefined... */
 	if (in_texcoords == 0 && col_interp_nr == 0) {
 		r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
@@ -1452,7 +1480,7 @@ void r300_setup_rs_unit(GLcontext *ctx)
 				| (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
 		col_interp_nr++;
 	}
-	
+
 	r300->hw.rc.cmd[1] = 0
 			| (in_texcoords << R300_RS_CNTL_TC_CNT_SHIFT)
 			| (col_interp_nr << R300_RS_CNTL_CI_CNT_SHIFT)
@@ -1517,8 +1545,8 @@ void r300SetupVertexProgram(r300ContextPtr rmesa);
 /* just a skeleton for now.. */
 
 /* Generate a vertex shader that simply transforms vertex and texture coordinates,
-   while leaving colors intact. Nothing fancy (like lights) 
-   
+   while leaving colors intact. Nothing fancy (like lights)
+
    If implementing lights make a copy first, so it is easy to switch between the two versions */
 static void r300GenerateSimpleVertexShader(r300ContextPtr r300)
 {
@@ -1529,14 +1557,14 @@ static void r300GenerateSimpleVertexShader(r300ContextPtr r300)
 	r300->state.vap_param.transform_offset=0x0;  /* transform matrix */
 	r300->state.vertex_shader.param_offset=0x0;
 	r300->state.vertex_shader.param_count=0x4;  /* 4 vector values - 4x4 matrix */
-	
+
 	r300->state.vertex_shader.program_start=0x0;
 	r300->state.vertex_shader.unknown_ptr1=0x4; /* magic value ? */
 	r300->state.vertex_shader.program_end=0x0;
-	
+
 	r300->state.vertex_shader.unknown_ptr2=0x0; /* magic value */
 	r300->state.vertex_shader.unknown_ptr3=0x4; /* magic value */
-	
+
 	/* Initialize matrix and vector parameters.. these should really be restructured */
 	/* TODO: fix vertex_shader structure */
 	r300->state.vertex_shader.matrix[0].length=16;
@@ -1546,7 +1574,7 @@ static void r300GenerateSimpleVertexShader(r300ContextPtr r300)
 	r300->state.vertex_shader.vector[1].length=0;
 	r300->state.vertex_shader.unknown1.length=0;
 	r300->state.vertex_shader.unknown2.length=0;
-	
+
 #define WRITE_OP(oper,source1,source2,source3)	{\
 	r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].op=(oper); \
 	r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src1=(source1); \
@@ -1556,35 +1584,35 @@ static void r300GenerateSimpleVertexShader(r300ContextPtr r300)
 	}
 
 	/* Multiply vertex coordinates with transform matrix */
-			
+
 	WRITE_OP(
 		EASY_VSF_OP(MUL, 0, ALL, TMP),
 		VSF_PARAM(3),
 		VSF_ATTR_W(0),
 		EASY_VSF_SOURCE(0, W, W, W, W, NONE, NONE)
 		)
-	
+
 	WRITE_OP(
 		EASY_VSF_OP(MUL, 1, ALL, RESULT),
 		VSF_REG(1),
 		VSF_ATTR_UNITY(1),
 		VSF_UNITY(1)
 		)
-	
+
 	WRITE_OP(
 		EASY_VSF_OP(MAD, 0, ALL, TMP),
 		VSF_PARAM(2),
 		VSF_ATTR_Z(0),
 		VSF_TMP(0)
 		)
-	
+
 	WRITE_OP(
 		EASY_VSF_OP(MAD, 0, ALL, TMP),
 		VSF_PARAM(1),
 		VSF_ATTR_Y(0),
 		VSF_TMP(0)
 		)
-	
+
 	WRITE_OP(
 		EASY_VSF_OP(MAD, 0, ALL, RESULT),
 		VSF_PARAM(0),
@@ -1592,7 +1620,7 @@ static void r300GenerateSimpleVertexShader(r300ContextPtr r300)
 		VSF_TMP(0)
 		)
 	o_reg += 2;
-	
+
 	for (i = VERT_ATTRIB_COLOR1; i < VERT_ATTRIB_MAX; i++)
 		if (r300->state.sw_tcl_inputs[i] != -1) {
 			WRITE_OP(
@@ -1601,16 +1629,16 @@ static void r300GenerateSimpleVertexShader(r300ContextPtr r300)
 				VSF_ATTR_UNITY(r300->state.sw_tcl_inputs[i]),
 				VSF_UNITY(r300->state.sw_tcl_inputs[i])
 				)
-		
+
 		}
-	
+
 	r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */
 	r300->state.vertex_shader.program.length=(r300->state.vertex_shader.program_end+1)*4;
-	
+
 	r300->state.vertex_shader.unknown_ptr1=r300->state.vertex_shader.program_end; /* magic value ? */
 	r300->state.vertex_shader.unknown_ptr2=r300->state.vertex_shader.program_end; /* magic value ? */
 	r300->state.vertex_shader.unknown_ptr3=r300->state.vertex_shader.program_end; /* magic value ? */
-	
+
 }
 
 
@@ -1688,14 +1716,14 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
 	int inst_count;
 	int param_count;
 	struct r300_vertex_program *prog=(struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
-			
+
 
 	((drm_r300_cmd_header_t*)rmesa->hw.vpp.cmd)->vpu.count = 0;
 	R300_STATECHANGE(rmesa, vpp);
 	param_count = r300VertexProgUpdateParams(ctx, (struct r300_vertex_program_cont *)ctx->VertexProgram._Current/*prog*/, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
 	bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
 	param_count /= 4;
-	
+
 	/* Reset state, in case we don't use something */
 	((drm_r300_cmd_header_t*)rmesa->hw.vpi.cmd)->vpu.count = 0;
 	((drm_r300_cmd_header_t*)rmesa->hw.vps.cmd)->vpu.count = 0;
@@ -1734,23 +1762,23 @@ void r300UpdateShaders(r300ContextPtr rmesa)
 	GLcontext *ctx;
 	struct r300_vertex_program *vp;
 	int i;
-	
+
 	ctx = rmesa->radeon.glCtx;
-	
+
 	if (rmesa->NewGLState && hw_tcl_on) {
 		rmesa->NewGLState = 0;
-		
+
 		for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
 			rmesa->temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i];
 			TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &rmesa->dummy_attrib[i];
 		}
-		
+
 		_tnl_UpdateFixedFunctionProgram(ctx);
-	
+
 		for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
 			TNL_CONTEXT(ctx)->vb.AttribPtr[i] = rmesa->temp_attrib[i];
 		}
-		
+
 		r300_select_vertex_shader(rmesa);
 		vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
 		/*if (vp->translated == GL_FALSE)
@@ -1764,26 +1792,26 @@ void r300UpdateShaders(r300ContextPtr rmesa)
 		}
 		r300UpdateStateParameters(ctx, _NEW_PROGRAM);
 	}
-	
+
 }
 
 void r300UpdateShaderStates(r300ContextPtr rmesa)
 {
 	GLcontext *ctx;
 	ctx = rmesa->radeon.glCtx;
-	
+
 	r300UpdateTextureState(ctx);
 
 	r300SetupPixelShader(rmesa);
 	r300_setup_textures(ctx);
-	
+
 	r300SetupVertexShader(rmesa);
 	r300_setup_rs_unit(ctx);
 }
 
 /* This is probably wrong for some values, I need to test this
  * some more.  Range checking would be a good idea also..
- * 
+ *
  * But it works for most things.  I'll fix it later if someone
  * else with a better clue doesn't
  */
@@ -1821,13 +1849,13 @@ void r300SetupPixelShader(r300ContextPtr rmesa)
 
 	if (!rp)	/* should only happenen once, just after context is created */
 		return;
-	
+
 	r300_translate_fragment_shader(rmesa, rp);
 	if (!rp->translated) {
 		fprintf(stderr, "%s: No valid fragment shader, exiting\n", __func__);
 		return;
 	}
-	
+
 #define OUTPUT_FIELD(st, reg, field)  \
 		R300_STATECHANGE(rmesa, st); \
 		for(i=0;i<=rp->alu_end;i++) \
@@ -1884,7 +1912,7 @@ void r300SetupPixelShader(r300ContextPtr rmesa)
 static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
 {
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
-	
+
 	_swrast_InvalidateState(ctx, new_state);
 	_swsetup_InvalidateState(ctx, new_state);
 	_vbo_InvalidateState(ctx, new_state);
@@ -1945,7 +1973,7 @@ void r300ResetHwState(r300ContextPtr r300)
 	r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
 	r300DepthMask(ctx, ctx->Depth.Mask);
 	r300DepthFunc(ctx, ctx->Depth.Func);
-	
+
 	/* stencil */
 	r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
 	r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]);
@@ -1957,7 +1985,7 @@ void r300ResetHwState(r300ContextPtr r300)
 	r300UpdateTextureState(ctx);
 
 //	r300_setup_routing(ctx, GL_TRUE);
-	
+
 #if 0 /* Done in prior to rendering */
 	if(hw_tcl_on == GL_FALSE){
 		r300EmitArrays(ctx, GL_TRUE); /* Just do the routing */
@@ -1973,7 +2001,7 @@ void r300ResetHwState(r300ContextPtr r300)
 
 	r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
 	r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
-		
+
 		/* Initialize magic registers
 		 TODO : learn what they really do, or get rid of
 		 those we don't have to touch */
@@ -2038,7 +2066,7 @@ void r300ResetHwState(r300ContextPtr r300)
 
 	r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666;
 	r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666;
-	if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) || 
+	if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) ||
 	     (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350))
 		r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = R300_GB_TILE_ENABLE
 							| R300_GB_TILE_PIPE_COUNT_R300
@@ -2097,7 +2125,7 @@ void r300ResetHwState(r300ContextPtr r300)
 
 	r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits);
 	r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
-	
+
 	r300->hw.unk42C0.cmd[1] = 0x4B7FFFFF;
 	r300->hw.unk42C0.cmd[2] = 0x00000000;
 
@@ -2153,20 +2181,20 @@ void r300ResetHwState(r300ContextPtr r300)
 	r300BlendColor(ctx, ctx->Color.BlendColor);
 	r300->hw.blend_color.cmd[2] = 0;
 	r300->hw.blend_color.cmd[3] = 0;
-	
+
 	/* Again, r300ClearBuffer uses this */
 	r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset +
 		r300->radeon.radeonScreen->fbLocation;
 	r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
-	
+
 	if (r300->radeon.radeonScreen->cpp == 4)
 		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
 	else
 		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
-	
+
 	if (r300->radeon.sarea->tiling_enabled)
 		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
-	
+
 	r300->hw.unk4E50.cmd[1] = 0;
 	r300->hw.unk4E50.cmd[2] = 0;
 	r300->hw.unk4E50.cmd[3] = 0;
@@ -2193,11 +2221,11 @@ void r300ResetHwState(r300ContextPtr r300)
 		fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
 			ctx->Visual.depthBits);
 		exit(-1);
-			
+
 	}
 	/* z compress? */
 	//r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32;
-	
+
 	r300->hw.zstencil_format.cmd[3] = 0x00000003;
 	r300->hw.zstencil_format.cmd[4] = 0x00000000;
 
@@ -2205,15 +2233,15 @@ void r300ResetHwState(r300ContextPtr r300)
 		r300->radeon.radeonScreen->depthOffset +
 		r300->radeon.radeonScreen->fbLocation;
 	r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
-	
+
 	if (r300->radeon.sarea->tiling_enabled)	{
 		/* Turn off when clearing buffers ? */
 		r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTH_TILE_ENABLE;
-	
+
 		if (ctx->Visual.depthBits == 24)
 			r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTH_MICROTILE_ENABLE;
 	}
-	
+
 	r300->hw.unk4F28.cmd[1] = 0;
 
 	r300->hw.unk4F30.cmd[1] = 0;
@@ -2283,7 +2311,7 @@ void r300InitState(r300ContextPtr r300)
 					 ctx->Visual.depthBits == 24);
 
 	memset(&(r300->state.texture), 0, sizeof(r300->state.texture));
-	
+
 	r300ResetHwState(r300);
 }
 
@@ -2329,7 +2357,7 @@ void r300InitStateFuncs(struct dd_function_table* functions)
 
 	functions->PolygonOffset = r300PolygonOffset;
 	functions->PolygonMode = r300PolygonMode;
-	
+
    	functions->RenderMode = r300RenderMode;
 }
 
diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
index f531b54d115..c527677cd0c 100644
--- a/src/mesa/drivers/dri/r300/r300_texmem.c
+++ b/src/mesa/drivers/dri/r300/r300_texmem.c
@@ -303,195 +303,190 @@ static void r300UploadRectSubImage(r300ContextPtr rmesa,
  * Upload the texture image associated with texture \a t at the specified
  * level at the address relative to \a start.
  */
-static void uploadSubImage( r300ContextPtr rmesa, r300TexObjPtr t, 
+static void uploadSubImage( r300ContextPtr rmesa, r300TexObjPtr t,
 			    GLint hwlevel,
 			    GLint x, GLint y, GLint width, GLint height,
 			    GLuint face )
 {
-   struct gl_texture_image *texImage = NULL;
-   GLuint offset;
-   GLint imageWidth, imageHeight;
-   GLint ret;
-   drm_radeon_texture_t tex;
-   drm_radeon_tex_image_t tmp;
-   const int level = hwlevel + t->base.firstLevel;
-
-   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
-      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", 
-	       __FUNCTION__, (void *)t, (void *)t->base.tObj,
-	       level, width, height, face );
-   }
-
-   ASSERT(face < 6);
-
-   /* Ensure we have a valid texture to upload */
-   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
-      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
-      return;
-   }
-
-   texImage = t->base.tObj->Image[face][level];
-
-   if ( !texImage ) {
-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
-	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
-      return;
-   }
-   if ( !texImage->Data ) {
-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
-	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
-      return;
-   }
-
-
-   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-      assert(level == 0);
-      assert(hwlevel == 0);
-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
-	 fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
-      r300UploadRectSubImage( rmesa, t, texImage, x, y, width, height );
-      return;
-   }
-   else if (texImage->IsClientData) {
-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
-	 fprintf( stderr, "%s: image data is in GART client storage\n",
-		  __FUNCTION__);
-      r300UploadGARTClientSubImage( rmesa, t, texImage, hwlevel,
-				   x, y, width, height );
-      return;
-   }
-   else if ( RADEON_DEBUG & DEBUG_TEXTURE )
-      fprintf( stderr, "%s: image data is in normal memory\n",
-	       __FUNCTION__);
-      
-
-   imageWidth = texImage->Width;
-   imageHeight = texImage->Height;
-
-   offset = t->bufAddr + t->base.totalSize / 6 * face;
-
-   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
-      GLint imageX = 0;
-      GLint imageY = 0;
-      GLint blitX = t->image[face][hwlevel].x;
-      GLint blitY = t->image[face][hwlevel].y;
-      GLint blitWidth = t->image[face][hwlevel].width;
-      GLint blitHeight = t->image[face][hwlevel].height;
-      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
-	       imageWidth, imageHeight, imageX, imageY );
-      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
-	       blitWidth, blitHeight, blitX, blitY );
-      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
-	       (GLuint)offset, hwlevel, level );
-   }
-
-   t->image[face][hwlevel].data = texImage->Data;
-
-   /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
-    * NOTE: we're always use a 1KB-wide blit and I8 texture format.
-    * We used to use 1, 2 and 4-byte texels and used to use the texture
-    * width to dictate the blit width - but that won't work for compressed
-    * textures. (Brian)
-    * NOTE: can't do that with texture tiling. (sroland)
-    */
-   tex.offset = offset;
-   tex.image = &tmp;
-   /* copy (x,y,width,height,data) */
-   memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
-   
-   if (texImage->TexFormat->TexelBytes > 4) {
-      const int log2TexelBytes = (3 + (texImage->TexFormat->TexelBytes >> 4));
-      tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
-      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
-      tex.height = imageHeight;
-      tex.width = imageWidth << log2TexelBytes;
-      tex.offset += (tmp.x << log2TexelBytes) & ~1023;
-      tmp.x = tmp.x % (1024 >> log2TexelBytes);
-      tmp.width = tmp.width << log2TexelBytes;
-   }
-   else if (texImage->TexFormat->TexelBytes) {
-      /* use multi-byte upload scheme */
-      tex.height = imageHeight;
-      tex.width = imageWidth;
-      switch(texImage->TexFormat->TexelBytes) {
-      case 1:
-	tex.format = RADEON_TXFORMAT_I8;
-	break;
-      case 2:
-	tex.format = RADEON_TXFORMAT_AI88;
-	break;
-      case 4:
-	tex.format = RADEON_TXFORMAT_ARGB8888;
-	break;
-      }
-      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
-      tex.offset += tmp.x & ~1023;
-      tmp.x = tmp.x % 1024;
-#if 1
-      if (t->tile_bits & R300_TXO_MICRO_TILE) {
-	 /* need something like "tiled coordinates" ? */
-	 tmp.y = tmp.x / (tex.pitch * 128) * 2;
-	 tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
-	 tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
-      }
-      else 
-#endif
-      {
-	 tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
-      }
+	struct gl_texture_image *texImage = NULL;
+	GLuint offset;
+	GLint imageWidth, imageHeight;
+	GLint ret;
+	drm_radeon_texture_t tex;
+	drm_radeon_tex_image_t tmp;
+	const int level = hwlevel + t->base.firstLevel;
+
+	if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+		fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
+			__FUNCTION__, (void *)t, (void *)t->base.tObj,
+			level, width, height, face );
+	}
+
+	ASSERT(face < 6);
+
+	/* Ensure we have a valid texture to upload */
+	if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
+		_mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+		return;
+	}
+
+	texImage = t->base.tObj->Image[face][level];
+
+	if ( !texImage ) {
+		if ( RADEON_DEBUG & DEBUG_TEXTURE )
+		fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
+		return;
+	}
+	if ( !texImage->Data ) {
+		if ( RADEON_DEBUG & DEBUG_TEXTURE )
+		fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
+		return;
+	}
+
+
+	if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+		assert(level == 0);
+		assert(hwlevel == 0);
+		if ( RADEON_DEBUG & DEBUG_TEXTURE )
+		fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
+		r300UploadRectSubImage( rmesa, t, texImage, x, y, width, height );
+		return;
+	} else if (texImage->IsClientData) {
+		if ( RADEON_DEBUG & DEBUG_TEXTURE )
+		fprintf( stderr, "%s: image data is in GART client storage\n",
+			__FUNCTION__);
+		r300UploadGARTClientSubImage( rmesa, t, texImage, hwlevel,
+					x, y, width, height );
+		return;
+	} else if ( RADEON_DEBUG & DEBUG_TEXTURE )
+		fprintf( stderr, "%s: image data is in normal memory\n",
+			__FUNCTION__);
+
+
+	imageWidth = texImage->Width;
+	imageHeight = texImage->Height;
+
+	offset = t->bufAddr + t->base.totalSize / 6 * face;
+
+	if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+		GLint imageX = 0;
+		GLint imageY = 0;
+		GLint blitX = t->image[face][hwlevel].x;
+		GLint blitY = t->image[face][hwlevel].y;
+		GLint blitWidth = t->image[face][hwlevel].width;
+		GLint blitHeight = t->image[face][hwlevel].height;
+		fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
+			imageWidth, imageHeight, imageX, imageY );
+		fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
+			blitWidth, blitHeight, blitX, blitY );
+		fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
+			(GLuint)offset, hwlevel, level );
+	}
+
+	t->image[face][hwlevel].data = texImage->Data;
+
+	/* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
+	 * NOTE: we always use a 1KB-wide blit and I8 texture format.
+	 * We used to use 1, 2 and 4-byte texels and used to use the texture
+	 * width to dictate the blit width - but that won't work for compressed
+	 * textures. (Brian)
+	 * NOTE: can't do that with texture tiling. (sroland)
+	 */
+	tex.offset = offset;
+	tex.image = &tmp;
+	/* copy (x,y,width,height,data) */
+	memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
+
+	if (texImage->TexFormat->TexelBytes > 4) {
+		const int log2TexelBytes = (3 + (texImage->TexFormat->TexelBytes >> 4));
+		tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+		tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+		tex.height = imageHeight;
+		tex.width = imageWidth << log2TexelBytes;
+		tex.offset += (tmp.x << log2TexelBytes) & ~1023;
+		tmp.x = tmp.x % (1024 >> log2TexelBytes);
+		tmp.width = tmp.width << log2TexelBytes;
+	} else if (texImage->TexFormat->TexelBytes) {
+		/* use multi-byte upload scheme */
+		tex.height = imageHeight;
+		tex.width = imageWidth;
+		switch(texImage->TexFormat->TexelBytes) {
+		case 1:
+			tex.format = RADEON_TXFORMAT_I8;
+			break;
+		case 2:
+			tex.format = RADEON_TXFORMAT_AI88;
+			break;
+		case 4:
+			tex.format = RADEON_TXFORMAT_ARGB8888;
+			break;
+		}
+		tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+		tex.offset += tmp.x & ~1023;
+		tmp.x = tmp.x % 1024;
+
+		if (t->tile_bits & R300_TXO_MICRO_TILE) {
+			/* need something like "tiled coordinates" ? */
+			tmp.y = tmp.x / (tex.pitch * 128) * 2;
+			tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+			tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+		} else {
+			tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+		}
 #if 1
-      if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
-	 (texImage->Width * texImage->TexFormat->TexelBytes >= 256) &&
-	 ((!(t->tile_bits & R300_TXO_MICRO_TILE) && (texImage->Height >= 8)) ||
-	    (texImage->Height >= 16))) {
-	 /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
-	    OR if height is smaller than 8 automatically, but if micro tiling is active
-	    the limit is height 16 instead ? */
-	 tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
-      }
+		if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
+		    (texImage->Width * texImage->TexFormat->TexelBytes >= 256) &&
+		    ((!(t->tile_bits & R300_TXO_MICRO_TILE) && (texImage->Height >= 8)) ||
+		     (texImage->Height >= 16))) {
+			/* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
+			   OR if height is smaller than 8 automatically, but if micro tiling is active
+			   the limit is height 16 instead ? */
+			tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+		}
 #endif
-   }
-   else {
-      /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
-         needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
-      /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
-         so the kernel module reads the right amount of data. */
-      tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
-      tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
-      tex.height = (imageHeight + 3) / 4;
-      tex.width = (imageWidth + 3) / 4;
-      if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1)
-      {
-           tex.width *= 8;
-      } else {
-           tex.width *= 16;
-      }
-   }
-
-   LOCK_HARDWARE( &rmesa->radeon );
-   do {
-      ret = drmCommandWriteRead( rmesa->radeon.dri.fd, DRM_RADEON_TEXTURE,
-                                 &tex, sizeof(drm_radeon_texture_t) );
-      if (ret) {
-	 if (RADEON_DEBUG & DEBUG_IOCTL)
-	    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
-	 usleep(1);
-      }
-   } while ( ret == -EAGAIN );
-
-   UNLOCK_HARDWARE( &rmesa->radeon );
-
-   if ( ret ) {
-      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
-      fprintf( stderr, "   offset=0x%08x\n",
-	       offset );
-      fprintf( stderr, "   image width=%d height=%d\n",
-	       imageWidth, imageHeight );
-      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
-	       t->image[face][hwlevel].width, t->image[face][hwlevel].height,
-	       t->image[face][hwlevel].data );
-      exit( 1 );
-   }
+	} else {
+		/* For an 8x8 texture (2x2 dxt blocks), for instance,
+		   padding after the first two blocks is needed (only
+		   with dxt1, since 2 dxt3/dxt5 blocks already use 32 bytes). */
+		/* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
+		   has 4 real pixels. Needed so the kernel module reads
+		   the right amount of data. */
+		tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+		tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
+		tex.height = (imageHeight + 3) / 4;
+		tex.width = (imageWidth + 3) / 4;
+		if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1)
+		{
+			tex.width *= 8;
+		} else {
+			tex.width *= 16;
+		}
+	}
+
+	LOCK_HARDWARE( &rmesa->radeon );
+	do {
+		ret = drmCommandWriteRead( rmesa->radeon.dri.fd, DRM_RADEON_TEXTURE,
+		                           &tex, sizeof(drm_radeon_texture_t) );
+		if (ret) {
+		if (RADEON_DEBUG & DEBUG_IOCTL)
+		fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
+		usleep(1);
+		}
+	} while ( ret == -EAGAIN );
+
+	UNLOCK_HARDWARE( &rmesa->radeon );
+
+	if ( ret ) {
+		fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
+		fprintf( stderr, "   offset=0x%08x\n",
+				offset );
+		fprintf( stderr, "   image width=%d height=%d\n",
+				imageWidth, imageHeight );
+		fprintf( stderr, "    blit width=%d height=%d data=%p\n",
+				t->image[face][hwlevel].width, t->image[face][hwlevel].height,
+				t->image[face][hwlevel].data );
+		exit( 1 );
+	}
 }
 
 /**
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index 4bc0ea14f81..14b0c6063b7 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -218,7 +218,7 @@ static void r300SetTexImages(r300ContextPtr rmesa,
 		if (rmesa->texmicrotile  && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
 		   /* texrect might be able to use micro tiling too in theory? */
 		   (baseImage->Height > 1)) {
-			
+
 			/* allow 32 (bytes) x 1 mip (which will use two times the space
 			   the non-tiled version would use) max if base texture is large enough */
 			if ((numLevels == 1) ||
@@ -228,7 +228,7 @@ static void r300SetTexImages(r300ContextPtr rmesa,
 				t->tile_bits |= R300_TXO_MICRO_TILE;
 			}
 		}
-		
+
 		if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
 			/* we can set macro tiling even for small textures, they will be untiled anyway */
 			t->tile_bits |= R300_TXO_MACRO_TILE;
@@ -237,91 +237,85 @@ static void r300SetTexImages(r300ContextPtr rmesa,
 #endif
 
 	for (i = 0; i < numLevels; i++) {
-	  const struct gl_texture_image *texImage;
-	  GLuint size;
-	  
-	  texImage = tObj->Image[0][i + t->base.firstLevel];
-	  if (!texImage)
-	    break;
-	  
-	  /* find image size in bytes */
-	  if (texImage->IsCompressed) {
-	    if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
-	      // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
-	      if ((texImage->Width + 3) < 8) /* width one block */
-		size = texImage->CompressedSize * 4;
-	      else if ((texImage->Width + 3) < 16)
-		size = texImage->CompressedSize * 2;
-	      else size = texImage->CompressedSize;
-	    }
-	    else /* DXT3/5, 16 bytes per block */
-	    {
-	      WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
-	      // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
-	      if ((texImage->Width + 3) < 8)
-		size = texImage->CompressedSize * 2;
-	      else size = texImage->CompressedSize;
-	    }
-	    
-	  } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-	    size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
-	    blitWidth = 64 / texelBytes;
-	  } else if (t->tile_bits & R300_TXO_MICRO_TILE) {
-		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
-		   though the actual offset may be different (if texture is less than
-		   32 bytes width) to the untiled case */
-		int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
-		size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
-		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
-	  } else {
-	    int w = (texImage->Width * texelBytes + 31) & ~31;
-	    size = w * texImage->Height * texImage->Depth;
-	    blitWidth = MAX2(texImage->Width, 64 / texelBytes);
-	  }
-	  assert(size > 0);
-	  
-	  if(0)
-	    fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", texImage->Width, texImage->Height,
-		    texImage->Depth, texImage->TexFormat->TexelBytes,
-		    texImage->InternalFormat);
-	  
-	  /* Align to 32-byte offset.  It is faster to do this unconditionally
-	   * (no branch penalty).
-	   */
-	  
-	  curOffset = (curOffset + 0x1f) & ~0x1f;
-	  
-	  if (texelBytes) {
-	    t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
-	    t->image[0][i].y = 0;
-	    t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
-	    t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
-	  } else {
-	    t->image[0][i].x = curOffset % R300_BLIT_WIDTH_BYTES;
-	    t->image[0][i].y = curOffset / R300_BLIT_WIDTH_BYTES;
-	    t->image[0][i].width = MIN2(size, R300_BLIT_WIDTH_BYTES);
-	    t->image[0][i].height = size / t->image[0][i].width;
-	  }
-#if 0
-	  /* for debugging only and only  applicable to non-rectangle targets */
-	  assert(size % t->image[0][i].width == 0);
-	  assert(t->image[0][i].x == 0
-		 || (size < R300_BLIT_WIDTH_BYTES
-		     && t->image[0][i].height == 1));
-#endif
-	  
-	  if (0)
-	    fprintf(stderr,
-		    "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
-		    i, texImage->Width, texImage->Height,
-		    t->image[0][i].x, t->image[0][i].y,
-		    t->image[0][i].width, t->image[0][i].height,
-		    size, curOffset);
-	  
-	  curOffset += size;
-	  
+		const struct gl_texture_image *texImage;
+		GLuint size;
+
+		texImage = tObj->Image[0][i + t->base.firstLevel];
+		if (!texImage)
+			break;
+
+		/* find image size in bytes */
+		if (texImage->IsCompressed) {
+			if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
+				// fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
+				if ((texImage->Width + 3) < 8) /* width one block */
+					size = texImage->CompressedSize * 4;
+				else if ((texImage->Width + 3) < 16)
+					size = texImage->CompressedSize * 2;
+				else
+					size = texImage->CompressedSize;
+			} else {
+				/* DXT3/5, 16 bytes per block */
+				WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
+				// fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
+				if ((texImage->Width + 3) < 8)
+					size = texImage->CompressedSize * 2;
+				else
+					size = texImage->CompressedSize;
+			}
+		} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+			size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+			blitWidth = 64 / texelBytes;
+		} else if (t->tile_bits & R300_TXO_MICRO_TILE) {
+			/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+				though the actual offset may differ from the untiled case
+				(if the texture is less than 32 bytes wide) */
+			int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+			size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+			blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+		} else {
+			int w = (texImage->Width * texelBytes + 31) & ~31;
+			size = w * texImage->Height * texImage->Depth;
+			blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+		}
+		assert(size > 0);
+
+		if(0)
+			fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
+					texImage->Width, texImage->Height,
+					texImage->Depth, texImage->TexFormat->TexelBytes,
+					texImage->InternalFormat);
+
+		/* Align to 32-byte offset.  It is faster to do this unconditionally
+		 * (no branch penalty).
+		 */
+
+		curOffset = (curOffset + 0x1f) & ~0x1f;
+
+		if (texelBytes) {
+			/* fix x and y coords up later together with offset */
+			t->image[0][i].x = curOffset;
+			t->image[0][i].y = 0;
+			t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+			t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+		} else {
+			t->image[0][i].x = curOffset % R300_BLIT_WIDTH_BYTES;
+			t->image[0][i].y = curOffset / R300_BLIT_WIDTH_BYTES;
+			t->image[0][i].width = MIN2(size, R300_BLIT_WIDTH_BYTES);
+			t->image[0][i].height = size / t->image[0][i].width;
+		}
+
+		if (0)
+			fprintf(stderr,
+					"level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+					i, texImage->Width, texImage->Height,
+					t->image[0][i].x, t->image[0][i].y,
+					t->image[0][i].width, t->image[0][i].height,
+					size, curOffset);
+
+		curOffset += size;
 	}
-	
+
 	/* Align the total size of texture memory block.
 	 */
 	t->base.totalSize =
@@ -361,7 +355,7 @@ static void r300SetTexImages(r300ContextPtr rmesa,
 	} else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
 		ASSERT(log2Width == log2Height);
 		t->format |= R300_TX_FORMAT_CUBIC_MAP;
-		
+
 		t->format_x |= R200_TEXCOORD_CUBIC_ENV;
 		t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
 				     (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
@@ -377,7 +371,7 @@ static void r300SetTexImages(r300ContextPtr rmesa,
 		ASSERT(log2Width == log2Height);
 		t->format |= R300_TX_FORMAT_CUBIC_MAP;
 	}
-	
+
 	t->size = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
 			|((tObj->Image[0][t->base.firstLevel]->Height - 1) << R300_TX_HEIGHTMASK_SHIFT))
 			|((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c
index b513dc8d40a..24b19d8eb5a 100644
--- a/src/mesa/drivers/x11/xm_api.c
+++ b/src/mesa/drivers/x11/xm_api.c
@@ -363,7 +363,6 @@ static XMesaBuffer
 create_xmesa_buffer(XMesaDrawable d, BufferType type,
                     XMesaVisual vis, XMesaColormap cmap)
 {
-   GLboolean swAlpha;
    XMesaBuffer b;
 
    ASSERT(type == WINDOW || type == PIXMAP || type == PBUFFER);
@@ -421,10 +420,10 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type,
       /* Visual has alpha, but pixel format doesn't support it.
        * We'll use an alpha renderbuffer wrapper.
        */
-      swAlpha = GL_TRUE;
+      b->swAlpha = GL_TRUE;
    }
    else {
-      swAlpha = GL_FALSE;
+      b->swAlpha = GL_FALSE;
    }
 
    /*
@@ -435,9 +434,9 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type,
                                 vis->mesa_visual.haveDepthBuffer,
                                 vis->mesa_visual.haveStencilBuffer,
                                 vis->mesa_visual.haveAccumBuffer,
-                                swAlpha,
+                                b->swAlpha,
                                 vis->mesa_visual.numAuxBuffers > 0 );
-   
+
    /* insert buffer into linked list */
    b->Next = XMesaBufferList;
    XMesaBufferList = b;
@@ -2211,6 +2210,9 @@ void XMesaSwapBuffers( XMesaBuffer b )
 		      );
          /*_glthread_UNLOCK_MUTEX(_xmesa_lock);*/
       }
+
+      if (b->swAlpha)
+         _mesa_copy_soft_alpha_renderbuffers(ctx, &b->mesa_buffer);
    }
 #if !defined(XFree86Server)
    XSync( b->xm_visual->display, False );
diff --git a/src/mesa/drivers/x11/xm_buffer.c b/src/mesa/drivers/x11/xm_buffer.c
index c1fa23328f0..bb8fe31ce88 100644
--- a/src/mesa/drivers/x11/xm_buffer.c
+++ b/src/mesa/drivers/x11/xm_buffer.c
@@ -422,18 +422,6 @@ xmesa_delete_framebuffer(struct gl_framebuffer *fb)
       XMesaDestroyImage( b->rowimage );
    }
 
-   /* Note that XMesaBuffer renderbuffers normally have a refcount of 2
-    * (creation + binding) so we need to explicitly delete/unbind them here.
-    */
-   if (b->frontxrb) {
-      _mesa_unreference_renderbuffer((struct gl_renderbuffer **) &b->frontxrb);
-      ASSERT(b->frontxrb == NULL);
-   }
-   if (b->backxrb) {
-      _mesa_unreference_renderbuffer((struct gl_renderbuffer **) &b->backxrb);
-      ASSERT(b->backxrb == NULL);
-   }
-
    _mesa_free_framebuffer_data(fb);
    _mesa_free(fb);
 }
diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h
index 5516031ffe7..85cb6b66363 100644
--- a/src/mesa/drivers/x11/xmesaP.h
+++ b/src/mesa/drivers/x11/xmesaP.h
@@ -224,6 +224,7 @@ struct xmesa_buffer {
    GLint db_mode;		/* 0 = single buffered */
 				/* BACK_PIXMAP = use Pixmap for back buffer */
 				/* BACK_XIMAGE = use XImage for back buffer */
+   GLboolean swAlpha;
 
    GLuint shm;			/* X Shared Memory extension status:	*/
 				/*    0 = not available			*/
diff --git a/src/mesa/main/api_loopback.c b/src/mesa/main/api_loopback.c
index 717ef1fc8f0..efe5a77d581 100644
--- a/src/mesa/main/api_loopback.c
+++ b/src/mesa/main/api_loopback.c
@@ -146,7 +146,7 @@ static void GLAPIENTRY
 loopback_Color3iv_f( const GLint *v )
 {
    COLORF( INT_TO_FLOAT(v[0]), INT_TO_FLOAT(v[1]),
-	   INT_TO_FLOAT(v[2]), INT_TO_FLOAT(v[3]) );
+	   INT_TO_FLOAT(v[2]), 1.0 );
 }
 
 static void GLAPIENTRY
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index f7e870b49cd..fefa14e5036 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -559,7 +559,7 @@ _mesa_IsRenderbufferEXT(GLuint renderbuffer)
 void GLAPIENTRY
 _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer)
 {
-   struct gl_renderbuffer *newRb, *oldRb;
+   struct gl_renderbuffer *newRb;
    GET_CURRENT_CONTEXT(ctx);
 
    ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -593,21 +593,16 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer)
 	 }
          ASSERT(newRb->AllocStorage);
          _mesa_HashInsert(ctx->Shared->RenderBuffers, renderbuffer, newRb);
+         newRb->RefCount = 1; /* referenced by hash table */
       }
-      newRb->RefCount++;
    }
    else {
       newRb = NULL;
    }
 
-   oldRb = ctx->CurrentRenderbuffer;
-   if (oldRb) {
-      _mesa_unreference_renderbuffer(&oldRb);
-   }
-
    ASSERT(newRb != &DummyRenderbuffer);
 
-   ctx->CurrentRenderbuffer = newRb;
+   _mesa_reference_renderbuffer(&ctx->CurrentRenderbuffer, newRb);
 }
 
 
@@ -632,14 +627,15 @@ _mesa_DeleteRenderbuffersEXT(GLsizei n, const GLuint *renderbuffers)
                _mesa_BindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0);
             }
 
-	    /* remove from hash table immediately, to free the ID */
+	    /* Remove from hash table immediately, to free the ID.
+             * But the object will not be freed until it's no longer
+             * referenced anywhere else.
+             */
 	    _mesa_HashRemove(ctx->Shared->RenderBuffers, renderbuffers[i]);
 
             if (rb != &DummyRenderbuffer) {
-               /* But the object will not be freed until it's no longer
-                * bound in any context.
-                */
-               _mesa_unreference_renderbuffer(&rb);
+               /* no longer referenced by hash table */
+               _mesa_reference_renderbuffer(&rb, NULL);
 	    }
 	 }
       }
diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index cd4f594aa29..1fd31a53211 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -70,42 +70,6 @@ compute_depth_max(struct gl_framebuffer *fb)
 
 
 /**
- * Set the framebuffer's _DepthBuffer field, taking care of
- * reference counts, etc.
- */
-static void
-set_depth_renderbuffer(struct gl_framebuffer *fb,
-                       struct gl_renderbuffer *rb)
-{
-   if (fb->_DepthBuffer) {
-      _mesa_unreference_renderbuffer(&fb->_DepthBuffer);
-   }
-   fb->_DepthBuffer = rb;
-   if (rb) {
-      rb->RefCount++;
-   }
-}
-
-
-/**
- * Set the framebuffer's _StencilBuffer field, taking care of
- * reference counts, etc.
- */
-static void
-set_stencil_renderbuffer(struct gl_framebuffer *fb,
-                         struct gl_renderbuffer *rb)
-{
-   if (fb->_StencilBuffer) {
-      _mesa_unreference_renderbuffer(&fb->_StencilBuffer);
-   }
-   fb->_StencilBuffer = rb;
-   if (rb) {
-      rb->RefCount++;
-   }
-}
-
-
-/**
  * Create and initialize a gl_framebuffer object.
  * This is intended for creating _window_system_ framebuffers, not generic
  * framebuffer objects ala GL_EXT_framebuffer_object.
@@ -223,7 +187,7 @@ _mesa_free_framebuffer_data(struct gl_framebuffer *fb)
    for (i = 0; i < BUFFER_COUNT; i++) {
       struct gl_renderbuffer_attachment *att = &fb->Attachment[i];
       if (att->Renderbuffer) {
-         _mesa_unreference_renderbuffer(&att->Renderbuffer);
+         _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
       }
       if (att->Texture) {
          /* render to texture */
@@ -239,9 +203,9 @@ _mesa_free_framebuffer_data(struct gl_framebuffer *fb)
       att->Texture = NULL;
    }
 
-   /* unbind depth/stencil to decr ref counts */
-   set_depth_renderbuffer(fb, NULL);
-   set_stencil_renderbuffer(fb, NULL);
+   /* unbind _Depth/_StencilBuffer to decr ref counts */
+   _mesa_reference_renderbuffer(&fb->_DepthBuffer, NULL);
+   _mesa_reference_renderbuffer(&fb->_StencilBuffer, NULL);
 }
 
 
@@ -569,13 +533,13 @@ _mesa_update_depth_buffer(GLcontext *ctx,
          /* need to update wrapper */
          struct gl_renderbuffer *wrapper
             = _mesa_new_z24_renderbuffer_wrapper(ctx, depthRb);
-         set_depth_renderbuffer(fb, wrapper);
+         _mesa_reference_renderbuffer(&fb->_DepthBuffer, wrapper);
          ASSERT(fb->_DepthBuffer->Wrapped == depthRb);
       }
    }
    else {
       /* depthRb may be null */
-      set_depth_renderbuffer(fb, depthRb);
+      _mesa_reference_renderbuffer(&fb->_DepthBuffer, depthRb);
    }
 }
 
@@ -610,13 +574,13 @@ _mesa_update_stencil_buffer(GLcontext *ctx,
          /* need to update wrapper */
          struct gl_renderbuffer *wrapper
             = _mesa_new_s8_renderbuffer_wrapper(ctx, stencilRb);
-         set_stencil_renderbuffer(fb, wrapper);
+         _mesa_reference_renderbuffer(&fb->_StencilBuffer, wrapper);
          ASSERT(fb->_StencilBuffer->Wrapped == stencilRb);
       }
    }
    else {
       /* stencilRb may be null */
-      set_stencil_renderbuffer(fb, stencilRb);
+      _mesa_reference_renderbuffer(&fb->_StencilBuffer, stencilRb);
    }
 }
 
diff --git a/src/mesa/main/rbadaptors.c b/src/mesa/main/rbadaptors.c
index 60f4948becf..c1ac0606c86 100644
--- a/src/mesa/main/rbadaptors.c
+++ b/src/mesa/main/rbadaptors.c
@@ -45,7 +45,7 @@ Delete_wrapper(struct gl_renderbuffer *rb)
    /* Decrement reference count on the buffer we're wrapping and delete
     * it if refcount hits zero.
     */
-   _mesa_unreference_renderbuffer(&rb->Wrapped);
+   _mesa_reference_renderbuffer(&rb->Wrapped, NULL);
 
    /* delete myself */
    _mesa_delete_renderbuffer(rb);
diff --git a/src/mesa/main/renderbuffer.c b/src/mesa/main/renderbuffer.c
index e387c42c345..49706b52516 100644
--- a/src/mesa/main/renderbuffer.c
+++ b/src/mesa/main/renderbuffer.c
@@ -1435,6 +1435,17 @@ put_mono_values_alpha8(GLcontext *ctx, struct gl_renderbuffer *arb,
 }
 
 
+static void
+copy_buffer_alpha8(struct gl_renderbuffer* dst, struct gl_renderbuffer* src)
+{
+   ASSERT(dst->_ActualFormat == GL_ALPHA8);
+   ASSERT(src->_ActualFormat == GL_ALPHA8);
+   ASSERT(dst->Width == src->Width);
+   ASSERT(dst->Height == src->Height);
+
+   _mesa_memcpy(dst->Data, src->Data, dst->Width * dst->Height * sizeof(GLubyte));
+}
+
 
 /**********************************************************************/
 /**********************************************************************/
@@ -1462,7 +1473,7 @@ _mesa_init_renderbuffer(struct gl_renderbuffer *rb, GLuint name)
 
    rb->ClassID = 0;
    rb->Name = name;
-   rb->RefCount = 1;
+   rb->RefCount = 0;
    rb->Delete = _mesa_delete_renderbuffer;
 
    /* The rest of these should be set later by the caller of this function or
@@ -1766,6 +1777,27 @@ _mesa_add_alpha_renderbuffers(GLcontext *ctx, struct gl_framebuffer *fb,
 
 
 /**
+ * For framebuffers that use a software alpha channel wrapper
+ * created by _mesa_add_alpha_renderbuffers or _mesa_add_soft_renderbuffers,
+ * copy the back buffer alpha channel into the front buffer alpha channel.
+ */
+void
+_mesa_copy_soft_alpha_renderbuffers(GLcontext *ctx, struct gl_framebuffer *fb)
+{
+   if (fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer &&
+       fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer)
+      copy_buffer_alpha8(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer,
+                         fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+
+
+   if (fb->Attachment[BUFFER_FRONT_RIGHT].Renderbuffer &&
+       fb->Attachment[BUFFER_BACK_RIGHT].Renderbuffer)
+      copy_buffer_alpha8(fb->Attachment[BUFFER_FRONT_RIGHT].Renderbuffer,
+                         fb->Attachment[BUFFER_BACK_RIGHT].Renderbuffer);
+}
+
+
+/**
  * Add a software-based depth renderbuffer to the given framebuffer.
  * This is a helper routine for device drivers when creating a
  * window system framebuffer (not a user-created render/framebuffer).
@@ -2073,9 +2105,7 @@ _mesa_add_renderbuffer(struct gl_framebuffer *fb,
 
    fb->Attachment[bufferName].Type = GL_RENDERBUFFER_EXT;
    fb->Attachment[bufferName].Complete = GL_TRUE;
-   fb->Attachment[bufferName].Renderbuffer = rb;
-
-   rb->RefCount++;
+   _mesa_reference_renderbuffer(&fb->Attachment[bufferName].Renderbuffer, rb);
 }
 
 
@@ -2093,38 +2123,55 @@ _mesa_remove_renderbuffer(struct gl_framebuffer *fb, GLuint bufferName)
    if (!rb)
       return;
 
-   _mesa_unreference_renderbuffer(&rb);
+   _mesa_reference_renderbuffer(&rb, NULL);
 
    fb->Attachment[bufferName].Renderbuffer = NULL;
 }
 
 
 /**
- * Decrement a renderbuffer object's reference count and delete it when
- * the refcount hits zero.
- * Note: we pass the address of a pointer.
+ * Set *ptr to point to rb.  If *ptr points to another renderbuffer,
+ * unreference that buffer first.  The new renderbuffer's refcount will
+ * be incremented.  The old renderbuffer's refcount will be decremented.
  */
 void
-_mesa_unreference_renderbuffer(struct gl_renderbuffer **rb)
+_mesa_reference_renderbuffer(struct gl_renderbuffer **ptr,
+                             struct gl_renderbuffer *rb)
 {
-   assert(rb);
-   if (*rb) {
+   assert(ptr);
+   if (*ptr == rb) {
+      /* no change */
+      return;
+   }
+
+   if (*ptr) {
+      /* Unreference the old renderbuffer */
       GLboolean deleteFlag = GL_FALSE;
+      struct gl_renderbuffer *oldRb = *ptr;
 
-      _glthread_LOCK_MUTEX((*rb)->Mutex);
-      ASSERT((*rb)->RefCount > 0);
-      (*rb)->RefCount--;
-      deleteFlag = ((*rb)->RefCount == 0);
-      _glthread_UNLOCK_MUTEX((*rb)->Mutex);
+      _glthread_LOCK_MUTEX(oldRb->Mutex);
+      ASSERT(oldRb->RefCount > 0);
+      oldRb->RefCount--;
+      /*printf("RB DECR %p to %d\n", (void*) oldRb, oldRb->RefCount);*/
+      deleteFlag = (oldRb->RefCount == 0);
+      _glthread_UNLOCK_MUTEX(oldRb->Mutex);
 
       if (deleteFlag)
-         (*rb)->Delete(*rb);
+         oldRb->Delete(oldRb);
 
-      *rb = NULL;
+      *ptr = NULL;
    }
-}
-
+   assert(!*ptr);
 
+   if (rb) {
+      /* reference new renderbuffer */
+      _glthread_LOCK_MUTEX(rb->Mutex);
+      rb->RefCount++;
+      /*printf("RB REF  %p to %d\n", (void*)rb, rb->RefCount);*/
+      _glthread_UNLOCK_MUTEX(rb->Mutex);
+      *ptr = rb;
+   }
+}
 
 
 /**
@@ -2148,4 +2195,3 @@ _mesa_new_depthstencil_renderbuffer(GLcontext *ctx, GLuint name)
 
    return dsrb;
 }
-
diff --git a/src/mesa/main/renderbuffer.h b/src/mesa/main/renderbuffer.h
index e1a0a559792..c9bf8885487 100644
--- a/src/mesa/main/renderbuffer.h
+++ b/src/mesa/main/renderbuffer.h
@@ -64,6 +64,9 @@ _mesa_add_alpha_renderbuffers(GLcontext *ctx, struct gl_framebuffer *fb,
                               GLboolean frontLeft, GLboolean backLeft,
                               GLboolean frontRight, GLboolean backRight);
 
+extern void
+_mesa_copy_soft_alpha_renderbuffers(GLcontext *ctx, struct gl_framebuffer *fb);
+
 extern GLboolean
 _mesa_add_depth_renderbuffer(GLcontext *ctx, struct gl_framebuffer *fb,
                              GLuint depthBits);
@@ -99,7 +102,8 @@ extern void
 _mesa_remove_renderbuffer(struct gl_framebuffer *fb, GLuint bufferName);
 
 extern void
-_mesa_unreference_renderbuffer(struct gl_renderbuffer **rb);
+_mesa_reference_renderbuffer(struct gl_renderbuffer **ptr,
+                             struct gl_renderbuffer *rb);
 
 extern struct gl_renderbuffer *
 _mesa_new_depthstencil_renderbuffer(GLcontext *ctx, GLuint name);
diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c
index 54ae7ce0a13..0c6fa82f112 100644
--- a/src/mesa/main/texenvprogram.c
+++ b/src/mesa/main/texenvprogram.c
@@ -1014,7 +1014,7 @@ create_new_program(GLcontext *ctx, struct state_key *key,
    p.program->Base.NumTexIndirections = 1;	/* correct? */
    p.program->Base.NumTexInstructions = 0;
    p.program->Base.NumAluInstructions = 0;
-   p.program->Base.String = 0;
+   p.program->Base.String = NULL;
    p.program->Base.NumInstructions =
    p.program->Base.NumTemporaries =
    p.program->Base.NumParameters =
@@ -1103,9 +1103,8 @@ create_new_program(GLcontext *ctx, struct state_key *key,
                   "generating tex env program");
       return;
    }
-   _mesa_memcpy(program->Base.Instructions, instBuffer,
-                sizeof(struct prog_instruction)
-                * program->Base.NumInstructions);
+   _mesa_copy_instructions(program->Base.Instructions, instBuffer,
+                           program->Base.NumInstructions);
 
    /* Notify driver the fragment program has (actually) changed.
     */
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 543d6efc984..706d3466955 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2061,12 +2061,6 @@ copytexsubimage_error_check2( GLcontext *ctx, GLuint dimensions,
    }
 
    if (teximage->IsCompressed) {
-      if (!_mesa_source_buffer_exists(ctx, teximage->_BaseFormat)) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glCopyTexSubImage%dD(missing readbuffer)", dimensions);
-         return GL_TRUE;
-      }
-
       if (target != GL_TEXTURE_2D) {
          _mesa_error(ctx, GL_INVALID_ENUM,
                      "glCopyTexSubImage%d(target)", dimensions);
@@ -2096,6 +2090,12 @@ copytexsubimage_error_check2( GLcontext *ctx, GLuint dimensions,
       return GL_TRUE;
    }
 
+   if (!_mesa_source_buffer_exists(ctx, teximage->_BaseFormat)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+               "glCopyTexSubImage%dD(missing readbuffer)", dimensions);
+      return GL_TRUE;
+   }
+
    if (teximage->_BaseFormat == GL_DEPTH_COMPONENT) {
       if (!ctx->ReadBuffer->_DepthBuffer) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
diff --git a/src/mesa/tnl/t_vp_build.c b/src/mesa/tnl/t_vp_build.c
index 8b8bb3a173a..63c99ee6ca0 100644
--- a/src/mesa/tnl/t_vp_build.c
+++ b/src/mesa/tnl/t_vp_build.c
@@ -1419,9 +1419,8 @@ create_new_program( const struct state_key *key,
    else
       p.temp_reserved = ~((1<<max_temps)-1);
 
-   p.program->Base.Instructions
-      = (struct prog_instruction*) MALLOC(sizeof(struct prog_instruction) * MAX_INSN);
-   p.program->Base.String = 0;
+   p.program->Base.Instructions = _mesa_alloc_instructions(MAX_INSN);
+   p.program->Base.String = NULL;
    p.program->Base.NumInstructions =
    p.program->Base.NumTemporaries =
    p.program->Base.NumParameters =