Merge remote branch 'origin/master' into pipe-video

Conflicts: src/gallium/drivers/r600/r600_asm.c
author: Christian König <[email protected]> 2011-01-20 22:43:18 +0100
committer: Christian König <[email protected]> 2011-01-20 22:43:18 +0100
commit: 78faf8d0e9c276a0ff1465e501d58fb3d66de2f7 (patch)
tree: 4e124bd6b511e408c5e113c4166b8fa97fd75b24 /src/mesa
parent: d2ff6b8715e817c1ef14d4bf12be58c19d894143 (diff)
parent: 37233f1ee0213a224611788bbab38840ba9f8308 (diff)
33 files changed, 868 insertions, 190 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 7c3ac0c14ef..b05ba35d65f 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -108,6 +108,7 @@ CXX_SOURCES = \
 	brw_fs.cpp \
 	brw_fs_channel_expressions.cpp \
 	brw_fs_reg_allocate.cpp \
+	brw_fs_schedule_instructions.cpp \
 	brw_fs_vector_splitting.cpp
 
 ASM_SOURCES = 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a35687d5991..9a71e5377df 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -316,7 +316,6 @@ int
 fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
 {
    unsigned int offset = 0;
-   float *vec_values;
 
    if (type->is_matrix()) {
       const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
@@ -335,7 +334,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_BOOL:
-      vec_values = fp->Base.Parameters->ParameterValues[loc];
       for (unsigned int i = 0; i < type->vector_elements; i++) {
 	 unsigned int param = c->prog_data.nr_params++;
 
@@ -359,8 +357,8 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
 	    c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
 	    break;
 	 }
-
-	 c->prog_data.param[param] = &vec_values[i];
+	 this->param_index[param] = loc;
+	 this->param_offset[param] = i;
       }
       return 1;
 
@@ -431,7 +429,6 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
 	  */
 	 int index = _mesa_add_state_reference(this->fp->Base.Parameters,
 					       (gl_state_index *)tokens);
-	 float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
 
 	 /* Add each of the unique swizzles of the element as a
 	  * parameter.  This'll end up matching the expected layout of
@@ -446,7 +443,9 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
 
 	    c->prog_data.param_convert[c->prog_data.nr_params] =
 	       PARAM_NO_CONVERT;
-	    c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
+	    this->param_index[c->prog_data.nr_params] = index;
+	    this->param_offset[c->prog_data.nr_params] = swiz;
+	    c->prog_data.nr_params++;
 	 }
       }
    }
@@ -1370,10 +1369,13 @@ fs_visitor::visit(ir_texture *ir)
       fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
       GLuint index = _mesa_add_state_reference(params,
 					       (gl_state_index *)tokens);
-      float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
 
-      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0];
-      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1];
+      this->param_index[c->prog_data.nr_params] = index;
+      this->param_offset[c->prog_data.nr_params] = 0;
+      c->prog_data.nr_params++;
+      this->param_index[c->prog_data.nr_params] = index;
+      this->param_offset[c->prog_data.nr_params] = 1;
+      c->prog_data.nr_params++;
 
       fs_reg dst = fs_reg(this, ir->coordinate->type);
       fs_reg src = coordinate;
@@ -2500,6 +2502,22 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
    }
 }
 
+/**
+ * To be called after the last _mesa_add_state_reference() call, to
+ * set up prog_data.param[] for assign_curb_setup() and
+ * setup_pull_constants().
+ */
+void
+fs_visitor::setup_paramvalues_refs()
+{
+   /* Set up the pointers into ParameterValues[] now that the array is finalized. */
+   for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+      c->prog_data.param[i] =
+	 fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+	 this->param_offset[i];
+   }
+}
+
 void
 fs_visitor::assign_curb_setup()
 {
@@ -2629,10 +2647,7 @@ fs_visitor::split_virtual_grfs()
       fs_inst *inst = (fs_inst *)iter.get();
 
       /* Texturing produces 4 contiguous registers, so no splitting. */
-      if ((inst->opcode == FS_OPCODE_TEX ||
-	   inst->opcode == FS_OPCODE_TXB ||
-	   inst->opcode == FS_OPCODE_TXL) &&
-	  inst->dst.file == GRF) {
+      if (inst->is_tex()) {
 	 split_grf[inst->dst.reg] = false;
       }
    }
@@ -2920,7 +2935,7 @@ fs_visitor::propagate_constants()
 	 if (scan_inst->dst.file == GRF &&
 	     scan_inst->dst.reg == inst->dst.reg &&
 	     (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
-	      scan_inst->opcode == FS_OPCODE_TEX)) {
+	      scan_inst->is_tex())) {
 	    break;
 	 }
       }
@@ -3015,13 +3030,13 @@ fs_visitor::register_coalesce()
 	 if (scan_inst->dst.file == GRF) {
 	    if (scan_inst->dst.reg == inst->dst.reg &&
 		(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
-		 scan_inst->opcode == FS_OPCODE_TEX)) {
+		 scan_inst->is_tex())) {
 	       interfered = true;
 	       break;
 	    }
 	    if (scan_inst->dst.reg == inst->src[0].reg &&
 		(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
-		 scan_inst->opcode == FS_OPCODE_TEX)) {
+		 scan_inst->is_tex())) {
 	       interfered = true;
 	       break;
 	    }
@@ -3102,7 +3117,7 @@ fs_visitor::compute_to_mrf()
 	     * into a compute-to-MRF.
 	     */
 
-	    if (scan_inst->opcode == FS_OPCODE_TEX) {
+	    if (scan_inst->is_tex()) {
 	       /* texturing writes several continuous regs, so we can't
 		* compute-to-mrf that.
 		*/
@@ -3123,14 +3138,7 @@ fs_visitor::compute_to_mrf()
 	       /* gen6 math instructions must have the destination be
 		* GRF, so no compute-to-MRF for them.
 		*/
-	       if (scan_inst->opcode == FS_OPCODE_RCP ||
-		   scan_inst->opcode == FS_OPCODE_RSQ ||
-		   scan_inst->opcode == FS_OPCODE_SQRT ||
-		   scan_inst->opcode == FS_OPCODE_EXP2 ||
-		   scan_inst->opcode == FS_OPCODE_LOG2 ||
-		   scan_inst->opcode == FS_OPCODE_SIN ||
-		   scan_inst->opcode == FS_OPCODE_COS ||
-		   scan_inst->opcode == FS_OPCODE_POW) {
+	       if (scan_inst->is_math()) {
 		  break;
 	       }
 	    }
@@ -3152,6 +3160,7 @@ fs_visitor::compute_to_mrf()
 	  */
 	 if (scan_inst->opcode == BRW_OPCODE_DO ||
 	     scan_inst->opcode == BRW_OPCODE_WHILE ||
+	     scan_inst->opcode == BRW_OPCODE_ELSE ||
 	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
 	    break;
 	 }
@@ -3238,7 +3247,7 @@ fs_visitor::remove_duplicate_mrf_writes()
       }
 
       if (inst->mlen > 0) {
-	 /* Found a SEND instruction, which will include two of fewer
+	 /* Found a SEND instruction, which will include two or fewer
 	  * implied MRF writes.  We could do better here.
 	  */
 	 for (int i = 0; i < implied_mrf_writes(inst); i++) {
@@ -3662,10 +3671,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
       v.emit_fb_writes();
 
       v.split_virtual_grfs();
-      v.setup_pull_constants();
 
-      v.assign_curb_setup();
-      v.assign_urb_setup();
+      v.setup_paramvalues_refs();
+      v.setup_pull_constants();
 
       bool progress;
       do {
@@ -3679,6 +3687,11 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
 	 progress = v.dead_code_eliminate() || progress;
       } while (progress);
 
+      v.schedule_instructions();
+
+      v.assign_curb_setup();
+      v.assign_urb_setup();
+
       if (0) {
 	 /* Debug of register spilling: Go spill everything. */
 	 int virtual_grf_count = v.virtual_grf_next;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 82d96f6ac02..f0497957bc4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -306,6 +306,25 @@ public:
 	      offset == inst->offset);
    }
 
+   bool is_tex()
+   {
+      return (opcode == FS_OPCODE_TEX ||
+	      opcode == FS_OPCODE_TXB ||
+	      opcode == FS_OPCODE_TXL);
+   }
+
+   bool is_math()
+   {
+      return (opcode == FS_OPCODE_RCP ||
+	      opcode == FS_OPCODE_RSQ ||
+	      opcode == FS_OPCODE_SQRT ||
+	      opcode == FS_OPCODE_EXP2 ||
+	      opcode == FS_OPCODE_LOG2 ||
+	      opcode == FS_OPCODE_SIN ||
+	      opcode == FS_OPCODE_COS ||
+	      opcode == FS_OPCODE_POW);
+   }
+
    int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
    fs_reg dst;
    fs_reg src[3];
@@ -412,6 +431,7 @@ public:
    void visit(ir_function_signature *ir);
 
    fs_inst *emit(fs_inst inst);
+   void setup_paramvalues_refs();
    void assign_curb_setup();
    void calculate_urb_setup();
    void assign_urb_setup();
@@ -428,6 +448,8 @@ public:
    bool dead_code_eliminate();
    bool remove_duplicate_mrf_writes();
    bool virtual_grf_interferes(int a, int b);
+   void schedule_instructions();
+
    void generate_code();
    void generate_fb_write(fs_inst *inst);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
@@ -476,6 +498,12 @@ public:
    void *mem_ctx;
    exec_list instructions;
 
+   /* Delayed setup of c->prog_data.param[] due to realloc of
+    * ParameterValues[] during compile.
+    */
+   int param_index[MAX_UNIFORMS * 4];
+   int param_offset[MAX_UNIFORMS * 4];
+
    int *virtual_grf_sizes;
    int virtual_grf_next;
    int virtual_grf_array_size;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
new file mode 100644
index 00000000000..c8f0b27b76f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -0,0 +1,489 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <[email protected]>
+ *
+ */
+
+extern "C" {
+
+#include <sys/types.h>
+
+#include "main/macros.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "program/prog_optimize.h"
+#include "program/register_allocate.h"
+#include "program/sampler.h"
+#include "program/hash_table.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "talloc.h"
+}
+#include "brw_fs.h"
+#include "../glsl/glsl_types.h"
+#include "../glsl/ir_optimization.h"
+#include "../glsl/ir_print_visitor.h"
+
+/** @file brw_fs_schedule_instructions.cpp
+ *
+ * List scheduling of FS instructions.
+ *
+ * The basic model of the list scheduler is to take a basic block,
+ * compute a DAG of the dependencies (RAW ordering with latency, WAW
+ * ordering, WAR ordering), and make a list of the DAG heads.
+ * Heuristically pick a DAG head, then put all the children that are
+ * now DAG heads into the list of things to schedule.
+ *
+ * The heuristic is the important part.  We're trying to be cheap,
+ * since actually computing the optimal schedule is NP-complete.
+ * What we do is track a "current clock".  When we schedule a node, we
+ * update the earliest-unblocked clock time of its children, and
+ * increment the clock.  Then, when trying to schedule, we just pick
+ * the earliest-unblocked instruction to schedule.
+ *
+ * Note that often there will be many things which could execute
+ * immediately, and there is a range of heuristic options to choose
+ * from in picking among those.
+ */
+
+class schedule_node : public exec_node
+{
+public:
+   schedule_node(fs_inst *inst)
+   {
+      this->inst = inst;
+      this->child_array_size = 0;
+      this->children = NULL;
+      this->child_latency = NULL;
+      this->child_count = 0;
+      this->parent_count = 0;
+      this->unblocked_time = 0;
+
+      int chans = 8;
+      int math_latency = 22;
+
+      switch (inst->opcode) {
+      case FS_OPCODE_RCP:
+	 this->latency = 1 * chans * math_latency;
+	 break;
+      case FS_OPCODE_RSQ:
+	 this->latency = 2 * chans * math_latency;
+	 break;
+      case FS_OPCODE_SQRT:
+      case FS_OPCODE_LOG2:
+	 /* full precision log.  partial is 2. */
+	 this->latency = 3 * chans * math_latency;
+	 break;
+      case FS_OPCODE_EXP2:
+	 /* full precision.  partial is 3, same throughput. */
+	 this->latency = 4 * chans * math_latency;
+	 break;
+      case FS_OPCODE_POW:
+	 this->latency = 8 * chans * math_latency;
+	 break;
+      case FS_OPCODE_SIN:
+      case FS_OPCODE_COS:
+	 /* minimum latency, max is 12 rounds. */
+	 this->latency = 5 * chans * math_latency;
+	 break;
+      default:
+	 this->latency = 2;
+	 break;
+      }
+   }
+
+   fs_inst *inst;
+   schedule_node **children;
+   int *child_latency;
+   int child_count;
+   int parent_count;
+   int child_array_size;
+   int unblocked_time;
+   int latency;
+};
+
+class instruction_scheduler {
+public:
+   instruction_scheduler(fs_visitor *v, void *mem_ctx, int virtual_grf_count)
+   {
+      this->v = v;
+      this->mem_ctx = talloc_new(mem_ctx);
+      this->virtual_grf_count = virtual_grf_count;
+      this->instructions.make_empty();
+      this->instructions_to_schedule = 0;
+   }
+
+   ~instruction_scheduler()
+   {
+      talloc_free(this->mem_ctx);
+   }
+   void add_barrier_deps(schedule_node *n);
+   void add_dep(schedule_node *before, schedule_node *after, int latency);
+
+   void add_inst(fs_inst *inst);
+   void calculate_deps();
+   void schedule_instructions(fs_inst *next_block_header);
+
+   void *mem_ctx;
+
+   int instructions_to_schedule;
+   int virtual_grf_count;
+   exec_list instructions;
+   fs_visitor *v;
+};
+
+void
+instruction_scheduler::add_inst(fs_inst *inst)
+{
+   schedule_node *n = new(mem_ctx) schedule_node(inst);
+
+   assert(!inst->is_head_sentinel());
+   assert(!inst->is_tail_sentinel());
+
+   this->instructions_to_schedule++;
+
+   inst->remove();
+   instructions.push_tail(n);
+}
+
+/**
+ * Add a dependency between two instruction nodes.
+ *
+ * The @after node will be scheduled after @before.  We will try to
+ * schedule it @latency cycles after @before, but no guarantees there.
+ */
+void
+instruction_scheduler::add_dep(schedule_node *before, schedule_node *after,
+			       int latency)
+{
+   if (!before || !after)
+      return;
+
+   assert(before != after);
+
+   for (int i = 0; i < before->child_count; i++) {
+      if (before->children[i] == after) {
+	 before->child_latency[i] = MAX2(before->child_latency[i], latency);
+	 return;
+      }
+   }
+
+   if (before->child_array_size <= before->child_count) {
+      if (before->child_array_size < 16)
+	 before->child_array_size = 16;
+      else
+	 before->child_array_size *= 2;
+
+      before->children = talloc_realloc(mem_ctx, before->children,
+					schedule_node *,
+					before->child_array_size);
+      before->child_latency = talloc_realloc(mem_ctx, before->child_latency,
+					     int, before->child_array_size);
+   }
+
+   before->children[before->child_count] = after;
+   before->child_latency[before->child_count] = latency;
+   before->child_count++;
+   after->parent_count++;
+}
+
+/**
+ * Sometimes we really want this node to execute after everything that
+ * was before it and before everything that followed it.  This adds
+ * the deps to do so.
+ */
+void
+instruction_scheduler::add_barrier_deps(schedule_node *n)
+{
+   schedule_node *prev = (schedule_node *)n->prev;
+   schedule_node *next = (schedule_node *)n->next;
+
+   if (prev) {
+      while (!prev->is_head_sentinel()) {
+	 add_dep(prev, n, 0);
+	 prev = (schedule_node *)prev->prev;
+      }
+   }
+
+   if (next) {
+      while (!next->is_tail_sentinel()) {
+	 add_dep(n, next, 0);
+	 next = (schedule_node *)next->next;
+      }
+   }
+}
+
+void
+instruction_scheduler::calculate_deps()
+{
+   schedule_node *last_grf_write[virtual_grf_count];
+   schedule_node *last_mrf_write[BRW_MAX_MRF];
+   schedule_node *last_conditional_mod = NULL;
+
+   /* The last instruction always needs to still be the last
+    * instruction.  Either it's flow control (IF, ELSE, ENDIF, DO,
+    * WHILE) and scheduling other things after it would disturb the
+    * basic block, or it's FB_WRITE and we should do a better job at
+    * dead code elimination anyway.
+    */
+   schedule_node *last = (schedule_node *)instructions.get_tail();
+   add_barrier_deps(last);
+
+   memset(last_grf_write, 0, sizeof(last_grf_write));
+   memset(last_mrf_write, 0, sizeof(last_mrf_write));
+
+   /* top-to-bottom dependencies: RAW and WAW. */
+   foreach_iter(exec_list_iterator, iter, instructions) {
+      schedule_node *n = (schedule_node *)iter.get();
+      fs_inst *inst = n->inst;
+
+      /* read-after-write deps. */
+      for (int i = 0; i < 3; i++) {
+	 if (inst->src[i].file == GRF) {
+	    if (last_grf_write[inst->src[i].reg]) {
+	       add_dep(last_grf_write[inst->src[i].reg], n,
+		       last_grf_write[inst->src[i].reg]->latency);
+	    }
+	 } else if (inst->src[i].file != BAD_FILE &&
+		    inst->src[i].file != IMM &&
+		    inst->src[i].file != UNIFORM) {
+	    assert(inst->src[i].file != MRF);
+	    add_barrier_deps(n);
+	 }
+      }
+
+      for (int i = 0; i < inst->mlen; i++) {
+	 /* It looks like the MRF regs are released in the send
+	  * instruction once it's sent, not when the result comes
+	  * back.
+	  */
+	 if (last_mrf_write[inst->base_mrf + i]) {
+	    add_dep(last_mrf_write[inst->base_mrf + i], n,
+		    last_mrf_write[inst->base_mrf + i]->latency);
+	 }
+      }
+
+      if (inst->predicated) {
+	 assert(last_conditional_mod);
+	 add_dep(last_conditional_mod, n, last_conditional_mod->latency);
+      }
+
+      /* write-after-write deps. */
+      if (inst->dst.file == GRF) {
+	 if (last_grf_write[inst->dst.reg]) {
+	    add_dep(last_grf_write[inst->dst.reg], n,
+		    last_grf_write[inst->dst.reg]->latency);
+	 }
+	 last_grf_write[inst->dst.reg] = n;
+      } else if (inst->dst.file == MRF) {
+	 if (last_mrf_write[inst->dst.hw_reg]) {
+	    add_dep(last_mrf_write[inst->dst.hw_reg], n,
+		    last_mrf_write[inst->dst.hw_reg]->latency);
+	 }
+	 last_mrf_write[inst->dst.hw_reg] = n;
+      } else if (inst->dst.file != BAD_FILE) {
+	 add_barrier_deps(n);
+      }
+
+      if (inst->mlen > 0) {
+	 for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+	    if (last_mrf_write[inst->base_mrf + i]) {
+	       add_dep(last_mrf_write[inst->base_mrf + i], n,
+		       last_mrf_write[inst->base_mrf + i]->latency);
+	    }
+	    last_mrf_write[inst->base_mrf + i] = n;
+	 }
+      }
+
+      if (inst->conditional_mod) {
+	 add_dep(last_conditional_mod, n, 0);
+	 last_conditional_mod = n;
+      }
+   }
+
+   /* bottom-to-top dependencies: WAR */
+   memset(last_grf_write, 0, sizeof(last_grf_write));
+   memset(last_mrf_write, 0, sizeof(last_mrf_write));
+   last_conditional_mod = NULL;
+
+   exec_node *node;
+   exec_node *prev;
+   for (node = instructions.get_tail(), prev = node->prev;
+	!node->is_head_sentinel();
+	node = prev, prev = node->prev) {
+      schedule_node *n = (schedule_node *)node;
+      fs_inst *inst = n->inst;
+
+      /* write-after-read deps. */
+      for (int i = 0; i < 3; i++) {
+	 if (inst->src[i].file == GRF) {
+	    if (last_grf_write[inst->src[i].reg]) {
+	       add_dep(n, last_grf_write[inst->src[i].reg], n->latency);
+	    }
+	 } else if (inst->src[i].file != BAD_FILE &&
+		    inst->src[i].file != IMM &&
+		    inst->src[i].file != UNIFORM) {
+	    assert(inst->src[i].file != MRF);
+	    add_barrier_deps(n);
+	 }
+      }
+
+      for (int i = 0; i < inst->mlen; i++) {
+	 /* It looks like the MRF regs are released in the send
+	  * instruction once it's sent, not when the result comes
+	  * back.
+	  */
+	 add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+      }
+
+      if (inst->predicated) {
+	 if (last_conditional_mod) {
+	    add_dep(n, last_conditional_mod, n->latency);
+	 }
+      }
+
+      /* Update the things this instruction wrote, so earlier reads
+       * can mark this as a WAR dependency.
+       */
+      if (inst->dst.file == GRF) {
+	 last_grf_write[inst->dst.reg] = n;
+      } else if (inst->dst.file == MRF) {
+	 last_mrf_write[inst->dst.hw_reg] = n;
+      } else if (inst->dst.file != BAD_FILE) {
+	 add_barrier_deps(n);
+      }
+
+      if (inst->mlen > 0) {
+	 for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+	    last_mrf_write[inst->base_mrf + i] = n;
+	 }
+      }
+
+      if (inst->conditional_mod)
+	 last_conditional_mod = n;
+   }
+}
+
+void
+instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
+{
+   int time = 0;
+
+   /* Remove non-DAG heads from the list. */
+   foreach_iter(exec_list_iterator, iter, instructions) {
+      schedule_node *n = (schedule_node *)iter.get();
+      if (n->parent_count != 0)
+	 n->remove();
+   }
+
+   while (!instructions.is_empty()) {
+      schedule_node *chosen = NULL;
+      int chosen_time = 0;
+
+      foreach_iter(exec_list_iterator, iter, instructions) {
+	 schedule_node *n = (schedule_node *)iter.get();
+
+	 if (!chosen || n->unblocked_time < chosen_time) {
+	    chosen = n;
+	    chosen_time = n->unblocked_time;
+	 }
+      }
+
+      /* Schedule this instruction. */
+      assert(chosen);
+      chosen->remove();
+      next_block_header->insert_before(chosen->inst);
+      instructions_to_schedule--;
+
+      /* Bump the clock.  If we expected a delay for scheduling, then
+       * bump the clock to reflect that.
+       */
+      time = MAX2(time + 1, chosen_time);
+
+      /* Now that we've scheduled a new instruction, some of its
+       * children can be promoted to the list of instructions ready to
+       * be scheduled.  Update the children's unblocked time for this
+       * DAG edge as we do so.
+       */
+      for (int i = 0; i < chosen->child_count; i++) {
+	 schedule_node *child = chosen->children[i];
+
+	 child->unblocked_time = MAX2(child->unblocked_time,
+				      time + chosen->child_latency[i]);
+
+	 child->parent_count--;
+	 if (child->parent_count == 0) {
+	    instructions.push_tail(child);
+	 }
+      }
+
+      /* Shared resource: the mathbox.  There's one per EU on later
+       * generations (it's even more limited pre-gen6), so if we send
+       * something off to it then the next math isn't going to make
+       * progress until the first is done.
+       */
+      if (chosen->inst->is_math()) {
+	 foreach_iter(exec_list_iterator, iter, instructions) {
+	    schedule_node *n = (schedule_node *)iter.get();
+
+	    if (n->inst->is_math())
+	       n->unblocked_time = MAX2(n->unblocked_time,
+					time + chosen->latency);
+	 }
+      }
+   }
+
+   assert(instructions_to_schedule == 0);
+}
+
+void
+fs_visitor::schedule_instructions()
+{
+   fs_inst *next_block_header = (fs_inst *)instructions.head;
+   instruction_scheduler sched(this, mem_ctx, this->virtual_grf_next);
+
+   while (!next_block_header->is_tail_sentinel()) {
+      /* Add things to be scheduled until we get to a new BB. */
+      while (!next_block_header->is_tail_sentinel()) {
+	 fs_inst *inst = next_block_header;
+	 next_block_header = (fs_inst *)next_block_header->next;
+
+	 sched.add_inst(inst);
+	 if (inst->opcode == BRW_OPCODE_IF ||
+	     inst->opcode == BRW_OPCODE_ELSE ||
+	     inst->opcode == BRW_OPCODE_ENDIF ||
+	     inst->opcode == BRW_OPCODE_DO ||
+	     inst->opcode == BRW_OPCODE_WHILE ||
+	     inst->opcode == BRW_OPCODE_BREAK ||
+	     inst->opcode == BRW_OPCODE_CONTINUE) {
+	    break;
+	 }
+      }
+      sched.calculate_deps();
+      sched.schedule_instructions(next_block_header);
+   }
+
+   this->live_intervals_valid = false;
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
index 2480b1ea500..988208ff56e 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
@@ -113,8 +113,10 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti,
 			if (access & GL_MAP_WRITE_BIT)
 				flags |= NOUVEAU_BO_WR;
 
-			ret = nouveau_bo_map(s->bo, flags);
-			assert(!ret);
+			if (!s->bo->map) {
+				ret = nouveau_bo_map(s->bo, flags);
+				assert(!ret);
+			}
 
 			ti->Data = s->bo->map + y * s->pitch + x * s->cpp;
 		}
diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
index f6afb90d595..e173cce0860 100644
--- a/src/mesa/drivers/dri/r200/r200_state_init.c
+++ b/src/mesa/drivers/dri/r200/r200_state_init.c
@@ -587,7 +587,7 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
 
    if (rrb) {
      OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
-     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+     OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
      OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
      OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 24fb031ecb9..7adf9ad73ed 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -777,10 +777,9 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
+	uint32_t internalFormat, format;
 	gl_format texFormat;
 
-	type = GL_BGRA;
 	format = GL_UNSIGNED_BYTE;
 	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
 
@@ -860,9 +859,20 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
 
         t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
 		   | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
-        t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
-	t->pp_txpitch = pitch_val;
-        t->pp_txpitch -= 32;
+
+	if (target == GL_TEXTURE_RECTANGLE_NV) {
+		t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
+		t->pp_txpitch = pitch_val;
+		t->pp_txpitch -= 32;
+	} else {
+		t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
+				    R200_TXFORMAT_HEIGHT_MASK |
+				    R200_TXFORMAT_CUBIC_MAP_ENABLE |
+				    R200_TXFORMAT_F5_WIDTH_MASK |
+				    R200_TXFORMAT_F5_HEIGHT_MASK);
+		t->pp_txformat |= ((texImage->WidthLog2 << R200_TXFORMAT_WIDTH_SHIFT) |
+				   (texImage->HeightLog2 << R200_TXFORMAT_HEIGHT_SHIFT));
+	}
 
 	t->validated = GL_TRUE;
 	_mesa_unlock_texture(radeon->glCtx, texObj);
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index 2a99ded5d67..fe4f0e48661 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -265,9 +265,9 @@ static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa
 static void r600SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
 {
 	t->TD_PS_SAMPLER0_BORDER_ALPHA = *((uint32_t*)&(color[3]));
-	t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[2]));
+	t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[2]));
 	t->TD_PS_SAMPLER0_BORDER_GREEN = *((uint32_t*)&(color[1]));
-	t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[0]));
+	t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[0]));
         SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER,
 		 BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
 }
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 2a6a39dfbac..0323e32d705 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -47,13 +47,13 @@
 void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog)
 {
     static const gl_state_index winstate[STATE_LENGTH]
-         = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
+         = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0};
     struct prog_instruction *newInst, *inst;
     GLint  win_size;  /* state reference */
     GLuint wpos_temp; /* temp register */
     int i, j;
 
-    /* PARAM win_size = STATE_FB_SIZE */
+    /* PARAM win_size = STATE_FB_WPOS_Y_TRANSFORM */
     win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
 
     wpos_temp = fprog->Base.NumTemporaries++;
@@ -74,9 +74,8 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog)
     _mesa_insert_instructions(&(fprog->Base), 0, 1);
 
     newInst = fprog->Base.Instructions;
-    /* invert wpos.y
-     * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
-    newInst[0].Opcode = OPCODE_ADD;
+    /* possibly invert wpos.y depending on STATE_FB_WPOS_Y_TRANSFORM var */
+    newInst[0].Opcode = OPCODE_MAD;
     newInst[0].DstReg.File = PROGRAM_TEMPORARY;
     newInst[0].DstReg.Index = wpos_temp;
     newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
@@ -84,11 +83,14 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog)
     newInst[0].SrcReg[0].File = PROGRAM_INPUT;
     newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
     newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-    newInst[0].SrcReg[0].Negate = NEGATE_Y;
 
     newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
     newInst[0].SrcReg[1].Index = win_size;
-    newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
+    newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE);
+
+    newInst[0].SrcReg[2].File = PROGRAM_STATE_VAR;
+    newInst[0].SrcReg[2].Index = win_size;
+    newInst[0].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
 
 }
 
@@ -509,6 +511,7 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
     unsigned int ui, i;
     unsigned int unNumOfReg;
     unsigned int unBit;
+    unsigned int num_sq_ps_gprs;
     GLuint exportCount;
     GLboolean point_sprite = GL_FALSE;
 
@@ -619,6 +622,15 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
 
     SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
 
+    num_sq_ps_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_PS_GPRS_mask) >> NUM_PS_GPRS_shift);
+
+    if(ui > num_sq_ps_gprs)
+    {
+        /* care! this changes sq - needs idle state */
+        R600_STATECHANGE(context, sq);
+        SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, ui, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
+    } 
+
     CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
 
     if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 7ba49d8f986..7d4be9180a0 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -605,6 +605,7 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx)
     struct gl_program_parameter_list *paramList;
     unsigned int unNumParamData;
     unsigned int ui;
+    unsigned int num_sq_vs_gprs;
 
     if(GL_FALSE == vp->loaded)
     {
@@ -656,6 +657,16 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx)
     SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
              NUM_GPRS_shift, NUM_GPRS_mask);
 
+    num_sq_vs_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_VS_GPRS_mask) >> NUM_VS_GPRS_shift);
+ 
+    if((vp->r700Shader.nRegs + 1) > num_sq_vs_gprs)
+    {
+        /* care! this changes sq - needs idle state */
+        R600_STATECHANGE(context, sq);
+        SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, vp->r700Shader.nRegs + 1,
+                 NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
+    }
+
     if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
 	{
         SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index a1124483a6c..819d9dd5750 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -532,7 +532,7 @@ void radeon_prepare_render(radeonContextPtr radeon)
 
 	/* Intel driver does the equivalent of this, no clue if it is needed:*/
 	draw = drawable->driverPrivate;
-	radeon_draw_buffer(radeon->glCtx, &draw->base);
+	radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
 
 	driContext->dri2.draw_stamp = drawable->dri2.stamp;
     }
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index 1c5326fe9dc..caf3f253d2a 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -60,7 +60,7 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
 static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
 				 GLint x, GLint y)
 {
-    GLubyte *ptr = rrb->bo->ptr;
+    GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
     GLint offset;
     if (rrb->has_surface) {
 	offset = x * rrb->cpp + y * rrb->pitch;
@@ -85,7 +85,7 @@ static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
 static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
 				 GLint x, GLint y)
 {
-    GLubyte *ptr = rrb->bo->ptr;
+    GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
     GLint offset;
     if (rrb->has_surface) {
 	offset = x * rrb->cpp + y * rrb->pitch;
@@ -439,7 +439,7 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
 static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
 			     GLint x, GLint y)
 {
-    GLubyte *ptr = rrb->bo->ptr;
+    GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
     uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
     GLint offset;
 
@@ -479,7 +479,7 @@ static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
 static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
 				     GLint x, GLint y)
 {
-    GLubyte *ptr = rrb->bo->ptr;
+    GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
     uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
     GLint offset;
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index ca42aa39474..e88e984354f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -2091,6 +2091,9 @@ static GLboolean r100ValidateBuffers(struct gl_context *ctx)
 	 continue;
 
       t = rmesa->state.texture.unit[i].texobj;
+
+      if (!t)
+	 continue;
       if (t->image_override && t->bo)
 	radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo,
 			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
index 698efb145c0..33b504cccf8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
@@ -488,7 +488,7 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
 
    if (rrb) {
      OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
-     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+     OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
      OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
      OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 32c021cb545..9ba98e303a7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -652,12 +652,11 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
+	uint32_t internalFormat, format;
 	gl_format texFormat;
 
-	type = GL_BGRA;
 	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
+	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA);
 
 	radeon = pDRICtx->driverPrivate;
 	rmesa = pDRICtx->driverPrivate;
@@ -739,6 +738,14 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form
 		t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
 		t->pp_txpitch = pitch_val;
 		t->pp_txpitch -= 32;
+	} else {
+	  t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
+			      RADEON_TXFORMAT_HEIGHT_MASK |
+			      RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
+			      RADEON_TXFORMAT_F5_WIDTH_MASK |
+			      RADEON_TXFORMAT_F5_HEIGHT_MASK);
+	  t->pp_txformat |= ((texImage->WidthLog2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
+			     (texImage->HeightLog2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
 	}
 	t->validated = GL_TRUE;
 	_mesa_unlock_texture(radeon->glCtx, texObj);
diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile
index f94aae85f2c..6b2a13c974f 100644
--- a/src/mesa/drivers/x11/Makefile
+++ b/src/mesa/drivers/x11/Makefile
@@ -47,6 +47,9 @@ INCLUDE_DIRS = \
 
 CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a
 
+ifeq ($(SHARED_GLAPI),1)
+GL_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(GL_LIB_DEPS)
+endif
 
 
 .c.o:
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index ac9709db3f1..7c4652f747f 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -329,7 +329,7 @@ _mesa_validate_DrawArrays(struct gl_context *ctx,
 
 GLboolean
 _mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint first,
-                                   GLsizei count, GLsizei primcount)
+                                   GLsizei count, GLsizei numInstances)
 {
    ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE);
 
@@ -346,10 +346,10 @@ _mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint fi
       return GL_FALSE;
    }
 
-   if (primcount <= 0) {
-      if (primcount < 0)
+   if (numInstances <= 0) {
+      if (numInstances < 0)
          _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glDrawArraysInstanced(primcount=%d)", primcount);
+                     "glDrawArraysInstanced(numInstances=%d)", numInstances);
       return GL_FALSE;
    }
 
@@ -374,7 +374,7 @@ _mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint fi
 GLboolean
 _mesa_validate_DrawElementsInstanced(struct gl_context *ctx,
                                      GLenum mode, GLsizei count, GLenum type,
-                                     const GLvoid *indices, GLsizei primcount)
+                                     const GLvoid *indices, GLsizei numInstances)
 {
    ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE);
 
@@ -399,10 +399,10 @@ _mesa_validate_DrawElementsInstanced(struct gl_context *ctx,
       return GL_FALSE;
    }
 
-   if (primcount <= 0) {
-      if (primcount < 0)
+   if (numInstances <= 0) {
+      if (numInstances < 0)
          _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glDrawElementsInstanced(primcount=%d)", primcount);
+                     "glDrawElementsInstanced(numInstances=%d)", numInstances);
       return GL_FALSE;
    }
 
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 958ea10a422..fe370fa369b 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -526,6 +526,16 @@ init_program_limits(GLenum type, struct gl_program_constants *prog)
    prog->MaxNativeTemps = 0;
    prog->MaxNativeAddressRegs = 0;
    prog->MaxNativeParameters = 0;
+
+   /* Set GLSL datatype range/precision info assuming IEEE float values.
+    * Drivers should override these defaults as needed.
+    */
+   prog->MediumFloat.RangeMin = 127;
+   prog->MediumFloat.RangeMax = 127;
+   prog->MediumFloat.Precision = 23;
+   prog->LowFloat = prog->HighFloat = prog->MediumFloat;
+   /* assume ints are stored as floats for now */
+   prog->LowInt = prog->MediumInt = prog->HighInt = prog->MediumFloat;
 }
 
 
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 2eede4268ca..749c30a4cc1 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -881,11 +881,6 @@ struct dd_function_table {
     */
    void (*ValidateTnlModule)( struct gl_context *ctx, GLuint new_state );
 
-
-#define PRIM_OUTSIDE_BEGIN_END   (GL_POLYGON+1)
-#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2)
-#define PRIM_UNKNOWN             (GL_POLYGON+3)
-
    /**
     * Set by the driver-supplied T&L engine.  
     *
diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c
index 79aa53585f9..a6a909b48ce 100644
--- a/src/mesa/main/debug.c
+++ b/src/mesa/main/debug.c
@@ -37,26 +37,6 @@
 #include "texobj.h"
 
 
-/**
- * Primitive names
- */
-const char *_mesa_prim_name[GL_POLYGON+4] = {
-   "GL_POINTS",
-   "GL_LINES",
-   "GL_LINE_LOOP",
-   "GL_LINE_STRIP",
-   "GL_TRIANGLES",
-   "GL_TRIANGLE_STRIP",
-   "GL_TRIANGLE_FAN",
-   "GL_QUADS",
-   "GL_QUAD_STRIP",
-   "GL_POLYGON",
-   "outside begin/end",
-   "inside unknown primitive",
-   "unknown state"
-};
-
-
 static const char *
 tex_target_name(GLenum tgt)
 {
diff --git a/src/mesa/main/enums.c b/src/mesa/main/enums.c
index c358fb246bc..83d7fb66c0a 100644
--- a/src/mesa/main/enums.c
+++ b/src/mesa/main/enums.c
@@ -29,6 +29,7 @@
 #include "main/mfeatures.h"
 #include "main/enums.h"
 #include "main/imports.h"
+#include "main/mtypes.h"
 
 typedef struct {
    size_t offset;
@@ -6243,29 +6244,39 @@ const char *_mesa_lookup_enum_by_nr( int nr )
    }
 }
 
+/**
+ * Primitive names
+ */
+static const char *prim_names[PRIM_UNKNOWN + 1] = {
+   "GL_POINTS",
+   "GL_LINES",
+   "GL_LINE_LOOP",
+   "GL_LINE_STRIP",
+   "GL_TRIANGLES",
+   "GL_TRIANGLE_STRIP",
+   "GL_TRIANGLE_FAN",
+   "GL_QUADS",
+   "GL_QUAD_STRIP",
+   "GL_POLYGON",
+   "outside begin/end",
+   "inside unknown primitive",
+   "unknown state"
+};
+
+
 /* Get the name of an enum given that it is a primitive type.  Avoids
  * GL_FALSE/GL_POINTS ambiguity and others.
  */
-const char *_mesa_lookup_prim_by_nr( int nr )
+const char *
+_mesa_lookup_prim_by_nr(unsigned nr)
 {
-   switch (nr) {
-   case GL_POINTS: return "GL_POINTS";
-   case GL_LINES: return "GL_LINES";
-   case GL_LINE_STRIP: return "GL_LINE_STRIP";
-   case GL_LINE_LOOP: return "GL_LINE_LOOP";
-   case GL_TRIANGLES: return "GL_TRIANGLES";
-   case GL_TRIANGLE_STRIP: return "GL_TRIANGLE_STRIP";
-   case GL_TRIANGLE_FAN: return "GL_TRIANGLE_FAN";
-   case GL_QUADS: return "GL_QUADS";
-   case GL_QUAD_STRIP: return "GL_QUAD_STRIP";
-   case GL_POLYGON: return "GL_POLYGON";
-   case GL_POLYGON+1: return "OUTSIDE_BEGIN_END";
-   default: return "<invalid>";
-   }
+   if (nr < Elements(prim_names))
+      return prim_names[nr];
+   else
+      return "invalid mode";
 }
 
 
-
 int _mesa_lookup_enum_by_name( const char *symbol )
 {
    enum_elt * f = NULL;
diff --git a/src/mesa/main/enums.h b/src/mesa/main/enums.h
index c03cd34da92..7733df22f91 100644
--- a/src/mesa/main/enums.h
+++ b/src/mesa/main/enums.h
@@ -45,7 +45,7 @@ extern const char *_mesa_lookup_enum_by_nr( int nr );
 /* Get the name of an enum given that it is a primitive type.  Avoids
  * GL_FALSE/GL_POINTS ambiguity and others.
  */
-const char *_mesa_lookup_prim_by_nr( int nr );
+const char *_mesa_lookup_prim_by_nr( unsigned nr );
 
 extern int _mesa_lookup_enum_by_name( const char *symbol );
 
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a6445b18368..49dad4d4024 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -121,6 +121,11 @@ struct st_context;
 /*@}*/
 
 
+/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */
+#define PRIM_OUTSIDE_BEGIN_END   (GL_POLYGON+1)
+#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2)
+#define PRIM_UNKNOWN             (GL_POLYGON+3)
+
 
 /**
  * Shader stages. Note that these will become 5 with tessellation.
@@ -296,8 +301,8 @@ typedef enum
 /**
  * Indexes for geometry program result attributes
  */
-/*@{*/
-typedef enum {
+typedef enum
+{
    GEOM_RESULT_POS  = 0,
    GEOM_RESULT_COL0  = 1,
    GEOM_RESULT_COL1  = 2,
@@ -320,7 +325,7 @@ typedef enum {
    /* ### we need to -2 because var0 is 18 instead 16 like in the others */
    GEOM_RESULT_MAX  =  (GEOM_RESULT_VAR0 + MAX_VARYING - 2)
 } gl_geom_result;
-/*@}*/
+
 
 /**
  * Indexes for fragment program input attributes.
@@ -1322,7 +1327,7 @@ struct gl_texture_object
    GLboolean _Complete;		/**< Is texture object complete? */
    GLboolean _RenderToTexture;  /**< Any rendering to this texture? */
    GLboolean Purgeable;         /**< Is the buffer purgeable under memory pressure? */
-   GLenum sRGBDecode;
+   GLenum sRGBDecode;           /**< GL_DECODE_EXT or GL_SKIP_DECODE_EXT */
 
    /** Actual texture images, indexed by [cube face] and [mipmap level] */
    struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS];
@@ -2549,6 +2554,17 @@ struct gl_framebuffer
 
 
 /**
+ * Precision info for shader datatypes.  See glGetShaderPrecisionFormat().
+ */
+struct gl_precision
+{
+   GLushort RangeMin;   /**< min value exponent */
+   GLushort RangeMax;   /**< max value exponent */
+   GLushort Precision;  /**< number of mantissa bits */
+};
+
+
+/**
  * Limits for vertex and fragment programs/shaders.
  */
 struct gl_program_constants
@@ -2582,6 +2598,9 @@ struct gl_program_constants
    GLuint MaxGeometryUniformComponents;
    GLuint MaxGeometryOutputVertices;
    GLuint MaxGeometryTotalOutputComponents;
+   /* ES 2.0 and GL_ARB_ES2_compatibility */
+   struct gl_precision LowFloat, MediumFloat, HighFloat;
+   struct gl_precision LowInt, MediumInt, HighInt;
 };
 
 
@@ -3074,15 +3093,18 @@ struct gl_dlist_state
    } Current;
 };
 
+
 /**
  * Enum for the OpenGL APIs we know about and may support.
  */
-typedef enum {
+typedef enum
+{
    API_OPENGL,
    API_OPENGLES,
    API_OPENGLES2
 } gl_api;
 
+
 /**
  * Mesa rendering context.
  *
@@ -3295,10 +3317,6 @@ struct gl_context
 };
 
 
-/** The string names for GL_POINT, GL_LINE_LOOP, etc */
-extern const char *_mesa_prim_name[GL_POLYGON+4];
-
-
 #ifdef DEBUG
 extern int MESA_VERBOSE;
 extern int MESA_DEBUG_FLAGS;
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 2ffd8be0eb4..e831175235e 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1628,12 +1628,51 @@ void GLAPIENTRY
 _mesa_GetShaderPrecisionFormat(GLenum shadertype, GLenum precisiontype,
                                GLint* range, GLint* precision)
 {
+   const struct gl_program_constants *limits;
+   const struct gl_precision *p;
    GET_CURRENT_CONTEXT(ctx);
-   (void) shadertype;
-   (void) precisiontype;
-   (void) range;
-   (void) precision;
-   _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__);
+
+   switch (shadertype) {
+   case GL_VERTEX_SHADER:
+      limits = &ctx->Const.VertexProgram;
+      break;
+   case GL_FRAGMENT_SHADER:
+      limits = &ctx->Const.FragmentProgram;
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetShaderPrecisionFormat(shadertype)");
+      return;
+   }
+
+   switch (precisiontype) {
+   case GL_LOW_FLOAT:
+      p = &limits->LowFloat;
+      break;
+   case GL_MEDIUM_FLOAT:
+      p = &limits->MediumFloat;
+      break;
+   case GL_HIGH_FLOAT:
+      p = &limits->HighFloat;
+      break;
+   case GL_LOW_INT:
+      p = &limits->LowInt;
+      break;
+   case GL_MEDIUM_INT:
+      p = &limits->MediumInt;
+      break;
+   case GL_HIGH_INT:
+      p = &limits->HighInt;
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetShaderPrecisionFormat(precisiontype)");
+      return;
+   }
+
+   range[0] = p->RangeMin;
+   range[1] = p->RangeMax;
+   precision[0] = p->Precision;
 }
 
 
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index ada6e356419..f984e2f1402 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -38,8 +38,10 @@
 #include "register_allocate.h"
 
 struct ra_reg {
-   char *name;
    GLboolean *conflicts;
+   unsigned int *conflict_list;
+   unsigned int conflict_list_size;
+   unsigned int num_conflicts;
 };
 
 struct ra_regs {
@@ -68,6 +70,7 @@ struct ra_class {
 
 struct ra_node {
    GLboolean *adjacency;
+   unsigned int *adjacency_list;
    unsigned int class;
    unsigned int adjacency_count;
    unsigned int reg;
@@ -100,16 +103,39 @@ ra_alloc_reg_set(unsigned int count)
    for (i = 0; i < count; i++) {
       regs->regs[i].conflicts = talloc_zero_array(regs->regs, GLboolean, count);
       regs->regs[i].conflicts[i] = GL_TRUE;
+
+      regs->regs[i].conflict_list = talloc_array(regs->regs, unsigned int, 4);
+      regs->regs[i].conflict_list_size = 4;
+      regs->regs[i].conflict_list[0] = i;
+      regs->regs[i].num_conflicts = 1;
    }
 
    return regs;
 }
 
+static void
+ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
+{
+   struct ra_reg *reg1 = &regs->regs[r1];
+
+   if (reg1->conflict_list_size == reg1->num_conflicts) {
+      reg1->conflict_list_size *= 2;
+      reg1->conflict_list = talloc_realloc(regs,
+					   reg1->conflict_list,
+					   unsigned int,
+					   reg1->conflict_list_size);
+   }
+   reg1->conflict_list[reg1->num_conflicts++] = r2;
+   reg1->conflicts[r2] = GL_TRUE;
+}
+
 void
 ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
 {
-   regs->regs[r1].conflicts[r2] = GL_TRUE;
-   regs->regs[r2].conflicts[r1] = GL_TRUE;
+   if (!regs->regs[r1].conflicts[r2]) {
+      ra_add_conflict_list(regs, r1, r2);
+      ra_add_conflict_list(regs, r2, r1);
+   }
 }
 
 unsigned int
@@ -160,15 +186,15 @@ ra_set_finalize(struct ra_regs *regs)
 	 int max_conflicts = 0;
 
 	 for (rc = 0; rc < regs->count; rc++) {
-	    unsigned int rb;
 	    int conflicts = 0;
+	    int i;
 
 	    if (!regs->classes[c]->regs[rc])
 	       continue;
 
-	    for (rb = 0; rb < regs->count; rb++) {
-	       if (regs->classes[b]->regs[rb] &&
-		   regs->regs[rb].conflicts[rc])
+	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
+	       unsigned int rb = regs->regs[rc].conflict_list[i];
+	       if (regs->classes[b]->regs[rb])
 		  conflicts++;
 	    }
 	    max_conflicts = MAX2(max_conflicts, conflicts);
@@ -178,6 +204,14 @@ ra_set_finalize(struct ra_regs *regs)
    }
 }
 
+static void
+ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2)
+{
+   g->nodes[n1].adjacency[n2] = GL_TRUE;
+   g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2;
+   g->nodes[n1].adjacency_count++;
+}
+
 struct ra_graph *
 ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
 {
@@ -193,7 +227,9 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
 
    for (i = 0; i < count; i++) {
       g->nodes[i].adjacency = talloc_zero_array(g, GLboolean, count);
-      g->nodes[i].adjacency[i] = GL_TRUE;
+      g->nodes[i].adjacency_list = talloc_array(g, unsigned int, count);
+      g->nodes[i].adjacency_count = 0;
+      ra_add_node_adjacency(g, i, i);
       g->nodes[i].reg = ~0;
    }
 
@@ -211,13 +247,10 @@ void
 ra_add_node_interference(struct ra_graph *g,
 			 unsigned int n1, unsigned int n2)
 {
-   if (g->nodes[n1].adjacency[n2])
-      return;
-
-   g->nodes[n1].adjacency[n2] = GL_TRUE;
-   g->nodes[n2].adjacency_count++;
-   g->nodes[n2].adjacency[n1] = GL_TRUE;
-   g->nodes[n2].adjacency_count++;
+   if (!g->nodes[n1].adjacency[n2]) {
+      ra_add_node_adjacency(g, n1, n2);
+      ra_add_node_adjacency(g, n2, n1);
+   }
 }
 
 static GLboolean pq_test(struct ra_graph *g, unsigned int n)
@@ -226,13 +259,12 @@ static GLboolean pq_test(struct ra_graph *g, unsigned int n)
    unsigned int q = 0;
    int n_class = g->nodes[n].class;
 
-   for (j = 0; j < g->count; j++) {
-      if (j == n || g->nodes[j].in_stack)
-	 continue;
+   for (j = 0; j < g->nodes[n].adjacency_count; j++) {
+      unsigned int n2 = g->nodes[n].adjacency_list[j];
+      unsigned int n2_class = g->nodes[n2].class;
 
-      if (g->nodes[n].adjacency[j]) {
-	 unsigned int j_class = g->nodes[j].class;
-	 q += g->regs->classes[n_class]->q[j_class];
+      if (n != n2 && !g->nodes[n2].in_stack) {
+	 q += g->regs->classes[n_class]->q[n2_class];
       }
    }
 
@@ -303,14 +335,15 @@ ra_select(struct ra_graph *g)
 	    continue;
 
 	 /* Check if any of our neighbors conflict with this register choice. */
-	 for (i = 0; i < g->count; i++) {
-	    if (g->nodes[n].adjacency[i] &&
-	       !g->nodes[i].in_stack &&
-		g->regs->regs[r].conflicts[g->nodes[i].reg]) {
+	 for (i = 0; i < g->nodes[n].adjacency_count; i++) {
+	    unsigned int n2 = g->nodes[n].adjacency_list[i];
+
+	    if (!g->nodes[n2].in_stack &&
+		g->regs->regs[r].conflicts[g->nodes[n2].reg]) {
 	       break;
 	    }
 	 }
-	 if (i == g->count)
+	 if (i == g->nodes[n].adjacency_count)
 	    break;
       }
       if (r == g->regs->count)
@@ -368,17 +401,17 @@ ra_get_spill_benefit(struct ra_graph *g, unsigned int n)
    float benefit = 0;
    int n_class = g->nodes[n].class;
 
-   /* Define the benefit of eliminating an interference between n, j
+   /* Define the benefit of eliminating an interference between n, n2
     * through spilling as q(C, B) / p(C).  This is similar to the
     * "count number of edges" approach of traditional graph coloring,
     * but takes classes into account.
     */
-   for (j = 0; j < g->count; j++) {
-      if (j != n && g->nodes[n].adjacency[j]) {
-	 unsigned int j_class = g->nodes[j].class;
-	 benefit += ((float)g->regs->classes[n_class]->q[j_class] /
+   for (j = 0; j < g->nodes[n].adjacency_count; j++) {
+      unsigned int n2 = g->nodes[n].adjacency_list[j];
+      if (n != n2) {
+	 unsigned int n2_class = g->nodes[n2].class;
+	 benefit += ((float)g->regs->classes[n_class]->q[n2_class] /
 		     g->regs->classes[n_class]->p);
-	 break;
       }
    }
 
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 422ae43585b..fd03669e660 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -45,6 +45,7 @@
 #include "util/u_inlines.h"
 #include "cso_cache/cso_context.h"
 
+
 /**
  * Combine depth texture mode with "swizzle" so that depth mode swizzling
  * takes place before texture swizzling, and return the resulting swizzle.
@@ -54,8 +55,8 @@
  * \param swizzle    Texture swizzle, a bitmask computed using MAKE_SWIZZLE4.
  * \param depthmode  One of GL_LUMINANCE, GL_INTENSITY, GL_ALPHA, GL_RED.
  */
-static GLuint apply_depthmode(enum pipe_format format,
-                              GLuint swizzle, GLenum depthmode)
+static GLuint
+apply_depthmode(enum pipe_format format, GLuint swizzle, GLenum depthmode)
 {
    const struct util_format_description *desc =
          util_format_description(format);
@@ -109,6 +110,7 @@ static GLuint apply_depthmode(enum pipe_format format,
    return MAKE_SWIZZLE4(swiz[0], swiz[1], swiz[2], swiz[3]);
 }
 
+
 /**
  * Return TRUE if the swizzling described by "swizzle" and
  * "depthmode" (for depth textures only) is different from the swizzling
@@ -118,8 +120,9 @@ static GLuint apply_depthmode(enum pipe_format format,
  * \param swizzle    Texture swizzle, a bitmask computed using MAKE_SWIZZLE4.
  * \param depthmode  One of GL_LUMINANCE, GL_INTENSITY, GL_ALPHA.
  */
-static boolean check_sampler_swizzle(struct pipe_sampler_view *sv,
-                                     GLuint swizzle, GLenum depthmode)
+static boolean
+check_sampler_swizzle(struct pipe_sampler_view *sv,
+                      GLuint swizzle, GLenum depthmode)
 {
    swizzle = apply_depthmode(sv->texture->format, swizzle, depthmode);
 
@@ -127,15 +130,15 @@ static boolean check_sampler_swizzle(struct pipe_sampler_view *sv,
        (sv->swizzle_g != GET_SWZ(swizzle, 1)) ||
        (sv->swizzle_b != GET_SWZ(swizzle, 2)) ||
        (sv->swizzle_a != GET_SWZ(swizzle, 3)))
-      return true;
-   return false;
+      return TRUE;
+   return FALSE;
 }
 
+
 static INLINE struct pipe_sampler_view *
 st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
 					  struct st_texture_object *stObj,
 					  enum pipe_format format)
-					  
 {
    struct pipe_sampler_view templ;
    GLuint swizzle = apply_depthmode(stObj->pt->format,
@@ -161,19 +164,20 @@ static INLINE struct pipe_sampler_view *
 st_get_texture_sampler_view_from_stobj(struct st_texture_object *stObj,
 				       struct pipe_context *pipe,
 				       enum pipe_format format)
-
 {
    if (!stObj || !stObj->pt) {
       return NULL;
    }
 
    if (!stObj->sampler_view) {
-      stObj->sampler_view = st_create_texture_sampler_view_from_stobj(pipe, stObj, format);
+      stObj->sampler_view =
+         st_create_texture_sampler_view_from_stobj(pipe, stObj, format);
    }
 
    return stObj->sampler_view;
 }
 
+
 static void 
 update_textures(struct st_context *st)
 {
@@ -214,21 +218,29 @@ update_textures(struct st_context *st)
             continue;
          }
 
+         /* Determine the format of the texture sampler view */
 	 st_view_format = stObj->pt->format;
 	 {
-	    struct st_texture_image *firstImage;
-	    enum pipe_format firstImageFormat;
-	    firstImage = st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]);
-
-	    firstImageFormat = st_mesa_format_to_pipe_format(firstImage->base.TexFormat);
-	    if ((stObj->base.sRGBDecode == GL_SKIP_DECODE_EXT) && (_mesa_get_format_color_encoding(firstImage->base.TexFormat) == GL_SRGB)) {
-	       firstImageFormat = st_mesa_format_to_pipe_format(_mesa_get_srgb_format_linear(firstImage->base.TexFormat));
+	    const struct st_texture_image *firstImage =
+               st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]);
+            const gl_format texFormat = firstImage->base.TexFormat;
+	    enum pipe_format firstImageFormat =
+               st_mesa_format_to_pipe_format(texFormat);
+
+	    if ((stObj->base.sRGBDecode == GL_SKIP_DECODE_EXT) &&
+                (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) {
+               /* don't do sRGB->RGB conversion.  Interpret the
+                * texture data as linear values.
+                */
+               const gl_format linearFormat =
+                  _mesa_get_srgb_format_linear(texFormat);
+	       firstImageFormat = st_mesa_format_to_pipe_format(linearFormat);
 	    }
 
 	    if (firstImageFormat != stObj->pt->format)
 	       st_view_format = firstImageFormat;
-
 	 }
+
          st->state.num_textures = su + 1;
 
 	 /* if sampler view has changed dereference it */
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index abaf8235416..974fd78d7c8 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -432,6 +432,10 @@ void st_init_extensions(struct st_context *st)
       ctx->Extensions.EXT_draw_buffers2 = GL_TRUE;
    }
 
+   if (screen->get_param(screen, PIPE_CAP_INDEP_BLEND_FUNC)) {
+      ctx->Extensions.ARB_draw_buffers_blend = GL_TRUE;
+   }
+
    /* GL_ARB_half_float_vertex */
    if (screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_FLOAT,
                                    PIPE_BUFFER, 0,
@@ -439,10 +443,6 @@ void st_init_extensions(struct st_context *st)
       ctx->Extensions.ARB_half_float_vertex = GL_TRUE;
    }
 
-   if (screen->get_param(screen, PIPE_CAP_INDEP_BLEND_FUNC)) {
-      ctx->Extensions.ARB_draw_buffers_blend = GL_TRUE;
-   }
-
    if (screen->get_shader_param(screen, PIPE_SHADER_GEOMETRY, PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
 #if 0 /* XXX re-enable when GLSL compiler again supports geometry shaders */
       ctx->Extensions.ARB_geometry_shader4 = GL_TRUE;
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index c78901c0360..bca856d7142 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -125,9 +125,7 @@ st_create_texture_sampler_view(struct pipe_context *pipe,
 {
    struct pipe_sampler_view templ;
 
-   u_sampler_view_default_template(&templ,
-                                   texture,
-                                   texture->format);
+   u_sampler_view_default_template(&templ, texture, texture->format);
 
    return pipe->create_sampler_view(pipe, texture, &templ);
 }
@@ -140,17 +138,15 @@ st_create_texture_sampler_view_format(struct pipe_context *pipe,
 {
    struct pipe_sampler_view templ;
 
-   u_sampler_view_default_template(&templ,
-                                   texture,
-                                   format);
+   u_sampler_view_default_template(&templ, texture, format);
 
    return pipe->create_sampler_view(pipe, texture, &templ);
 }
 
+
 static INLINE struct pipe_sampler_view *
 st_get_texture_sampler_view(struct st_texture_object *stObj,
                             struct pipe_context *pipe)
-
 {
    if (!stObj || !stObj->pt) {
       return NULL;
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index bdb893eba22..858b8281da3 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -468,6 +468,8 @@ void _tnl_draw_prims( struct gl_context *ctx,
 	       break;
 	 }
 
+         assert(prim[i].num_instances > 0);
+
 	 /* Binding inputs may imply mapping some vertex buffer objects.
 	  * They will need to be unmapped below.
 	  */
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index e221538bad4..37940efdc11 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -38,7 +38,7 @@ struct gl_client_array;
 struct gl_context;
 
 struct _mesa_prim {
-   GLuint mode:8;
+   GLuint mode:8;    /**< GL_POINTS, GL_LINES, GL_QUAD_STRIP, etc */
    GLuint indexed:1;
    GLuint begin:1;
    GLuint end:1;
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index dd36cc32a70..80085c17c5c 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -629,15 +629,15 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
  */
 static void GLAPIENTRY
 vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
-                             GLsizei primcount)
+                             GLsizei numInstances)
 {
    GET_CURRENT_CONTEXT(ctx);
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), start, count, primcount);
+                  _mesa_lookup_enum_by_nr(mode), start, count, numInstances);
 
-   if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, primcount))
+   if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances))
       return;
 
    FLUSH_CURRENT( ctx, 0 );
@@ -649,7 +649,7 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
    if (0)
       check_draw_arrays_data(ctx, start, count);
 
-   vbo_draw_arrays(ctx, mode, start, count, primcount);
+   vbo_draw_arrays(ctx, mode, start, count, numInstances);
 
    if (0)
       print_draw_arrays(ctx, mode, start, count);
@@ -724,7 +724,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
 				GLuint start, GLuint end,
 				GLsizei count, GLenum type,
 				const GLvoid *indices,
-				GLint basevertex, GLint primcount)
+				GLint basevertex, GLint numInstances)
 {
    struct vbo_context *vbo = vbo_context(ctx);
    struct vbo_exec_context *exec = &vbo->exec;
@@ -757,7 +757,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
    prim[0].count = count;
    prim[0].indexed = 1;
    prim[0].basevertex = basevertex;
-   prim[0].num_instances = primcount;
+   prim[0].num_instances = numInstances;
 
    /* Need to give special consideration to rendering a range of
     * indices starting somewhere above zero.  Typically the
@@ -977,21 +977,21 @@ vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
  */
 static void GLAPIENTRY
 vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
-                               const GLvoid *indices, GLsizei primcount)
+                               const GLvoid *indices, GLsizei numInstances)
 {
    GET_CURRENT_CONTEXT(ctx);
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n",
                   _mesa_lookup_enum_by_nr(mode), count,
-                  _mesa_lookup_enum_by_nr(type), indices, primcount);
+                  _mesa_lookup_enum_by_nr(type), indices, numInstances);
 
    if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
-                                             primcount))
+                                             numInstances))
       return;
 
    vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
-				   count, type, indices, 0, primcount);
+				   count, type, indices, 0, numInstances);
 }
 
 
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index 26d0046e83d..8c981f93e5c 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -222,6 +222,7 @@ begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
 
    prim->mode = mode;
    prim->begin = begin_flag;
+   prim->num_instances = 1;
 }
 
 
diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c
index 789cf31364b..f6aa576b6c8 100644
--- a/src/mesa/vbo/vbo_split_inplace.c
+++ b/src/mesa/vbo/vbo_split_inplace.c
@@ -178,6 +178,7 @@ static void split_prims( struct split_context *split)
 	    outprim->end = (nr == remaining && prim->end);
 	    outprim->start = prim->start + j;
 	    outprim->count = nr;
+            outprim->num_instances = prim->num_instances;
 
 	    update_index_bounds(split, outprim);
 
@@ -221,6 +222,7 @@ static void split_prims( struct split_context *split)
 	 tmpprim.indexed = 1;
 	 tmpprim.start = 0;
 	 tmpprim.count = count;
+         tmpprim.num_instances = 1;
 
 	 flush_vertex(split);
author	Christian König <[email protected]>	2011-01-20 22:43:18 +0100
committer	Christian König <[email protected]>	2011-01-20 22:43:18 +0100
commit	78faf8d0e9c276a0ff1465e501d58fb3d66de2f7 (patch)
tree	4e124bd6b511e408c5e113c4166b8fa97fd75b24 /src/mesa
parent	d2ff6b8715e817c1ef14d4bf12be58c19d894143 (diff)
parent	37233f1ee0213a224611788bbab38840ba9f8308 (diff)