summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorChristian König <[email protected]>2011-01-20 22:43:18 +0100
committerChristian König <[email protected]>2011-01-20 22:43:18 +0100
commit78faf8d0e9c276a0ff1465e501d58fb3d66de2f7 (patch)
tree4e124bd6b511e408c5e113c4166b8fa97fd75b24 /src/mesa
parentd2ff6b8715e817c1ef14d4bf12be58c19d894143 (diff)
parent37233f1ee0213a224611788bbab38840ba9f8308 (diff)
Merge remote branch 'origin/master' into pipe-video
Conflicts: src/gallium/drivers/r600/r600_asm.c
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/Makefile1
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp71
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h28
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp489
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_texture.c6
-rw-r--r--src/mesa/drivers/dri/r200/r200_state_init.c2
-rw-r--r--src/mesa/drivers/dri/r200/r200_texstate.c20
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.c4
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c26
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c11
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_span.c8
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_state.c3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_state_init.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texstate.c13
-rw-r--r--src/mesa/drivers/x11/Makefile3
-rw-r--r--src/mesa/main/api_validate.c16
-rw-r--r--src/mesa/main/context.c10
-rw-r--r--src/mesa/main/dd.h5
-rw-r--r--src/mesa/main/debug.c20
-rw-r--r--src/mesa/main/enums.c43
-rw-r--r--src/mesa/main/enums.h2
-rw-r--r--src/mesa/main/mtypes.h36
-rw-r--r--src/mesa/main/shaderapi.c49
-rw-r--r--src/mesa/program/register_allocate.c97
-rw-r--r--src/mesa/state_tracker/st_atom_texture.c46
-rw-r--r--src/mesa/state_tracker/st_extensions.c8
-rw-r--r--src/mesa/state_tracker/st_texture.h10
-rw-r--r--src/mesa/tnl/t_draw.c2
-rw-r--r--src/mesa/vbo/vbo.h2
-rw-r--r--src/mesa/vbo/vbo_exec_array.c20
-rw-r--r--src/mesa/vbo/vbo_split_copy.c1
-rw-r--r--src/mesa/vbo/vbo_split_inplace.c2
33 files changed, 868 insertions, 190 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 7c3ac0c14ef..b05ba35d65f 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -108,6 +108,7 @@ CXX_SOURCES = \
brw_fs.cpp \
brw_fs_channel_expressions.cpp \
brw_fs_reg_allocate.cpp \
+ brw_fs_schedule_instructions.cpp \
brw_fs_vector_splitting.cpp
ASM_SOURCES =
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a35687d5991..9a71e5377df 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -316,7 +316,6 @@ int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
unsigned int offset = 0;
- float *vec_values;
if (type->is_matrix()) {
const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
@@ -335,7 +334,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_BOOL:
- vec_values = fp->Base.Parameters->ParameterValues[loc];
for (unsigned int i = 0; i < type->vector_elements; i++) {
unsigned int param = c->prog_data.nr_params++;
@@ -359,8 +357,8 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
break;
}
-
- c->prog_data.param[param] = &vec_values[i];
+ this->param_index[param] = loc;
+ this->param_offset[param] = i;
}
return 1;
@@ -431,7 +429,6 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
*/
int index = _mesa_add_state_reference(this->fp->Base.Parameters,
(gl_state_index *)tokens);
- float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
/* Add each of the unique swizzles of the element as a
* parameter. This'll end up matching the expected layout of
@@ -446,7 +443,9 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
c->prog_data.param_convert[c->prog_data.nr_params] =
PARAM_NO_CONVERT;
- c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
+ this->param_index[c->prog_data.nr_params] = index;
+ this->param_offset[c->prog_data.nr_params] = swiz;
+ c->prog_data.nr_params++;
}
}
}
@@ -1370,10 +1369,13 @@ fs_visitor::visit(ir_texture *ir)
fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
GLuint index = _mesa_add_state_reference(params,
(gl_state_index *)tokens);
- float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
- c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0];
- c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1];
+ this->param_index[c->prog_data.nr_params] = index;
+ this->param_offset[c->prog_data.nr_params] = 0;
+ c->prog_data.nr_params++;
+ this->param_index[c->prog_data.nr_params] = index;
+ this->param_offset[c->prog_data.nr_params] = 1;
+ c->prog_data.nr_params++;
fs_reg dst = fs_reg(this, ir->coordinate->type);
fs_reg src = coordinate;
@@ -2500,6 +2502,22 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
}
}
+/**
+ * To be called after the last _mesa_add_state_reference() call, to
+ * set up prog_data.param[] for assign_curb_setup() and
+ * setup_pull_constants().
+ */
+void
+fs_visitor::setup_paramvalues_refs()
+{
+ /* Set up the pointers to ParamValues now that that array is finalized. */
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ c->prog_data.param[i] =
+ fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+ this->param_offset[i];
+ }
+}
+
void
fs_visitor::assign_curb_setup()
{
@@ -2629,10 +2647,7 @@ fs_visitor::split_virtual_grfs()
fs_inst *inst = (fs_inst *)iter.get();
/* Texturing produces 4 contiguous registers, so no splitting. */
- if ((inst->opcode == FS_OPCODE_TEX ||
- inst->opcode == FS_OPCODE_TXB ||
- inst->opcode == FS_OPCODE_TXL) &&
- inst->dst.file == GRF) {
+ if (inst->is_tex()) {
split_grf[inst->dst.reg] = false;
}
}
@@ -2920,7 +2935,7 @@ fs_visitor::propagate_constants()
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == inst->dst.reg &&
(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
- scan_inst->opcode == FS_OPCODE_TEX)) {
+ scan_inst->is_tex())) {
break;
}
}
@@ -3015,13 +3030,13 @@ fs_visitor::register_coalesce()
if (scan_inst->dst.file == GRF) {
if (scan_inst->dst.reg == inst->dst.reg &&
(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
- scan_inst->opcode == FS_OPCODE_TEX)) {
+ scan_inst->is_tex())) {
interfered = true;
break;
}
if (scan_inst->dst.reg == inst->src[0].reg &&
(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
- scan_inst->opcode == FS_OPCODE_TEX)) {
+ scan_inst->is_tex())) {
interfered = true;
break;
}
@@ -3102,7 +3117,7 @@ fs_visitor::compute_to_mrf()
* into a compute-to-MRF.
*/
- if (scan_inst->opcode == FS_OPCODE_TEX) {
+ if (scan_inst->is_tex()) {
/* texturing writes several continuous regs, so we can't
* compute-to-mrf that.
*/
@@ -3123,14 +3138,7 @@ fs_visitor::compute_to_mrf()
/* gen6 math instructions must have the destination be
* GRF, so no compute-to-MRF for them.
*/
- if (scan_inst->opcode == FS_OPCODE_RCP ||
- scan_inst->opcode == FS_OPCODE_RSQ ||
- scan_inst->opcode == FS_OPCODE_SQRT ||
- scan_inst->opcode == FS_OPCODE_EXP2 ||
- scan_inst->opcode == FS_OPCODE_LOG2 ||
- scan_inst->opcode == FS_OPCODE_SIN ||
- scan_inst->opcode == FS_OPCODE_COS ||
- scan_inst->opcode == FS_OPCODE_POW) {
+ if (scan_inst->is_math()) {
break;
}
}
@@ -3152,6 +3160,7 @@ fs_visitor::compute_to_mrf()
*/
if (scan_inst->opcode == BRW_OPCODE_DO ||
scan_inst->opcode == BRW_OPCODE_WHILE ||
+ scan_inst->opcode == BRW_OPCODE_ELSE ||
scan_inst->opcode == BRW_OPCODE_ENDIF) {
break;
}
@@ -3238,7 +3247,7 @@ fs_visitor::remove_duplicate_mrf_writes()
}
if (inst->mlen > 0) {
- /* Found a SEND instruction, which will include two of fewer
+ /* Found a SEND instruction, which will include two or fewer
* implied MRF writes. We could do better here.
*/
for (int i = 0; i < implied_mrf_writes(inst); i++) {
@@ -3662,10 +3671,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
v.emit_fb_writes();
v.split_virtual_grfs();
- v.setup_pull_constants();
- v.assign_curb_setup();
- v.assign_urb_setup();
+ v.setup_paramvalues_refs();
+ v.setup_pull_constants();
bool progress;
do {
@@ -3679,6 +3687,11 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
progress = v.dead_code_eliminate() || progress;
} while (progress);
+ v.schedule_instructions();
+
+ v.assign_curb_setup();
+ v.assign_urb_setup();
+
if (0) {
/* Debug of register spilling: Go spill everything. */
int virtual_grf_count = v.virtual_grf_next;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 82d96f6ac02..f0497957bc4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -306,6 +306,25 @@ public:
offset == inst->offset);
}
+ bool is_tex()
+ {
+ return (opcode == FS_OPCODE_TEX ||
+ opcode == FS_OPCODE_TXB ||
+ opcode == FS_OPCODE_TXL);
+ }
+
+ bool is_math()
+ {
+ return (opcode == FS_OPCODE_RCP ||
+ opcode == FS_OPCODE_RSQ ||
+ opcode == FS_OPCODE_SQRT ||
+ opcode == FS_OPCODE_EXP2 ||
+ opcode == FS_OPCODE_LOG2 ||
+ opcode == FS_OPCODE_SIN ||
+ opcode == FS_OPCODE_COS ||
+ opcode == FS_OPCODE_POW);
+ }
+
int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
fs_reg dst;
fs_reg src[3];
@@ -412,6 +431,7 @@ public:
void visit(ir_function_signature *ir);
fs_inst *emit(fs_inst inst);
+ void setup_paramvalues_refs();
void assign_curb_setup();
void calculate_urb_setup();
void assign_urb_setup();
@@ -428,6 +448,8 @@ public:
bool dead_code_eliminate();
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
+ void schedule_instructions();
+
void generate_code();
void generate_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
@@ -476,6 +498,12 @@ public:
void *mem_ctx;
exec_list instructions;
+ /* Delayed setup of c->prog_data.params[] due to realloc of
+ * ParamValues[] during compile.
+ */
+ int param_index[MAX_UNIFORMS * 4];
+ int param_offset[MAX_UNIFORMS * 4];
+
int *virtual_grf_sizes;
int virtual_grf_next;
int virtual_grf_array_size;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
new file mode 100644
index 00000000000..c8f0b27b76f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -0,0 +1,489 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <[email protected]>
+ *
+ */
+
+extern "C" {
+
+#include <sys/types.h>
+
+#include "main/macros.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "program/prog_optimize.h"
+#include "program/register_allocate.h"
+#include "program/sampler.h"
+#include "program/hash_table.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "talloc.h"
+}
+#include "brw_fs.h"
+#include "../glsl/glsl_types.h"
+#include "../glsl/ir_optimization.h"
+#include "../glsl/ir_print_visitor.h"
+
+/** @file brw_fs_schedule_instructions.cpp
+ *
+ * List scheduling of FS instructions.
+ *
+ * The basic model of the list scheduler is to take a basic block,
+ * compute a DAG of the dependencies (RAW ordering with latency, WAW
+ * ordering, WAR ordering), and make a list of the DAG heads.
+ * Heuristically pick a DAG head, then put all the children that are
+ * now DAG heads into the list of things to schedule.
+ *
+ * The heuristic is the important part. We're trying to be cheap,
+ * since actually computing the optimal scheduling is NP complete.
+ * What we do is track a "current clock". When we schedule a node, we
+ * update the earliest-unblocked clock time of its children, and
+ * increment the clock. Then, when trying to schedule, we just pick
+ * the earliest-unblocked instruction to schedule.
+ *
+ * Note that often there will be many things which could execute
+ * immediately, and there are a range of heuristic options to choose
+ * from in picking among those.
+ */
+
+class schedule_node : public exec_node
+{
+public:
+ schedule_node(fs_inst *inst)
+ {
+ this->inst = inst;
+ this->child_array_size = 0;
+ this->children = NULL;
+ this->child_latency = NULL;
+ this->child_count = 0;
+ this->parent_count = 0;
+ this->unblocked_time = 0;
+
+ int chans = 8;
+ int math_latency = 22;
+
+ switch (inst->opcode) {
+ case FS_OPCODE_RCP:
+ this->latency = 1 * chans * math_latency;
+ break;
+ case FS_OPCODE_RSQ:
+ this->latency = 2 * chans * math_latency;
+ break;
+ case FS_OPCODE_SQRT:
+ case FS_OPCODE_LOG2:
+ /* full precision log. partial is 2. */
+ this->latency = 3 * chans * math_latency;
+ break;
+ case FS_OPCODE_EXP2:
+ /* full precision. partial is 3, same throughput. */
+ this->latency = 4 * chans * math_latency;
+ break;
+ case FS_OPCODE_POW:
+ this->latency = 8 * chans * math_latency;
+ break;
+ case FS_OPCODE_SIN:
+ case FS_OPCODE_COS:
+ /* minimum latency, max is 12 rounds. */
+ this->latency = 5 * chans * math_latency;
+ break;
+ default:
+ this->latency = 2;
+ break;
+ }
+ }
+
+ fs_inst *inst;
+ schedule_node **children;
+ int *child_latency;
+ int child_count;
+ int parent_count;
+ int child_array_size;
+ int unblocked_time;
+ int latency;
+};
+
+class instruction_scheduler {
+public:
+ instruction_scheduler(fs_visitor *v, void *mem_ctx, int virtual_grf_count)
+ {
+ this->v = v;
+ this->mem_ctx = talloc_new(mem_ctx);
+ this->virtual_grf_count = virtual_grf_count;
+ this->instructions.make_empty();
+ this->instructions_to_schedule = 0;
+ }
+
+ ~instruction_scheduler()
+ {
+ talloc_free(this->mem_ctx);
+ }
+ void add_barrier_deps(schedule_node *n);
+ void add_dep(schedule_node *before, schedule_node *after, int latency);
+
+ void add_inst(fs_inst *inst);
+ void calculate_deps();
+ void schedule_instructions(fs_inst *next_block_header);
+
+ void *mem_ctx;
+
+ int instructions_to_schedule;
+ int virtual_grf_count;
+ exec_list instructions;
+ fs_visitor *v;
+};
+
+void
+instruction_scheduler::add_inst(fs_inst *inst)
+{
+ schedule_node *n = new(mem_ctx) schedule_node(inst);
+
+ assert(!inst->is_head_sentinel());
+ assert(!inst->is_tail_sentinel());
+
+ this->instructions_to_schedule++;
+
+ inst->remove();
+ instructions.push_tail(n);
+}
+
+/**
+ * Add a dependency between two instruction nodes.
+ *
+ * The @after node will be scheduled after @before. We will try to
+ * schedule it @latency cycles after @before, but no guarantees there.
+ */
+void
+instruction_scheduler::add_dep(schedule_node *before, schedule_node *after,
+ int latency)
+{
+ if (!before || !after)
+ return;
+
+ assert(before != after);
+
+ for (int i = 0; i < before->child_count; i++) {
+ if (before->children[i] == after) {
+ before->child_latency[i] = MAX2(before->child_latency[i], latency);
+ return;
+ }
+ }
+
+ if (before->child_array_size <= before->child_count) {
+ if (before->child_array_size < 16)
+ before->child_array_size = 16;
+ else
+ before->child_array_size *= 2;
+
+ before->children = talloc_realloc(mem_ctx, before->children,
+ schedule_node *,
+ before->child_array_size);
+ before->child_latency = talloc_realloc(mem_ctx, before->child_latency,
+ int, before->child_array_size);
+ }
+
+ before->children[before->child_count] = after;
+ before->child_latency[before->child_count] = latency;
+ before->child_count++;
+ after->parent_count++;
+}
+
+/**
+ * Sometimes we really want this node to execute after everything that
+ * was before it and before everything that followed it. This adds
+ * the deps to do so.
+ */
+void
+instruction_scheduler::add_barrier_deps(schedule_node *n)
+{
+ schedule_node *prev = (schedule_node *)n->prev;
+ schedule_node *next = (schedule_node *)n->next;
+
+ if (prev) {
+ while (!prev->is_head_sentinel()) {
+ add_dep(prev, n, 0);
+ prev = (schedule_node *)prev->prev;
+ }
+ }
+
+ if (next) {
+ while (!next->is_tail_sentinel()) {
+ add_dep(n, next, 0);
+ next = (schedule_node *)next->next;
+ }
+ }
+}
+
+void
+instruction_scheduler::calculate_deps()
+{
+ schedule_node *last_grf_write[virtual_grf_count];
+ schedule_node *last_mrf_write[BRW_MAX_MRF];
+ schedule_node *last_conditional_mod = NULL;
+
+ /* The last instruction always needs to still be the last
+ * instruction. Either it's flow control (IF, ELSE, ENDIF, DO,
+ * WHILE) and scheduling other things after it would disturb the
+ * basic block, or it's FB_WRITE and we should do a better job at
+ * dead code elimination anyway.
+ */
+ schedule_node *last = (schedule_node *)instructions.get_tail();
+ add_barrier_deps(last);
+
+ memset(last_grf_write, 0, sizeof(last_grf_write));
+ memset(last_mrf_write, 0, sizeof(last_mrf_write));
+
+ /* top-to-bottom dependencies: RAW and WAW. */
+ foreach_iter(exec_list_iterator, iter, instructions) {
+ schedule_node *n = (schedule_node *)iter.get();
+ fs_inst *inst = n->inst;
+
+ /* read-after-write deps. */
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file == GRF) {
+ if (last_grf_write[inst->src[i].reg]) {
+ add_dep(last_grf_write[inst->src[i].reg], n,
+ last_grf_write[inst->src[i].reg]->latency);
+ }
+ } else if (inst->src[i].file != BAD_FILE &&
+ inst->src[i].file != IMM &&
+ inst->src[i].file != UNIFORM) {
+ assert(inst->src[i].file != MRF);
+ add_barrier_deps(n);
+ }
+ }
+
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ if (last_mrf_write[inst->base_mrf + i]) {
+ add_dep(last_mrf_write[inst->base_mrf + i], n,
+ last_mrf_write[inst->base_mrf + i]->latency);
+ }
+ }
+
+ if (inst->predicated) {
+ assert(last_conditional_mod);
+ add_dep(last_conditional_mod, n, last_conditional_mod->latency);
+ }
+
+ /* write-after-write deps. */
+ if (inst->dst.file == GRF) {
+ if (last_grf_write[inst->dst.reg]) {
+ add_dep(last_grf_write[inst->dst.reg], n,
+ last_grf_write[inst->dst.reg]->latency);
+ }
+ last_grf_write[inst->dst.reg] = n;
+ } else if (inst->dst.file == MRF) {
+ if (last_mrf_write[inst->dst.hw_reg]) {
+ add_dep(last_mrf_write[inst->dst.hw_reg], n,
+ last_mrf_write[inst->dst.hw_reg]->latency);
+ }
+ last_mrf_write[inst->dst.hw_reg] = n;
+ } else if (inst->dst.file != BAD_FILE) {
+ add_barrier_deps(n);
+ }
+
+ if (inst->mlen > 0) {
+ for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+ if (last_mrf_write[inst->base_mrf + i]) {
+ add_dep(last_mrf_write[inst->base_mrf + i], n,
+ last_mrf_write[inst->base_mrf + i]->latency);
+ }
+ last_mrf_write[inst->base_mrf + i] = n;
+ }
+ }
+
+ if (inst->conditional_mod) {
+ add_dep(last_conditional_mod, n, 0);
+ last_conditional_mod = n;
+ }
+ }
+
+ /* bottom-to-top dependencies: WAR */
+ memset(last_grf_write, 0, sizeof(last_grf_write));
+ memset(last_mrf_write, 0, sizeof(last_mrf_write));
+ last_conditional_mod = NULL;
+
+ exec_node *node;
+ exec_node *prev;
+ for (node = instructions.get_tail(), prev = node->prev;
+ !node->is_head_sentinel();
+ node = prev, prev = node->prev) {
+ schedule_node *n = (schedule_node *)node;
+ fs_inst *inst = n->inst;
+
+ /* write-after-read deps. */
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file == GRF) {
+ if (last_grf_write[inst->src[i].reg]) {
+ add_dep(n, last_grf_write[inst->src[i].reg], n->latency);
+ }
+ } else if (inst->src[i].file != BAD_FILE &&
+ inst->src[i].file != IMM &&
+ inst->src[i].file != UNIFORM) {
+ assert(inst->src[i].file != MRF);
+ add_barrier_deps(n);
+ }
+ }
+
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ }
+
+ if (inst->predicated) {
+ if (last_conditional_mod) {
+ add_dep(n, last_conditional_mod, n->latency);
+ }
+ }
+
+ /* Update the things this instruction wrote, so earlier reads
+ * can mark this as WAR dependency.
+ */
+ if (inst->dst.file == GRF) {
+ last_grf_write[inst->dst.reg] = n;
+ } else if (inst->dst.file == MRF) {
+ last_mrf_write[inst->dst.hw_reg] = n;
+ } else if (inst->dst.file != BAD_FILE) {
+ add_barrier_deps(n);
+ }
+
+ if (inst->mlen > 0) {
+ for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+ last_mrf_write[inst->base_mrf + i] = n;
+ }
+ }
+
+ if (inst->conditional_mod)
+ last_conditional_mod = n;
+ }
+}
+
+void
+instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
+{
+ int time = 0;
+
+ /* Remove non-DAG heads from the list. */
+ foreach_iter(exec_list_iterator, iter, instructions) {
+ schedule_node *n = (schedule_node *)iter.get();
+ if (n->parent_count != 0)
+ n->remove();
+ }
+
+ while (!instructions.is_empty()) {
+ schedule_node *chosen = NULL;
+ int chosen_time = 0;
+
+ foreach_iter(exec_list_iterator, iter, instructions) {
+ schedule_node *n = (schedule_node *)iter.get();
+
+ if (!chosen || n->unblocked_time < chosen_time) {
+ chosen = n;
+ chosen_time = n->unblocked_time;
+ }
+ }
+
+ /* Schedule this instruction. */
+ assert(chosen);
+ chosen->remove();
+ next_block_header->insert_before(chosen->inst);
+ instructions_to_schedule--;
+
+ /* Bump the clock. If we expected a delay for scheduling, then
+ * bump the clock to reflect that.
+ */
+ time = MAX2(time + 1, chosen_time);
+
+ /* Now that we've scheduled a new instruction, some of its
+ * children can be promoted to the list of instructions ready to
+ * be scheduled. Update the children's unblocked time for this
+ * DAG edge as we do so.
+ */
+ for (int i = 0; i < chosen->child_count; i++) {
+ schedule_node *child = chosen->children[i];
+
+ child->unblocked_time = MAX2(child->unblocked_time,
+ time + chosen->child_latency[i]);
+
+ child->parent_count--;
+ if (child->parent_count == 0) {
+ instructions.push_tail(child);
+ }
+ }
+
+ /* Shared resource: the mathbox. There's one per EU (on later
+ * generations, it's even more limited pre-gen6), so if we send
+ * something off to it then the next math isn't going to make
+ * progress until the first is done.
+ */
+ if (chosen->inst->is_math()) {
+ foreach_iter(exec_list_iterator, iter, instructions) {
+ schedule_node *n = (schedule_node *)iter.get();
+
+ if (n->inst->is_math())
+ n->unblocked_time = MAX2(n->unblocked_time,
+ time + chosen->latency);
+ }
+ }
+ }
+
+ assert(instructions_to_schedule == 0);
+}
+
+void
+fs_visitor::schedule_instructions()
+{
+ fs_inst *next_block_header = (fs_inst *)instructions.head;
+ instruction_scheduler sched(this, mem_ctx, this->virtual_grf_next);
+
+ while (!next_block_header->is_tail_sentinel()) {
+ /* Add things to be scheduled until we get to a new BB. */
+ while (!next_block_header->is_tail_sentinel()) {
+ fs_inst *inst = next_block_header;
+ next_block_header = (fs_inst *)next_block_header->next;
+
+ sched.add_inst(inst);
+ if (inst->opcode == BRW_OPCODE_IF ||
+ inst->opcode == BRW_OPCODE_ELSE ||
+ inst->opcode == BRW_OPCODE_ENDIF ||
+ inst->opcode == BRW_OPCODE_DO ||
+ inst->opcode == BRW_OPCODE_WHILE ||
+ inst->opcode == BRW_OPCODE_BREAK ||
+ inst->opcode == BRW_OPCODE_CONTINUE) {
+ break;
+ }
+ }
+ sched.calculate_deps();
+ sched.schedule_instructions(next_block_header);
+ }
+
+ this->live_intervals_valid = false;
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
index 2480b1ea500..988208ff56e 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
@@ -113,8 +113,10 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti,
if (access & GL_MAP_WRITE_BIT)
flags |= NOUVEAU_BO_WR;
- ret = nouveau_bo_map(s->bo, flags);
- assert(!ret);
+ if (!s->bo->map) {
+ ret = nouveau_bo_map(s->bo, flags);
+ assert(!ret);
+ }
ti->Data = s->bo->map + y * s->pitch + x * s->cpp;
}
diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
index f6afb90d595..e173cce0860 100644
--- a/src/mesa/drivers/dri/r200/r200_state_init.c
+++ b/src/mesa/drivers/dri/r200/r200_state_init.c
@@ -587,7 +587,7 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
if (rrb) {
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
- OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 24fb031ecb9..7adf9ad73ed 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -777,10 +777,9 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, type, format;
+ uint32_t internalFormat, format;
gl_format texFormat;
- type = GL_BGRA;
format = GL_UNSIGNED_BYTE;
internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
@@ -860,9 +859,20 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
| ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
- t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
- t->pp_txpitch = pitch_val;
- t->pp_txpitch -= 32;
+
+ if (target == GL_TEXTURE_RECTANGLE_NV) {
+ t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
+ t->pp_txpitch = pitch_val;
+ t->pp_txpitch -= 32;
+ } else {
+ t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
+ R200_TXFORMAT_HEIGHT_MASK |
+ R200_TXFORMAT_CUBIC_MAP_ENABLE |
+ R200_TXFORMAT_F5_WIDTH_MASK |
+ R200_TXFORMAT_F5_HEIGHT_MASK);
+ t->pp_txformat |= ((texImage->WidthLog2 << R200_TXFORMAT_WIDTH_SHIFT) |
+ (texImage->HeightLog2 << R200_TXFORMAT_HEIGHT_SHIFT));
+ }
t->validated = GL_TRUE;
_mesa_unlock_texture(radeon->glCtx, texObj);
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index 2a99ded5d67..fe4f0e48661 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -265,9 +265,9 @@ static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa
static void r600SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
{
t->TD_PS_SAMPLER0_BORDER_ALPHA = *((uint32_t*)&(color[3]));
- t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[2]));
+ t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[2]));
t->TD_PS_SAMPLER0_BORDER_GREEN = *((uint32_t*)&(color[1]));
- t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[0]));
+ t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[0]));
SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER,
BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
}
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 2a6a39dfbac..0323e32d705 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -47,13 +47,13 @@
void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog)
{
static const gl_state_index winstate[STATE_LENGTH]
- = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
+ = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0};
struct prog_instruction *newInst, *inst;
GLint win_size; /* state reference */
GLuint wpos_temp; /* temp register */
int i, j;
- /* PARAM win_size = STATE_FB_SIZE */
+ /* PARAM win_size = STATE_FB_WPOS_Y_TRANSFORM */
win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
wpos_temp = fprog->Base.NumTemporaries++;
@@ -74,9 +74,8 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog)
_mesa_insert_instructions(&(fprog->Base), 0, 1);
newInst = fprog->Base.Instructions;
- /* invert wpos.y
- * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
- newInst[0].Opcode = OPCODE_ADD;
+ /* possibly invert wpos.y depending on STATE_FB_WPOS_Y_TRANSFORM var */
+ newInst[0].Opcode = OPCODE_MAD;
newInst[0].DstReg.File = PROGRAM_TEMPORARY;
newInst[0].DstReg.Index = wpos_temp;
newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
@@ -84,11 +83,14 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog)
newInst[0].SrcReg[0].File = PROGRAM_INPUT;
newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
- newInst[0].SrcReg[0].Negate = NEGATE_Y;
newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
newInst[0].SrcReg[1].Index = win_size;
- newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
+ newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE);
+
+ newInst[0].SrcReg[2].File = PROGRAM_STATE_VAR;
+ newInst[0].SrcReg[2].Index = win_size;
+ newInst[0].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
}
@@ -509,6 +511,7 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
unsigned int ui, i;
unsigned int unNumOfReg;
unsigned int unBit;
+ unsigned int num_sq_ps_gprs;
GLuint exportCount;
GLboolean point_sprite = GL_FALSE;
@@ -619,6 +622,15 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
+ num_sq_ps_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_PS_GPRS_mask) >> NUM_PS_GPRS_shift);
+
+ if(ui > num_sq_ps_gprs)
+ {
+ /* care! thich changes sq - needs idle state */
+ R600_STATECHANGE(context, sq);
+ SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, ui, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
+ }
+
CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 7ba49d8f986..7d4be9180a0 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -605,6 +605,7 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx)
struct gl_program_parameter_list *paramList;
unsigned int unNumParamData;
unsigned int ui;
+ unsigned int num_sq_vs_gprs;
if(GL_FALSE == vp->loaded)
{
@@ -656,6 +657,16 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx)
SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
NUM_GPRS_shift, NUM_GPRS_mask);
+ num_sq_vs_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_VS_GPRS_mask) >> NUM_VS_GPRS_shift);
+
+ if((vp->r700Shader.nRegs + 1) > num_sq_vs_gprs)
+ {
+ /* care! thich changes sq - needs idle state */
+ R600_STATECHANGE(context, sq);
+ SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, vp->r700Shader.nRegs + 1,
+ NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
+ }
+
if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
{
SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index a1124483a6c..819d9dd5750 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -532,7 +532,7 @@ void radeon_prepare_render(radeonContextPtr radeon)
/* Intel driver does the equivalent of this, no clue if it is needed:*/
draw = drawable->driverPrivate;
- radeon_draw_buffer(radeon->glCtx, &draw->base);
+ radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
driContext->dri2.draw_stamp = drawable->dri2.stamp;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index 1c5326fe9dc..caf3f253d2a 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -60,7 +60,7 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
- GLubyte *ptr = rrb->bo->ptr;
+ GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
GLint offset;
if (rrb->has_surface) {
offset = x * rrb->cpp + y * rrb->pitch;
@@ -85,7 +85,7 @@ static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
- GLubyte *ptr = rrb->bo->ptr;
+ GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
GLint offset;
if (rrb->has_surface) {
offset = x * rrb->cpp + y * rrb->pitch;
@@ -439,7 +439,7 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
- GLubyte *ptr = rrb->bo->ptr;
+ GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
GLint offset;
@@ -479,7 +479,7 @@ static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
- GLubyte *ptr = rrb->bo->ptr;
+ GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
GLint offset;
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index ca42aa39474..e88e984354f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -2091,6 +2091,9 @@ static GLboolean r100ValidateBuffers(struct gl_context *ctx)
continue;
t = rmesa->state.texture.unit[i].texobj;
+
+ if (!t)
+ continue;
if (t->image_override && t->bo)
radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
index 698efb145c0..33b504cccf8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
@@ -488,7 +488,7 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
if (rrb) {
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
- OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 32c021cb545..9ba98e303a7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -652,12 +652,11 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, type, format;
+ uint32_t internalFormat, format;
gl_format texFormat;
- type = GL_BGRA;
format = GL_UNSIGNED_BYTE;
- internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
+ internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA);
radeon = pDRICtx->driverPrivate;
rmesa = pDRICtx->driverPrivate;
@@ -739,6 +738,14 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form
t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
t->pp_txpitch = pitch_val;
t->pp_txpitch -= 32;
+ } else {
+ t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
+ RADEON_TXFORMAT_HEIGHT_MASK |
+ RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
+ RADEON_TXFORMAT_F5_WIDTH_MASK |
+ RADEON_TXFORMAT_F5_HEIGHT_MASK);
+ t->pp_txformat |= ((texImage->WidthLog2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
+ (texImage->HeightLog2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
}
t->validated = GL_TRUE;
_mesa_unlock_texture(radeon->glCtx, texObj);
diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile
index f94aae85f2c..6b2a13c974f 100644
--- a/src/mesa/drivers/x11/Makefile
+++ b/src/mesa/drivers/x11/Makefile
@@ -47,6 +47,9 @@ INCLUDE_DIRS = \
CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a
+ifeq ($(SHARED_GLAPI),1)
+GL_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(GL_LIB_DEPS)
+endif
.c.o:
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index ac9709db3f1..7c4652f747f 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -329,7 +329,7 @@ _mesa_validate_DrawArrays(struct gl_context *ctx,
GLboolean
_mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint first,
- GLsizei count, GLsizei primcount)
+ GLsizei count, GLsizei numInstances)
{
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE);
@@ -346,10 +346,10 @@ _mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint fi
return GL_FALSE;
}
- if (primcount <= 0) {
- if (primcount < 0)
+ if (numInstances <= 0) {
+ if (numInstances < 0)
_mesa_error(ctx, GL_INVALID_VALUE,
- "glDrawArraysInstanced(primcount=%d)", primcount);
+ "glDrawArraysInstanced(numInstances=%d)", numInstances);
return GL_FALSE;
}
@@ -374,7 +374,7 @@ _mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint fi
GLboolean
_mesa_validate_DrawElementsInstanced(struct gl_context *ctx,
GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices, GLsizei primcount)
+ const GLvoid *indices, GLsizei numInstances)
{
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE);
@@ -399,10 +399,10 @@ _mesa_validate_DrawElementsInstanced(struct gl_context *ctx,
return GL_FALSE;
}
- if (primcount <= 0) {
- if (primcount < 0)
+ if (numInstances <= 0) {
+ if (numInstances < 0)
_mesa_error(ctx, GL_INVALID_VALUE,
- "glDrawElementsInstanced(primcount=%d)", primcount);
+ "glDrawElementsInstanced(numInstances=%d)", numInstances);
return GL_FALSE;
}
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 958ea10a422..fe370fa369b 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -526,6 +526,16 @@ init_program_limits(GLenum type, struct gl_program_constants *prog)
prog->MaxNativeTemps = 0;
prog->MaxNativeAddressRegs = 0;
prog->MaxNativeParameters = 0;
+
+ /* Set GLSL datatype range/precision info assuming IEEE float values.
+ * Drivers should override these defaults as needed.
+ */
+ prog->MediumFloat.RangeMin = 127;
+ prog->MediumFloat.RangeMax = 127;
+ prog->MediumFloat.Precision = 23;
+ prog->LowFloat = prog->HighFloat = prog->MediumFloat;
+ /* assume ints are stored as floats for now */
+ prog->LowInt = prog->MediumInt = prog->HighInt = prog->MediumFloat;
}
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 2eede4268ca..749c30a4cc1 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -881,11 +881,6 @@ struct dd_function_table {
*/
void (*ValidateTnlModule)( struct gl_context *ctx, GLuint new_state );
-
-#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1)
-#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2)
-#define PRIM_UNKNOWN (GL_POLYGON+3)
-
/**
* Set by the driver-supplied T&L engine.
*
diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c
index 79aa53585f9..a6a909b48ce 100644
--- a/src/mesa/main/debug.c
+++ b/src/mesa/main/debug.c
@@ -37,26 +37,6 @@
#include "texobj.h"
-/**
- * Primitive names
- */
-const char *_mesa_prim_name[GL_POLYGON+4] = {
- "GL_POINTS",
- "GL_LINES",
- "GL_LINE_LOOP",
- "GL_LINE_STRIP",
- "GL_TRIANGLES",
- "GL_TRIANGLE_STRIP",
- "GL_TRIANGLE_FAN",
- "GL_QUADS",
- "GL_QUAD_STRIP",
- "GL_POLYGON",
- "outside begin/end",
- "inside unknown primitive",
- "unknown state"
-};
-
-
static const char *
tex_target_name(GLenum tgt)
{
diff --git a/src/mesa/main/enums.c b/src/mesa/main/enums.c
index c358fb246bc..83d7fb66c0a 100644
--- a/src/mesa/main/enums.c
+++ b/src/mesa/main/enums.c
@@ -29,6 +29,7 @@
#include "main/mfeatures.h"
#include "main/enums.h"
#include "main/imports.h"
+#include "main/mtypes.h"
typedef struct {
size_t offset;
@@ -6243,29 +6244,39 @@ const char *_mesa_lookup_enum_by_nr( int nr )
}
}
+/**
+ * Primitive names
+ */
+static const char *prim_names[PRIM_UNKNOWN + 1] = {
+ "GL_POINTS",
+ "GL_LINES",
+ "GL_LINE_LOOP",
+ "GL_LINE_STRIP",
+ "GL_TRIANGLES",
+ "GL_TRIANGLE_STRIP",
+ "GL_TRIANGLE_FAN",
+ "GL_QUADS",
+ "GL_QUAD_STRIP",
+ "GL_POLYGON",
+ "outside begin/end",
+ "inside unknown primitive",
+ "unknown state"
+};
+
+
/* Get the name of an enum given that it is a primitive type. Avoids
* GL_FALSE/GL_POINTS ambiguity and others.
*/
-const char *_mesa_lookup_prim_by_nr( int nr )
+const char *
+_mesa_lookup_prim_by_nr(unsigned nr)
{
- switch (nr) {
- case GL_POINTS: return "GL_POINTS";
- case GL_LINES: return "GL_LINES";
- case GL_LINE_STRIP: return "GL_LINE_STRIP";
- case GL_LINE_LOOP: return "GL_LINE_LOOP";
- case GL_TRIANGLES: return "GL_TRIANGLES";
- case GL_TRIANGLE_STRIP: return "GL_TRIANGLE_STRIP";
- case GL_TRIANGLE_FAN: return "GL_TRIANGLE_FAN";
- case GL_QUADS: return "GL_QUADS";
- case GL_QUAD_STRIP: return "GL_QUAD_STRIP";
- case GL_POLYGON: return "GL_POLYGON";
- case GL_POLYGON+1: return "OUTSIDE_BEGIN_END";
- default: return "<invalid>";
- }
+ if (nr < Elements(prim_names))
+ return prim_names[nr];
+ else
+ return "invalid mode";
}
-
int _mesa_lookup_enum_by_name( const char *symbol )
{
enum_elt * f = NULL;
diff --git a/src/mesa/main/enums.h b/src/mesa/main/enums.h
index c03cd34da92..7733df22f91 100644
--- a/src/mesa/main/enums.h
+++ b/src/mesa/main/enums.h
@@ -45,7 +45,7 @@ extern const char *_mesa_lookup_enum_by_nr( int nr );
/* Get the name of an enum given that it is a primitive type. Avoids
* GL_FALSE/GL_POINTS ambiguity and others.
*/
-const char *_mesa_lookup_prim_by_nr( int nr );
+const char *_mesa_lookup_prim_by_nr( unsigned nr );
extern int _mesa_lookup_enum_by_name( const char *symbol );
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a6445b18368..49dad4d4024 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -121,6 +121,11 @@ struct st_context;
/*@}*/
+/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */
+#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1)
+#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2)
+#define PRIM_UNKNOWN (GL_POLYGON+3)
+
/**
* Shader stages. Note that these will become 5 with tessellation.
@@ -296,8 +301,8 @@ typedef enum
/**
* Indexes for geometry program result attributes
*/
-/*@{*/
-typedef enum {
+typedef enum
+{
GEOM_RESULT_POS = 0,
GEOM_RESULT_COL0 = 1,
GEOM_RESULT_COL1 = 2,
@@ -320,7 +325,7 @@ typedef enum {
/* ### we need to -2 because var0 is 18 instead 16 like in the others */
GEOM_RESULT_MAX = (GEOM_RESULT_VAR0 + MAX_VARYING - 2)
} gl_geom_result;
-/*@}*/
+
/**
* Indexes for fragment program input attributes.
@@ -1322,7 +1327,7 @@ struct gl_texture_object
GLboolean _Complete; /**< Is texture object complete? */
GLboolean _RenderToTexture; /**< Any rendering to this texture? */
GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
- GLenum sRGBDecode;
+ GLenum sRGBDecode; /**< GL_DECODE_EXT or GL_SKIP_DECODE_EXT */
/** Actual texture images, indexed by [cube face] and [mipmap level] */
struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS];
@@ -2549,6 +2554,17 @@ struct gl_framebuffer
/**
+ * Precision info for shader datatypes. See glGetShaderPrecisionFormat().
+ */
+struct gl_precision
+{
+ GLushort RangeMin; /**< min value exponent */
+ GLushort RangeMax; /**< max value exponent */
+ GLushort Precision; /**< number of mantissa bits */
+};
+
+
+/**
* Limits for vertex and fragment programs/shaders.
*/
struct gl_program_constants
@@ -2582,6 +2598,9 @@ struct gl_program_constants
GLuint MaxGeometryUniformComponents;
GLuint MaxGeometryOutputVertices;
GLuint MaxGeometryTotalOutputComponents;
+ /* ES 2.0 and GL_ARB_ES2_compatibility */
+ struct gl_precision LowFloat, MediumFloat, HighFloat;
+ struct gl_precision LowInt, MediumInt, HighInt;
};
@@ -3074,15 +3093,18 @@ struct gl_dlist_state
} Current;
};
+
/**
* Enum for the OpenGL APIs we know about and may support.
*/
-typedef enum {
+typedef enum
+{
API_OPENGL,
API_OPENGLES,
API_OPENGLES2
} gl_api;
+
/**
* Mesa rendering context.
*
@@ -3295,10 +3317,6 @@ struct gl_context
};
-/** The string names for GL_POINT, GL_LINE_LOOP, etc */
-extern const char *_mesa_prim_name[GL_POLYGON+4];
-
-
#ifdef DEBUG
extern int MESA_VERBOSE;
extern int MESA_DEBUG_FLAGS;
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 2ffd8be0eb4..e831175235e 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1628,12 +1628,51 @@ void GLAPIENTRY
_mesa_GetShaderPrecisionFormat(GLenum shadertype, GLenum precisiontype,
GLint* range, GLint* precision)
{
+ const struct gl_program_constants *limits;
+ const struct gl_precision *p;
GET_CURRENT_CONTEXT(ctx);
- (void) shadertype;
- (void) precisiontype;
- (void) range;
- (void) precision;
- _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__);
+
+ switch (shadertype) {
+ case GL_VERTEX_SHADER:
+ limits = &ctx->Const.VertexProgram;
+ break;
+ case GL_FRAGMENT_SHADER:
+ limits = &ctx->Const.FragmentProgram;
+ break;
+ default:
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "glGetShaderPrecisionFormat(shadertype)");
+ return;
+ }
+
+ switch (precisiontype) {
+ case GL_LOW_FLOAT:
+ p = &limits->LowFloat;
+ break;
+ case GL_MEDIUM_FLOAT:
+ p = &limits->MediumFloat;
+ break;
+ case GL_HIGH_FLOAT:
+ p = &limits->HighFloat;
+ break;
+ case GL_LOW_INT:
+ p = &limits->LowInt;
+ break;
+ case GL_MEDIUM_INT:
+ p = &limits->MediumInt;
+ break;
+ case GL_HIGH_INT:
+ p = &limits->HighInt;
+ break;
+ default:
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "glGetShaderPrecisionFormat(precisiontype)");
+ return;
+ }
+
+ range[0] = p->RangeMin;
+ range[1] = p->RangeMax;
+ precision[0] = p->Precision;
}
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index ada6e356419..f984e2f1402 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -38,8 +38,10 @@
#include "register_allocate.h"
struct ra_reg {
- char *name;
GLboolean *conflicts;
+ unsigned int *conflict_list;
+ unsigned int conflict_list_size;
+ unsigned int num_conflicts;
};
struct ra_regs {
@@ -68,6 +70,7 @@ struct ra_class {
struct ra_node {
GLboolean *adjacency;
+ unsigned int *adjacency_list;
unsigned int class;
unsigned int adjacency_count;
unsigned int reg;
@@ -100,16 +103,39 @@ ra_alloc_reg_set(unsigned int count)
for (i = 0; i < count; i++) {
regs->regs[i].conflicts = talloc_zero_array(regs->regs, GLboolean, count);
regs->regs[i].conflicts[i] = GL_TRUE;
+
+ regs->regs[i].conflict_list = talloc_array(regs->regs, unsigned int, 4);
+ regs->regs[i].conflict_list_size = 4;
+ regs->regs[i].conflict_list[0] = i;
+ regs->regs[i].num_conflicts = 1;
}
return regs;
}
+static void
+ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
+{
+ struct ra_reg *reg1 = &regs->regs[r1];
+
+ if (reg1->conflict_list_size == reg1->num_conflicts) {
+ reg1->conflict_list_size *= 2;
+ reg1->conflict_list = talloc_realloc(regs,
+ reg1->conflict_list,
+ unsigned int,
+ reg1->conflict_list_size);
+ }
+ reg1->conflict_list[reg1->num_conflicts++] = r2;
+ reg1->conflicts[r2] = GL_TRUE;
+}
+
void
ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
{
- regs->regs[r1].conflicts[r2] = GL_TRUE;
- regs->regs[r2].conflicts[r1] = GL_TRUE;
+ if (!regs->regs[r1].conflicts[r2]) {
+ ra_add_conflict_list(regs, r1, r2);
+ ra_add_conflict_list(regs, r2, r1);
+ }
}
unsigned int
@@ -160,15 +186,15 @@ ra_set_finalize(struct ra_regs *regs)
int max_conflicts = 0;
for (rc = 0; rc < regs->count; rc++) {
- unsigned int rb;
int conflicts = 0;
+ int i;
if (!regs->classes[c]->regs[rc])
continue;
- for (rb = 0; rb < regs->count; rb++) {
- if (regs->classes[b]->regs[rb] &&
- regs->regs[rb].conflicts[rc])
+ for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
+ unsigned int rb = regs->regs[rc].conflict_list[i];
+ if (regs->classes[b]->regs[rb])
conflicts++;
}
max_conflicts = MAX2(max_conflicts, conflicts);
@@ -178,6 +204,14 @@ ra_set_finalize(struct ra_regs *regs)
}
}
+static void
+ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2)
+{
+ g->nodes[n1].adjacency[n2] = GL_TRUE;
+ g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2;
+ g->nodes[n1].adjacency_count++;
+}
+
struct ra_graph *
ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
{
@@ -193,7 +227,9 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
for (i = 0; i < count; i++) {
g->nodes[i].adjacency = talloc_zero_array(g, GLboolean, count);
- g->nodes[i].adjacency[i] = GL_TRUE;
+ g->nodes[i].adjacency_list = talloc_array(g, unsigned int, count);
+ g->nodes[i].adjacency_count = 0;
+ ra_add_node_adjacency(g, i, i);
g->nodes[i].reg = ~0;
}
@@ -211,13 +247,10 @@ void
ra_add_node_interference(struct ra_graph *g,
unsigned int n1, unsigned int n2)
{
- if (g->nodes[n1].adjacency[n2])
- return;
-
- g->nodes[n1].adjacency[n2] = GL_TRUE;
- g->nodes[n2].adjacency_count++;
- g->nodes[n2].adjacency[n1] = GL_TRUE;
- g->nodes[n2].adjacency_count++;
+ if (!g->nodes[n1].adjacency[n2]) {
+ ra_add_node_adjacency(g, n1, n2);
+ ra_add_node_adjacency(g, n2, n1);
+ }
}
static GLboolean pq_test(struct ra_graph *g, unsigned int n)
@@ -226,13 +259,12 @@ static GLboolean pq_test(struct ra_graph *g, unsigned int n)
unsigned int q = 0;
int n_class = g->nodes[n].class;
- for (j = 0; j < g->count; j++) {
- if (j == n || g->nodes[j].in_stack)
- continue;
+ for (j = 0; j < g->nodes[n].adjacency_count; j++) {
+ unsigned int n2 = g->nodes[n].adjacency_list[j];
+ unsigned int n2_class = g->nodes[n2].class;
- if (g->nodes[n].adjacency[j]) {
- unsigned int j_class = g->nodes[j].class;
- q += g->regs->classes[n_class]->q[j_class];
+ if (n != n2 && !g->nodes[n2].in_stack) {
+ q += g->regs->classes[n_class]->q[n2_class];
}
}
@@ -303,14 +335,15 @@ ra_select(struct ra_graph *g)
continue;
/* Check if any of our neighbors conflict with this register choice. */
- for (i = 0; i < g->count; i++) {
- if (g->nodes[n].adjacency[i] &&
- !g->nodes[i].in_stack &&
- g->regs->regs[r].conflicts[g->nodes[i].reg]) {
+ for (i = 0; i < g->nodes[n].adjacency_count; i++) {
+ unsigned int n2 = g->nodes[n].adjacency_list[i];
+
+ if (!g->nodes[n2].in_stack &&
+ g->regs->regs[r].conflicts[g->nodes[n2].reg]) {
break;
}
}
- if (i == g->count)
+ if (i == g->nodes[n].adjacency_count)
break;
}
if (r == g->regs->count)
@@ -368,17 +401,17 @@ ra_get_spill_benefit(struct ra_graph *g, unsigned int n)
float benefit = 0;
int n_class = g->nodes[n].class;
- /* Define the benefit of eliminating an interference between n, j
+ /* Define the benefit of eliminating an interference between n, n2
* through spilling as q(C, B) / p(C). This is similar to the
* "count number of edges" approach of traditional graph coloring,
* but takes classes into account.
*/
- for (j = 0; j < g->count; j++) {
- if (j != n && g->nodes[n].adjacency[j]) {
- unsigned int j_class = g->nodes[j].class;
- benefit += ((float)g->regs->classes[n_class]->q[j_class] /
+ for (j = 0; j < g->nodes[n].adjacency_count; j++) {
+ unsigned int n2 = g->nodes[n].adjacency_list[j];
+ if (n != n2) {
+ unsigned int n2_class = g->nodes[n2].class;
+ benefit += ((float)g->regs->classes[n_class]->q[n2_class] /
g->regs->classes[n_class]->p);
- break;
}
}
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 422ae43585b..fd03669e660 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -45,6 +45,7 @@
#include "util/u_inlines.h"
#include "cso_cache/cso_context.h"
+
/**
* Combine depth texture mode with "swizzle" so that depth mode swizzling
* takes place before texture swizzling, and return the resulting swizzle.
@@ -54,8 +55,8 @@
* \param swizzle Texture swizzle, a bitmask computed using MAKE_SWIZZLE4.
* \param depthmode One of GL_LUMINANCE, GL_INTENSITY, GL_ALPHA, GL_RED.
*/
-static GLuint apply_depthmode(enum pipe_format format,
- GLuint swizzle, GLenum depthmode)
+static GLuint
+apply_depthmode(enum pipe_format format, GLuint swizzle, GLenum depthmode)
{
const struct util_format_description *desc =
util_format_description(format);
@@ -109,6 +110,7 @@ static GLuint apply_depthmode(enum pipe_format format,
return MAKE_SWIZZLE4(swiz[0], swiz[1], swiz[2], swiz[3]);
}
+
/**
* Return TRUE if the swizzling described by "swizzle" and
* "depthmode" (for depth textures only) is different from the swizzling
@@ -118,8 +120,9 @@ static GLuint apply_depthmode(enum pipe_format format,
* \param swizzle Texture swizzle, a bitmask computed using MAKE_SWIZZLE4.
* \param depthmode One of GL_LUMINANCE, GL_INTENSITY, GL_ALPHA.
*/
-static boolean check_sampler_swizzle(struct pipe_sampler_view *sv,
- GLuint swizzle, GLenum depthmode)
+static boolean
+check_sampler_swizzle(struct pipe_sampler_view *sv,
+ GLuint swizzle, GLenum depthmode)
{
swizzle = apply_depthmode(sv->texture->format, swizzle, depthmode);
@@ -127,15 +130,15 @@ static boolean check_sampler_swizzle(struct pipe_sampler_view *sv,
(sv->swizzle_g != GET_SWZ(swizzle, 1)) ||
(sv->swizzle_b != GET_SWZ(swizzle, 2)) ||
(sv->swizzle_a != GET_SWZ(swizzle, 3)))
- return true;
- return false;
+ return TRUE;
+ return FALSE;
}
+
static INLINE struct pipe_sampler_view *
st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
struct st_texture_object *stObj,
enum pipe_format format)
-
{
struct pipe_sampler_view templ;
GLuint swizzle = apply_depthmode(stObj->pt->format,
@@ -161,19 +164,20 @@ static INLINE struct pipe_sampler_view *
st_get_texture_sampler_view_from_stobj(struct st_texture_object *stObj,
struct pipe_context *pipe,
enum pipe_format format)
-
{
if (!stObj || !stObj->pt) {
return NULL;
}
if (!stObj->sampler_view) {
- stObj->sampler_view = st_create_texture_sampler_view_from_stobj(pipe, stObj, format);
+ stObj->sampler_view =
+ st_create_texture_sampler_view_from_stobj(pipe, stObj, format);
}
return stObj->sampler_view;
}
+
static void
update_textures(struct st_context *st)
{
@@ -214,21 +218,29 @@ update_textures(struct st_context *st)
continue;
}
+ /* Determine the format of the texture sampler view */
st_view_format = stObj->pt->format;
{
- struct st_texture_image *firstImage;
- enum pipe_format firstImageFormat;
- firstImage = st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]);
-
- firstImageFormat = st_mesa_format_to_pipe_format(firstImage->base.TexFormat);
- if ((stObj->base.sRGBDecode == GL_SKIP_DECODE_EXT) && (_mesa_get_format_color_encoding(firstImage->base.TexFormat) == GL_SRGB)) {
- firstImageFormat = st_mesa_format_to_pipe_format(_mesa_get_srgb_format_linear(firstImage->base.TexFormat));
+ const struct st_texture_image *firstImage =
+ st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]);
+ const gl_format texFormat = firstImage->base.TexFormat;
+ enum pipe_format firstImageFormat =
+ st_mesa_format_to_pipe_format(texFormat);
+
+ if ((stObj->base.sRGBDecode == GL_SKIP_DECODE_EXT) &&
+ (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) {
+ /* don't do sRGB->RGB conversion. Interpret the texture
+ * texture data as linear values.
+ */
+ const gl_format linearFormat =
+ _mesa_get_srgb_format_linear(texFormat);
+ firstImageFormat = st_mesa_format_to_pipe_format(linearFormat);
}
if (firstImageFormat != stObj->pt->format)
st_view_format = firstImageFormat;
-
}
+
st->state.num_textures = su + 1;
/* if sampler view has changed dereference it */
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index abaf8235416..974fd78d7c8 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -432,6 +432,10 @@ void st_init_extensions(struct st_context *st)
ctx->Extensions.EXT_draw_buffers2 = GL_TRUE;
}
+ if (screen->get_param(screen, PIPE_CAP_INDEP_BLEND_FUNC)) {
+ ctx->Extensions.ARB_draw_buffers_blend = GL_TRUE;
+ }
+
/* GL_ARB_half_float_vertex */
if (screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_FLOAT,
PIPE_BUFFER, 0,
@@ -439,10 +443,6 @@ void st_init_extensions(struct st_context *st)
ctx->Extensions.ARB_half_float_vertex = GL_TRUE;
}
- if (screen->get_param(screen, PIPE_CAP_INDEP_BLEND_FUNC)) {
- ctx->Extensions.ARB_draw_buffers_blend = GL_TRUE;
- }
-
if (screen->get_shader_param(screen, PIPE_SHADER_GEOMETRY, PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
#if 0 /* XXX re-enable when GLSL compiler again supports geometry shaders */
ctx->Extensions.ARB_geometry_shader4 = GL_TRUE;
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index c78901c0360..bca856d7142 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -125,9 +125,7 @@ st_create_texture_sampler_view(struct pipe_context *pipe,
{
struct pipe_sampler_view templ;
- u_sampler_view_default_template(&templ,
- texture,
- texture->format);
+ u_sampler_view_default_template(&templ, texture, texture->format);
return pipe->create_sampler_view(pipe, texture, &templ);
}
@@ -140,17 +138,15 @@ st_create_texture_sampler_view_format(struct pipe_context *pipe,
{
struct pipe_sampler_view templ;
- u_sampler_view_default_template(&templ,
- texture,
- format);
+ u_sampler_view_default_template(&templ, texture, format);
return pipe->create_sampler_view(pipe, texture, &templ);
}
+
static INLINE struct pipe_sampler_view *
st_get_texture_sampler_view(struct st_texture_object *stObj,
struct pipe_context *pipe)
-
{
if (!stObj || !stObj->pt) {
return NULL;
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index bdb893eba22..858b8281da3 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -468,6 +468,8 @@ void _tnl_draw_prims( struct gl_context *ctx,
break;
}
+ assert(prim[i].num_instances > 0);
+
/* Binding inputs may imply mapping some vertex buffer objects.
* They will need to be unmapped below.
*/
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index e221538bad4..37940efdc11 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -38,7 +38,7 @@ struct gl_client_array;
struct gl_context;
struct _mesa_prim {
- GLuint mode:8;
+ GLuint mode:8; /**< GL_POINTS, GL_LINES, GL_QUAD_STRIP, etc */
GLuint indexed:1;
GLuint begin:1;
GLuint end:1;
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index dd36cc32a70..80085c17c5c 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -629,15 +629,15 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
*/
static void GLAPIENTRY
vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
- GLsizei primcount)
+ GLsizei numInstances)
{
GET_CURRENT_CONTEXT(ctx);
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, count, primcount);
+ _mesa_lookup_enum_by_nr(mode), start, count, numInstances);
- if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, primcount))
+ if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances))
return;
FLUSH_CURRENT( ctx, 0 );
@@ -649,7 +649,7 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
if (0)
check_draw_arrays_data(ctx, start, count);
- vbo_draw_arrays(ctx, mode, start, count, primcount);
+ vbo_draw_arrays(ctx, mode, start, count, numInstances);
if (0)
print_draw_arrays(ctx, mode, start, count);
@@ -724,7 +724,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
GLuint start, GLuint end,
GLsizei count, GLenum type,
const GLvoid *indices,
- GLint basevertex, GLint primcount)
+ GLint basevertex, GLint numInstances)
{
struct vbo_context *vbo = vbo_context(ctx);
struct vbo_exec_context *exec = &vbo->exec;
@@ -757,7 +757,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
prim[0].count = count;
prim[0].indexed = 1;
prim[0].basevertex = basevertex;
- prim[0].num_instances = primcount;
+ prim[0].num_instances = numInstances;
/* Need to give special consideration to rendering a range of
* indices starting somewhere above zero. Typically the
@@ -977,21 +977,21 @@ vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
*/
static void GLAPIENTRY
vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices, GLsizei primcount)
+ const GLvoid *indices, GLsizei numInstances)
{
GET_CURRENT_CONTEXT(ctx);
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n",
_mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices, primcount);
+ _mesa_lookup_enum_by_nr(type), indices, numInstances);
if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
- primcount))
+ numInstances))
return;
vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
- count, type, indices, 0, primcount);
+ count, type, indices, 0, numInstances);
}
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index 26d0046e83d..8c981f93e5c 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -222,6 +222,7 @@ begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
prim->mode = mode;
prim->begin = begin_flag;
+ prim->num_instances = 1;
}
diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c
index 789cf31364b..f6aa576b6c8 100644
--- a/src/mesa/vbo/vbo_split_inplace.c
+++ b/src/mesa/vbo/vbo_split_inplace.c
@@ -178,6 +178,7 @@ static void split_prims( struct split_context *split)
outprim->end = (nr == remaining && prim->end);
outprim->start = prim->start + j;
outprim->count = nr;
+ outprim->num_instances = prim->num_instances;
update_index_bounds(split, outprim);
@@ -221,6 +222,7 @@ static void split_prims( struct split_context *split)
tmpprim.indexed = 1;
tmpprim.start = 0;
tmpprim.count = count;
+ tmpprim.num_instances = 1;
flush_vertex(split);