diff options
author | Christian König <[email protected]> | 2011-01-20 22:43:18 +0100 |
---|---|---|
committer | Christian König <[email protected]> | 2011-01-20 22:43:18 +0100 |
commit | 78faf8d0e9c276a0ff1465e501d58fb3d66de2f7 (patch) | |
tree | 4e124bd6b511e408c5e113c4166b8fa97fd75b24 /src/mesa | |
parent | d2ff6b8715e817c1ef14d4bf12be58c19d894143 (diff) | |
parent | 37233f1ee0213a224611788bbab38840ba9f8308 (diff) |
Merge remote branch 'origin/master' into pipe-video
Conflicts:
src/gallium/drivers/r600/r600_asm.c
Diffstat (limited to 'src/mesa')
33 files changed, 868 insertions, 190 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 7c3ac0c14ef..b05ba35d65f 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -108,6 +108,7 @@ CXX_SOURCES = \ brw_fs.cpp \ brw_fs_channel_expressions.cpp \ brw_fs_reg_allocate.cpp \ + brw_fs_schedule_instructions.cpp \ brw_fs_vector_splitting.cpp ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a35687d5991..9a71e5377df 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -316,7 +316,6 @@ int fs_visitor::setup_uniform_values(int loc, const glsl_type *type) { unsigned int offset = 0; - float *vec_values; if (type->is_matrix()) { const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, @@ -335,7 +334,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_BOOL: - vec_values = fp->Base.Parameters->ParameterValues[loc]; for (unsigned int i = 0; i < type->vector_elements; i++) { unsigned int param = c->prog_data.nr_params++; @@ -359,8 +357,8 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) c->prog_data.param_convert[param] = PARAM_NO_CONVERT; break; } - - c->prog_data.param[param] = &vec_values[i]; + this->param_index[param] = loc; + this->param_offset[param] = i; } return 1; @@ -431,7 +429,6 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir) */ int index = _mesa_add_state_reference(this->fp->Base.Parameters, (gl_state_index *)tokens); - float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; /* Add each of the unique swizzles of the element as a * parameter. 
This'll end up matching the expected layout of @@ -446,7 +443,9 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir) c->prog_data.param_convert[c->prog_data.nr_params] = PARAM_NO_CONVERT; - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = swiz; + c->prog_data.nr_params++; } } } @@ -1370,10 +1369,13 @@ fs_visitor::visit(ir_texture *ir) fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); GLuint index = _mesa_add_state_reference(params, (gl_state_index *)tokens); - float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0]; - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1]; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = 0; + c->prog_data.nr_params++; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = 1; + c->prog_data.nr_params++; fs_reg dst = fs_reg(this, ir->coordinate->type); fs_reg src = coordinate; @@ -2500,6 +2502,22 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) } } +/** + * To be called after the last _mesa_add_state_reference() call, to + * set up prog_data.param[] for assign_curb_setup() and + * setup_pull_constants(). + */ +void +fs_visitor::setup_paramvalues_refs() +{ + /* Set up the pointers to ParamValues now that that array is finalized. */ + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + c->prog_data.param[i] = + fp->Base.Parameters->ParameterValues[this->param_index[i]] + + this->param_offset[i]; + } +} + void fs_visitor::assign_curb_setup() { @@ -2629,10 +2647,7 @@ fs_visitor::split_virtual_grfs() fs_inst *inst = (fs_inst *)iter.get(); /* Texturing produces 4 contiguous registers, so no splitting. 
*/ - if ((inst->opcode == FS_OPCODE_TEX || - inst->opcode == FS_OPCODE_TXB || - inst->opcode == FS_OPCODE_TXL) && - inst->dst.file == GRF) { + if (inst->is_tex()) { split_grf[inst->dst.reg] = false; } } @@ -2920,7 +2935,7 @@ fs_visitor::propagate_constants() if (scan_inst->dst.file == GRF && scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || - scan_inst->opcode == FS_OPCODE_TEX)) { + scan_inst->is_tex())) { break; } } @@ -3015,13 +3030,13 @@ fs_visitor::register_coalesce() if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || - scan_inst->opcode == FS_OPCODE_TEX)) { + scan_inst->is_tex())) { interfered = true; break; } if (scan_inst->dst.reg == inst->src[0].reg && (scan_inst->dst.reg_offset == inst->src[0].reg_offset || - scan_inst->opcode == FS_OPCODE_TEX)) { + scan_inst->is_tex())) { interfered = true; break; } @@ -3102,7 +3117,7 @@ fs_visitor::compute_to_mrf() * into a compute-to-MRF. */ - if (scan_inst->opcode == FS_OPCODE_TEX) { + if (scan_inst->is_tex()) { /* texturing writes several continuous regs, so we can't * compute-to-mrf that. */ @@ -3123,14 +3138,7 @@ fs_visitor::compute_to_mrf() /* gen6 math instructions must have the destination be * GRF, so no compute-to-MRF for them. 
*/ - if (scan_inst->opcode == FS_OPCODE_RCP || - scan_inst->opcode == FS_OPCODE_RSQ || - scan_inst->opcode == FS_OPCODE_SQRT || - scan_inst->opcode == FS_OPCODE_EXP2 || - scan_inst->opcode == FS_OPCODE_LOG2 || - scan_inst->opcode == FS_OPCODE_SIN || - scan_inst->opcode == FS_OPCODE_COS || - scan_inst->opcode == FS_OPCODE_POW) { + if (scan_inst->is_math()) { break; } } @@ -3152,6 +3160,7 @@ fs_visitor::compute_to_mrf() */ if (scan_inst->opcode == BRW_OPCODE_DO || scan_inst->opcode == BRW_OPCODE_WHILE || + scan_inst->opcode == BRW_OPCODE_ELSE || scan_inst->opcode == BRW_OPCODE_ENDIF) { break; } @@ -3238,7 +3247,7 @@ fs_visitor::remove_duplicate_mrf_writes() } if (inst->mlen > 0) { - /* Found a SEND instruction, which will include two of fewer + /* Found a SEND instruction, which will include two or fewer * implied MRF writes. We could do better here. */ for (int i = 0; i < implied_mrf_writes(inst); i++) { @@ -3662,10 +3671,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) v.emit_fb_writes(); v.split_virtual_grfs(); - v.setup_pull_constants(); - v.assign_curb_setup(); - v.assign_urb_setup(); + v.setup_paramvalues_refs(); + v.setup_pull_constants(); bool progress; do { @@ -3679,6 +3687,11 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) progress = v.dead_code_eliminate() || progress; } while (progress); + v.schedule_instructions(); + + v.assign_curb_setup(); + v.assign_urb_setup(); + if (0) { /* Debug of register spilling: Go spill everything. 
*/ int virtual_grf_count = v.virtual_grf_next; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 82d96f6ac02..f0497957bc4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -306,6 +306,25 @@ public: offset == inst->offset); } + bool is_tex() + { + return (opcode == FS_OPCODE_TEX || + opcode == FS_OPCODE_TXB || + opcode == FS_OPCODE_TXL); + } + + bool is_math() + { + return (opcode == FS_OPCODE_RCP || + opcode == FS_OPCODE_RSQ || + opcode == FS_OPCODE_SQRT || + opcode == FS_OPCODE_EXP2 || + opcode == FS_OPCODE_LOG2 || + opcode == FS_OPCODE_SIN || + opcode == FS_OPCODE_COS || + opcode == FS_OPCODE_POW); + } + int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; @@ -412,6 +431,7 @@ public: void visit(ir_function_signature *ir); fs_inst *emit(fs_inst inst); + void setup_paramvalues_refs(); void assign_curb_setup(); void calculate_urb_setup(); void assign_urb_setup(); @@ -428,6 +448,8 @@ public: bool dead_code_eliminate(); bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); + void schedule_instructions(); + void generate_code(); void generate_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, @@ -476,6 +498,12 @@ public: void *mem_ctx; exec_list instructions; + /* Delayed setup of c->prog_data.param[] due to realloc of + * ParameterValues[] during compile. 
+ */ + int param_index[MAX_UNIFORMS * 4]; + int param_offset[MAX_UNIFORMS * 4]; + int *virtual_grf_sizes; int virtual_grf_next; int virtual_grf_array_size; diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp new file mode 100644 index 00000000000..c8f0b27b76f --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -0,0 +1,489 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +extern "C" { + +#include <sys/types.h> + +#include "main/macros.h" +#include "main/shaderobj.h" +#include "main/uniforms.h" +#include "program/prog_optimize.h" +#include "program/register_allocate.h" +#include "program/sampler.h" +#include "program/hash_table.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "talloc.h" +} +#include "brw_fs.h" +#include "../glsl/glsl_types.h" +#include "../glsl/ir_optimization.h" +#include "../glsl/ir_print_visitor.h" + +/** @file brw_fs_schedule_instructions.cpp + * + * List scheduling of FS instructions. + * + * The basic model of the list scheduler is to take a basic block, + * compute a DAG of the dependencies (RAW ordering with latency, WAW + * ordering, WAR ordering), and make a list of the DAG heads. + * Heuristically pick a DAG head, then put all the children that are + * now DAG heads into the list of things to schedule. + * + * The heuristic is the important part. We're trying to be cheap, + * since actually computing the optimal scheduling is NP complete. + * What we do is track a "current clock". When we schedule a node, we + * update the earliest-unblocked clock time of its children, and + * increment the clock. Then, when trying to schedule, we just pick + * the earliest-unblocked instruction to schedule. + * + * Note that often there will be many things which could execute + * immediately, and there are a range of heuristic options to choose + * from in picking among those. 
+ */ + +class schedule_node : public exec_node +{ +public: + schedule_node(fs_inst *inst) + { + this->inst = inst; + this->child_array_size = 0; + this->children = NULL; + this->child_latency = NULL; + this->child_count = 0; + this->parent_count = 0; + this->unblocked_time = 0; + + int chans = 8; + int math_latency = 22; + + switch (inst->opcode) { + case FS_OPCODE_RCP: + this->latency = 1 * chans * math_latency; + break; + case FS_OPCODE_RSQ: + this->latency = 2 * chans * math_latency; + break; + case FS_OPCODE_SQRT: + case FS_OPCODE_LOG2: + /* full precision log. partial is 2. */ + this->latency = 3 * chans * math_latency; + break; + case FS_OPCODE_EXP2: + /* full precision. partial is 3, same throughput. */ + this->latency = 4 * chans * math_latency; + break; + case FS_OPCODE_POW: + this->latency = 8 * chans * math_latency; + break; + case FS_OPCODE_SIN: + case FS_OPCODE_COS: + /* minimum latency, max is 12 rounds. */ + this->latency = 5 * chans * math_latency; + break; + default: + this->latency = 2; + break; + } + } + + fs_inst *inst; + schedule_node **children; + int *child_latency; + int child_count; + int parent_count; + int child_array_size; + int unblocked_time; + int latency; +}; + +class instruction_scheduler { +public: + instruction_scheduler(fs_visitor *v, void *mem_ctx, int virtual_grf_count) + { + this->v = v; + this->mem_ctx = talloc_new(mem_ctx); + this->virtual_grf_count = virtual_grf_count; + this->instructions.make_empty(); + this->instructions_to_schedule = 0; + } + + ~instruction_scheduler() + { + talloc_free(this->mem_ctx); + } + void add_barrier_deps(schedule_node *n); + void add_dep(schedule_node *before, schedule_node *after, int latency); + + void add_inst(fs_inst *inst); + void calculate_deps(); + void schedule_instructions(fs_inst *next_block_header); + + void *mem_ctx; + + int instructions_to_schedule; + int virtual_grf_count; + exec_list instructions; + fs_visitor *v; +}; + +void +instruction_scheduler::add_inst(fs_inst *inst) +{ + 
schedule_node *n = new(mem_ctx) schedule_node(inst); + + assert(!inst->is_head_sentinel()); + assert(!inst->is_tail_sentinel()); + + this->instructions_to_schedule++; + + inst->remove(); + instructions.push_tail(n); +} + +/** + * Add a dependency between two instruction nodes. + * + * The @after node will be scheduled after @before. We will try to + * schedule it @latency cycles after @before, but no guarantees there. + */ +void +instruction_scheduler::add_dep(schedule_node *before, schedule_node *after, + int latency) +{ + if (!before || !after) + return; + + assert(before != after); + + for (int i = 0; i < before->child_count; i++) { + if (before->children[i] == after) { + before->child_latency[i] = MAX2(before->child_latency[i], latency); + return; + } + } + + if (before->child_array_size <= before->child_count) { + if (before->child_array_size < 16) + before->child_array_size = 16; + else + before->child_array_size *= 2; + + before->children = talloc_realloc(mem_ctx, before->children, + schedule_node *, + before->child_array_size); + before->child_latency = talloc_realloc(mem_ctx, before->child_latency, + int, before->child_array_size); + } + + before->children[before->child_count] = after; + before->child_latency[before->child_count] = latency; + before->child_count++; + after->parent_count++; +} + +/** + * Sometimes we really want this node to execute after everything that + * was before it and before everything that followed it. This adds + * the deps to do so. 
+ */ +void +instruction_scheduler::add_barrier_deps(schedule_node *n) +{ + schedule_node *prev = (schedule_node *)n->prev; + schedule_node *next = (schedule_node *)n->next; + + if (prev) { + while (!prev->is_head_sentinel()) { + add_dep(prev, n, 0); + prev = (schedule_node *)prev->prev; + } + } + + if (next) { + while (!next->is_tail_sentinel()) { + add_dep(n, next, 0); + next = (schedule_node *)next->next; + } + } +} + +void +instruction_scheduler::calculate_deps() +{ + schedule_node *last_grf_write[virtual_grf_count]; + schedule_node *last_mrf_write[BRW_MAX_MRF]; + schedule_node *last_conditional_mod = NULL; + + /* The last instruction always needs to still be the last + * instruction. Either it's flow control (IF, ELSE, ENDIF, DO, + * WHILE) and scheduling other things after it would disturb the + * basic block, or it's FB_WRITE and we should do a better job at + * dead code elimination anyway. + */ + schedule_node *last = (schedule_node *)instructions.get_tail(); + add_barrier_deps(last); + + memset(last_grf_write, 0, sizeof(last_grf_write)); + memset(last_mrf_write, 0, sizeof(last_mrf_write)); + + /* top-to-bottom dependencies: RAW and WAW. */ + foreach_iter(exec_list_iterator, iter, instructions) { + schedule_node *n = (schedule_node *)iter.get(); + fs_inst *inst = n->inst; + + /* read-after-write deps. */ + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { + if (last_grf_write[inst->src[i].reg]) { + add_dep(last_grf_write[inst->src[i].reg], n, + last_grf_write[inst->src[i].reg]->latency); + } + } else if (inst->src[i].file != BAD_FILE && + inst->src[i].file != IMM && + inst->src[i].file != UNIFORM) { + assert(inst->src[i].file != MRF); + add_barrier_deps(n); + } + } + + for (int i = 0; i < inst->mlen; i++) { + /* It looks like the MRF regs are released in the send + * instruction once it's sent, not when the result comes + * back. 
+ */ + if (last_mrf_write[inst->base_mrf + i]) { + add_dep(last_mrf_write[inst->base_mrf + i], n, + last_mrf_write[inst->base_mrf + i]->latency); + } + } + + if (inst->predicated) { + assert(last_conditional_mod); + add_dep(last_conditional_mod, n, last_conditional_mod->latency); + } + + /* write-after-write deps. */ + if (inst->dst.file == GRF) { + if (last_grf_write[inst->dst.reg]) { + add_dep(last_grf_write[inst->dst.reg], n, + last_grf_write[inst->dst.reg]->latency); + } + last_grf_write[inst->dst.reg] = n; + } else if (inst->dst.file == MRF) { + if (last_mrf_write[inst->dst.hw_reg]) { + add_dep(last_mrf_write[inst->dst.hw_reg], n, + last_mrf_write[inst->dst.hw_reg]->latency); + } + last_mrf_write[inst->dst.hw_reg] = n; + } else if (inst->dst.file != BAD_FILE) { + add_barrier_deps(n); + } + + if (inst->mlen > 0) { + for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + if (last_mrf_write[inst->base_mrf + i]) { + add_dep(last_mrf_write[inst->base_mrf + i], n, + last_mrf_write[inst->base_mrf + i]->latency); + } + last_mrf_write[inst->base_mrf + i] = n; + } + } + + if (inst->conditional_mod) { + add_dep(last_conditional_mod, n, 0); + last_conditional_mod = n; + } + } + + /* bottom-to-top dependencies: WAR */ + memset(last_grf_write, 0, sizeof(last_grf_write)); + memset(last_mrf_write, 0, sizeof(last_mrf_write)); + last_conditional_mod = NULL; + + exec_node *node; + exec_node *prev; + for (node = instructions.get_tail(), prev = node->prev; + !node->is_head_sentinel(); + node = prev, prev = node->prev) { + schedule_node *n = (schedule_node *)node; + fs_inst *inst = n->inst; + + /* write-after-read deps. 
*/ + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { + if (last_grf_write[inst->src[i].reg]) { + add_dep(n, last_grf_write[inst->src[i].reg], n->latency); + } + } else if (inst->src[i].file != BAD_FILE && + inst->src[i].file != IMM && + inst->src[i].file != UNIFORM) { + assert(inst->src[i].file != MRF); + add_barrier_deps(n); + } + } + + for (int i = 0; i < inst->mlen; i++) { + /* It looks like the MRF regs are released in the send + * instruction once it's sent, not when the result comes + * back. + */ + add_dep(n, last_mrf_write[inst->base_mrf + i], 2); + } + + if (inst->predicated) { + if (last_conditional_mod) { + add_dep(n, last_conditional_mod, n->latency); + } + } + + /* Update the things this instruction wrote, so earlier reads + * can mark this as WAR dependency. + */ + if (inst->dst.file == GRF) { + last_grf_write[inst->dst.reg] = n; + } else if (inst->dst.file == MRF) { + last_mrf_write[inst->dst.hw_reg] = n; + } else if (inst->dst.file != BAD_FILE) { + add_barrier_deps(n); + } + + if (inst->mlen > 0) { + for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + last_mrf_write[inst->base_mrf + i] = n; + } + } + + if (inst->conditional_mod) + last_conditional_mod = n; + } +} + +void +instruction_scheduler::schedule_instructions(fs_inst *next_block_header) +{ + int time = 0; + + /* Remove non-DAG heads from the list. */ + foreach_iter(exec_list_iterator, iter, instructions) { + schedule_node *n = (schedule_node *)iter.get(); + if (n->parent_count != 0) + n->remove(); + } + + while (!instructions.is_empty()) { + schedule_node *chosen = NULL; + int chosen_time = 0; + + foreach_iter(exec_list_iterator, iter, instructions) { + schedule_node *n = (schedule_node *)iter.get(); + + if (!chosen || n->unblocked_time < chosen_time) { + chosen = n; + chosen_time = n->unblocked_time; + } + } + + /* Schedule this instruction. 
*/ + assert(chosen); + chosen->remove(); + next_block_header->insert_before(chosen->inst); + instructions_to_schedule--; + + /* Bump the clock. If we expected a delay for scheduling, then + * bump the clock to reflect that. + */ + time = MAX2(time + 1, chosen_time); + + /* Now that we've scheduled a new instruction, some of its + * children can be promoted to the list of instructions ready to + * be scheduled. Update the children's unblocked time for this + * DAG edge as we do so. + */ + for (int i = 0; i < chosen->child_count; i++) { + schedule_node *child = chosen->children[i]; + + child->unblocked_time = MAX2(child->unblocked_time, + time + chosen->child_latency[i]); + + child->parent_count--; + if (child->parent_count == 0) { + instructions.push_tail(child); + } + } + + /* Shared resource: the mathbox. There's one per EU (on later + * generations, it's even more limited pre-gen6), so if we send + * something off to it then the next math isn't going to make + * progress until the first is done. + */ + if (chosen->inst->is_math()) { + foreach_iter(exec_list_iterator, iter, instructions) { + schedule_node *n = (schedule_node *)iter.get(); + + if (n->inst->is_math()) + n->unblocked_time = MAX2(n->unblocked_time, + time + chosen->latency); + } + } + } + + assert(instructions_to_schedule == 0); +} + +void +fs_visitor::schedule_instructions() +{ + fs_inst *next_block_header = (fs_inst *)instructions.head; + instruction_scheduler sched(this, mem_ctx, this->virtual_grf_next); + + while (!next_block_header->is_tail_sentinel()) { + /* Add things to be scheduled until we get to a new BB. 
*/ + while (!next_block_header->is_tail_sentinel()) { + fs_inst *inst = next_block_header; + next_block_header = (fs_inst *)next_block_header->next; + + sched.add_inst(inst); + if (inst->opcode == BRW_OPCODE_IF || + inst->opcode == BRW_OPCODE_ELSE || + inst->opcode == BRW_OPCODE_ENDIF || + inst->opcode == BRW_OPCODE_DO || + inst->opcode == BRW_OPCODE_WHILE || + inst->opcode == BRW_OPCODE_BREAK || + inst->opcode == BRW_OPCODE_CONTINUE) { + break; + } + } + sched.calculate_deps(); + sched.schedule_instructions(next_block_header); + } + + this->live_intervals_valid = false; +} diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c index 2480b1ea500..988208ff56e 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c @@ -113,8 +113,10 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti, if (access & GL_MAP_WRITE_BIT) flags |= NOUVEAU_BO_WR; - ret = nouveau_bo_map(s->bo, flags); - assert(!ret); + if (!s->bo->map) { + ret = nouveau_bo_map(s->bo, flags); + assert(!ret); + } ti->Data = s->bo->map + y * s->pitch + x * s->cpp; } diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index f6afb90d595..e173cce0860 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -587,7 +587,7 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom) if (rrb) { OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); - OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 24fb031ecb9..7adf9ad73ed 100644 --- 
a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -777,10 +777,9 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; + uint32_t internalFormat, format; gl_format texFormat; - type = GL_BGRA; format = GL_UNSIGNED_BYTE; internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); @@ -860,9 +859,20 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT) | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT); - t->pp_txformat |= R200_TXFORMAT_NON_POWER2; - t->pp_txpitch = pitch_val; - t->pp_txpitch -= 32; + + if (target == GL_TEXTURE_RECTANGLE_NV) { + t->pp_txformat |= R200_TXFORMAT_NON_POWER2; + t->pp_txpitch = pitch_val; + t->pp_txpitch -= 32; + } else { + t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK | + R200_TXFORMAT_HEIGHT_MASK | + R200_TXFORMAT_CUBIC_MAP_ENABLE | + R200_TXFORMAT_F5_WIDTH_MASK | + R200_TXFORMAT_F5_HEIGHT_MASK); + t->pp_txformat |= ((texImage->WidthLog2 << R200_TXFORMAT_WIDTH_SHIFT) | + (texImage->HeightLog2 << R200_TXFORMAT_HEIGHT_SHIFT)); + } t->validated = GL_TRUE; _mesa_unlock_texture(radeon->glCtx, texObj); diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index 2a99ded5d67..fe4f0e48661 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -265,9 +265,9 @@ static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa static void r600SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4]) { t->TD_PS_SAMPLER0_BORDER_ALPHA = *((uint32_t*)&(color[3])); - t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[2])); + t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[2])); t->TD_PS_SAMPLER0_BORDER_GREEN = *((uint32_t*)&(color[1])); - t->TD_PS_SAMPLER0_BORDER_BLUE = 
*((uint32_t*)&(color[0])); + t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[0])); SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER, BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask); } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 2a6a39dfbac..0323e32d705 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -47,13 +47,13 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog) { static const gl_state_index winstate[STATE_LENGTH] - = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0}; + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0}; struct prog_instruction *newInst, *inst; GLint win_size; /* state reference */ GLuint wpos_temp; /* temp register */ int i, j; - /* PARAM win_size = STATE_FB_SIZE */ + /* PARAM win_size = STATE_FB_WPOS_Y_TRANSFORM */ win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate); wpos_temp = fprog->Base.NumTemporaries++; @@ -74,9 +74,8 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog) _mesa_insert_instructions(&(fprog->Base), 0, 1); newInst = fprog->Base.Instructions; - /* invert wpos.y - * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */ - newInst[0].Opcode = OPCODE_ADD; + /* possibly invert wpos.y depending on STATE_FB_WPOS_Y_TRANSFORM var */ + newInst[0].Opcode = OPCODE_MAD; newInst[0].DstReg.File = PROGRAM_TEMPORARY; newInst[0].DstReg.Index = wpos_temp; newInst[0].DstReg.WriteMask = WRITEMASK_XYZW; @@ -84,11 +83,14 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog) newInst[0].SrcReg[0].File = PROGRAM_INPUT; newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS; newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW; - newInst[0].SrcReg[0].Negate = NEGATE_Y; newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR; newInst[0].SrcReg[1].Index = win_size; - newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, 
SWIZZLE_ZERO); + newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE); + + newInst[0].SrcReg[2].File = PROGRAM_STATE_VAR; + newInst[0].SrcReg[2].Index = win_size; + newInst[0].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); } @@ -509,6 +511,7 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) unsigned int ui, i; unsigned int unNumOfReg; unsigned int unBit; + unsigned int num_sq_ps_gprs; GLuint exportCount; GLboolean point_sprite = GL_FALSE; @@ -619,6 +622,15 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); + num_sq_ps_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_PS_GPRS_mask) >> NUM_PS_GPRS_shift); + + if(ui > num_sq_ps_gprs) + { + /* care! this changes sq - needs idle state */ + R600_STATECHANGE(context, sq); + SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, ui, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask); + } + CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit); if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. 
*/ diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 7ba49d8f986..7d4be9180a0 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -605,6 +605,7 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) struct gl_program_parameter_list *paramList; unsigned int unNumParamData; unsigned int ui; + unsigned int num_sq_vs_gprs; if(GL_FALSE == vp->loaded) { @@ -656,6 +657,16 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1, NUM_GPRS_shift, NUM_GPRS_mask); + num_sq_vs_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_VS_GPRS_mask) >> NUM_VS_GPRS_shift); + + if((vp->r700Shader.nRegs + 1) > num_sq_vs_gprs) + { + /* care! this changes sq - needs idle state */ + R600_STATECHANGE(context, sq); + SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, vp->r700Shader.nRegs + 1, + NUM_VS_GPRS_shift, NUM_VS_GPRS_mask); + } + if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. 
*/ { SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize, diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index a1124483a6c..819d9dd5750 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -532,7 +532,7 @@ void radeon_prepare_render(radeonContextPtr radeon) /* Intel driver does the equivalent of this, no clue if it is needed:*/ draw = drawable->driverPrivate; - radeon_draw_buffer(radeon->glCtx, &draw->base); + radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer); driContext->dri2.draw_stamp = drawable->dri2.stamp; } diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 1c5326fe9dc..caf3f253d2a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -60,7 +60,7 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { - GLubyte *ptr = rrb->bo->ptr; + GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset; GLint offset; if (rrb->has_surface) { offset = x * rrb->cpp + y * rrb->pitch; @@ -85,7 +85,7 @@ static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { - GLubyte *ptr = rrb->bo->ptr; + GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset; GLint offset; if (rrb->has_surface) { offset = x * rrb->cpp + y * rrb->pitch; @@ -439,7 +439,7 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { - GLubyte *ptr = rrb->bo->ptr; + GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset; uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; GLint offset; @@ -479,7 +479,7 @@ static GLubyte 
*radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { - GLubyte *ptr = rrb->bo->ptr; + GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset; uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; GLint offset; diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index ca42aa39474..e88e984354f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -2091,6 +2091,9 @@ static GLboolean r100ValidateBuffers(struct gl_context *ctx) continue; t = rmesa->state.texture.unit[i].texobj; + + if (!t) + continue; if (t->image_override && t->bo) radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index 698efb145c0..33b504cccf8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -488,7 +488,7 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom) if (rrb) { OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); - OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 32c021cb545..9ba98e303a7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -652,12 +652,11 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - 
uint32_t internalFormat, type, format; + uint32_t internalFormat, format; gl_format texFormat; - type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); + internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA); radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; @@ -739,6 +738,14 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; t->pp_txpitch = pitch_val; t->pp_txpitch -= 32; + } else { + t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK | + RADEON_TXFORMAT_HEIGHT_MASK | + RADEON_TXFORMAT_CUBIC_MAP_ENABLE | + RADEON_TXFORMAT_F5_WIDTH_MASK | + RADEON_TXFORMAT_F5_HEIGHT_MASK); + t->pp_txformat |= ((texImage->WidthLog2 << RADEON_TXFORMAT_WIDTH_SHIFT) | + (texImage->HeightLog2 << RADEON_TXFORMAT_HEIGHT_SHIFT)); } t->validated = GL_TRUE; _mesa_unlock_texture(radeon->glCtx, texObj); diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile index f94aae85f2c..6b2a13c974f 100644 --- a/src/mesa/drivers/x11/Makefile +++ b/src/mesa/drivers/x11/Makefile @@ -47,6 +47,9 @@ INCLUDE_DIRS = \ CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a +ifeq ($(SHARED_GLAPI),1) +GL_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(GL_LIB_DEPS) +endif .c.o: diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index ac9709db3f1..7c4652f747f 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -329,7 +329,7 @@ _mesa_validate_DrawArrays(struct gl_context *ctx, GLboolean _mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint first, - GLsizei count, GLsizei primcount) + GLsizei count, GLsizei numInstances) { ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE); @@ -346,10 +346,10 @@ _mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint fi return GL_FALSE; } - if (primcount <= 0) { - if (primcount < 
0) + if (numInstances <= 0) { + if (numInstances < 0) _mesa_error(ctx, GL_INVALID_VALUE, - "glDrawArraysInstanced(primcount=%d)", primcount); + "glDrawArraysInstanced(numInstances=%d)", numInstances); return GL_FALSE; } @@ -374,7 +374,7 @@ _mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint fi GLboolean _mesa_validate_DrawElementsInstanced(struct gl_context *ctx, GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices, GLsizei primcount) + const GLvoid *indices, GLsizei numInstances) { ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE); @@ -399,10 +399,10 @@ _mesa_validate_DrawElementsInstanced(struct gl_context *ctx, return GL_FALSE; } - if (primcount <= 0) { - if (primcount < 0) + if (numInstances <= 0) { + if (numInstances < 0) _mesa_error(ctx, GL_INVALID_VALUE, - "glDrawElementsInstanced(primcount=%d)", primcount); + "glDrawElementsInstanced(numInstances=%d)", numInstances); return GL_FALSE; } diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 958ea10a422..fe370fa369b 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -526,6 +526,16 @@ init_program_limits(GLenum type, struct gl_program_constants *prog) prog->MaxNativeTemps = 0; prog->MaxNativeAddressRegs = 0; prog->MaxNativeParameters = 0; + + /* Set GLSL datatype range/precision info assuming IEEE float values. + * Drivers should override these defaults as needed. 
+ */ + prog->MediumFloat.RangeMin = 127; + prog->MediumFloat.RangeMax = 127; + prog->MediumFloat.Precision = 23; + prog->LowFloat = prog->HighFloat = prog->MediumFloat; + /* assume ints are stored as floats for now */ + prog->LowInt = prog->MediumInt = prog->HighInt = prog->MediumFloat; } diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 2eede4268ca..749c30a4cc1 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -881,11 +881,6 @@ struct dd_function_table { */ void (*ValidateTnlModule)( struct gl_context *ctx, GLuint new_state ); - -#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1) -#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2) -#define PRIM_UNKNOWN (GL_POLYGON+3) - /** * Set by the driver-supplied T&L engine. * diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index 79aa53585f9..a6a909b48ce 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -37,26 +37,6 @@ #include "texobj.h" -/** - * Primitive names - */ -const char *_mesa_prim_name[GL_POLYGON+4] = { - "GL_POINTS", - "GL_LINES", - "GL_LINE_LOOP", - "GL_LINE_STRIP", - "GL_TRIANGLES", - "GL_TRIANGLE_STRIP", - "GL_TRIANGLE_FAN", - "GL_QUADS", - "GL_QUAD_STRIP", - "GL_POLYGON", - "outside begin/end", - "inside unknown primitive", - "unknown state" -}; - - static const char * tex_target_name(GLenum tgt) { diff --git a/src/mesa/main/enums.c b/src/mesa/main/enums.c index c358fb246bc..83d7fb66c0a 100644 --- a/src/mesa/main/enums.c +++ b/src/mesa/main/enums.c @@ -29,6 +29,7 @@ #include "main/mfeatures.h" #include "main/enums.h" #include "main/imports.h" +#include "main/mtypes.h" typedef struct { size_t offset; @@ -6243,29 +6244,39 @@ const char *_mesa_lookup_enum_by_nr( int nr ) } } +/** + * Primitive names + */ +static const char *prim_names[PRIM_UNKNOWN + 1] = { + "GL_POINTS", + "GL_LINES", + "GL_LINE_LOOP", + "GL_LINE_STRIP", + "GL_TRIANGLES", + "GL_TRIANGLE_STRIP", + "GL_TRIANGLE_FAN", + "GL_QUADS", + "GL_QUAD_STRIP", + "GL_POLYGON", + "outside begin/end", + "inside unknown 
primitive", + "unknown state" +}; + + /* Get the name of an enum given that it is a primitive type. Avoids * GL_FALSE/GL_POINTS ambiguity and others. */ -const char *_mesa_lookup_prim_by_nr( int nr ) +const char * +_mesa_lookup_prim_by_nr(unsigned nr) { - switch (nr) { - case GL_POINTS: return "GL_POINTS"; - case GL_LINES: return "GL_LINES"; - case GL_LINE_STRIP: return "GL_LINE_STRIP"; - case GL_LINE_LOOP: return "GL_LINE_LOOP"; - case GL_TRIANGLES: return "GL_TRIANGLES"; - case GL_TRIANGLE_STRIP: return "GL_TRIANGLE_STRIP"; - case GL_TRIANGLE_FAN: return "GL_TRIANGLE_FAN"; - case GL_QUADS: return "GL_QUADS"; - case GL_QUAD_STRIP: return "GL_QUAD_STRIP"; - case GL_POLYGON: return "GL_POLYGON"; - case GL_POLYGON+1: return "OUTSIDE_BEGIN_END"; - default: return "<invalid>"; - } + if (nr < Elements(prim_names)) + return prim_names[nr]; + else + return "invalid mode"; } - int _mesa_lookup_enum_by_name( const char *symbol ) { enum_elt * f = NULL; diff --git a/src/mesa/main/enums.h b/src/mesa/main/enums.h index c03cd34da92..7733df22f91 100644 --- a/src/mesa/main/enums.h +++ b/src/mesa/main/enums.h @@ -45,7 +45,7 @@ extern const char *_mesa_lookup_enum_by_nr( int nr ); /* Get the name of an enum given that it is a primitive type. Avoids * GL_FALSE/GL_POINTS ambiguity and others. */ -const char *_mesa_lookup_prim_by_nr( int nr ); +const char *_mesa_lookup_prim_by_nr( unsigned nr ); extern int _mesa_lookup_enum_by_name( const char *symbol ); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index a6445b18368..49dad4d4024 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -121,6 +121,11 @@ struct st_context; /*@}*/ +/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */ +#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1) +#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2) +#define PRIM_UNKNOWN (GL_POLYGON+3) + /** * Shader stages. Note that these will become 5 with tessellation. 
@@ -296,8 +301,8 @@ typedef enum /** * Indexes for geometry program result attributes */ -/*@{*/ -typedef enum { +typedef enum +{ GEOM_RESULT_POS = 0, GEOM_RESULT_COL0 = 1, GEOM_RESULT_COL1 = 2, @@ -320,7 +325,7 @@ typedef enum { /* ### we need to -2 because var0 is 18 instead 16 like in the others */ GEOM_RESULT_MAX = (GEOM_RESULT_VAR0 + MAX_VARYING - 2) } gl_geom_result; -/*@}*/ + /** * Indexes for fragment program input attributes. @@ -1322,7 +1327,7 @@ struct gl_texture_object GLboolean _Complete; /**< Is texture object complete? */ GLboolean _RenderToTexture; /**< Any rendering to this texture? */ GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ - GLenum sRGBDecode; + GLenum sRGBDecode; /**< GL_DECODE_EXT or GL_SKIP_DECODE_EXT */ /** Actual texture images, indexed by [cube face] and [mipmap level] */ struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS]; @@ -2549,6 +2554,17 @@ struct gl_framebuffer /** + * Precision info for shader datatypes. See glGetShaderPrecisionFormat(). + */ +struct gl_precision +{ + GLushort RangeMin; /**< min value exponent */ + GLushort RangeMax; /**< max value exponent */ + GLushort Precision; /**< number of mantissa bits */ +}; + + +/** * Limits for vertex and fragment programs/shaders. */ struct gl_program_constants @@ -2582,6 +2598,9 @@ struct gl_program_constants GLuint MaxGeometryUniformComponents; GLuint MaxGeometryOutputVertices; GLuint MaxGeometryTotalOutputComponents; + /* ES 2.0 and GL_ARB_ES2_compatibility */ + struct gl_precision LowFloat, MediumFloat, HighFloat; + struct gl_precision LowInt, MediumInt, HighInt; }; @@ -3074,15 +3093,18 @@ struct gl_dlist_state } Current; }; + /** * Enum for the OpenGL APIs we know about and may support. */ -typedef enum { +typedef enum +{ API_OPENGL, API_OPENGLES, API_OPENGLES2 } gl_api; + /** * Mesa rendering context. 
* @@ -3295,10 +3317,6 @@ struct gl_context }; -/** The string names for GL_POINT, GL_LINE_LOOP, etc */ -extern const char *_mesa_prim_name[GL_POLYGON+4]; - - #ifdef DEBUG extern int MESA_VERBOSE; extern int MESA_DEBUG_FLAGS; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 2ffd8be0eb4..e831175235e 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1628,12 +1628,51 @@ void GLAPIENTRY _mesa_GetShaderPrecisionFormat(GLenum shadertype, GLenum precisiontype, GLint* range, GLint* precision) { + const struct gl_program_constants *limits; + const struct gl_precision *p; GET_CURRENT_CONTEXT(ctx); - (void) shadertype; - (void) precisiontype; - (void) range; - (void) precision; - _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__); + + switch (shadertype) { + case GL_VERTEX_SHADER: + limits = &ctx->Const.VertexProgram; + break; + case GL_FRAGMENT_SHADER: + limits = &ctx->Const.FragmentProgram; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetShaderPrecisionFormat(shadertype)"); + return; + } + + switch (precisiontype) { + case GL_LOW_FLOAT: + p = &limits->LowFloat; + break; + case GL_MEDIUM_FLOAT: + p = &limits->MediumFloat; + break; + case GL_HIGH_FLOAT: + p = &limits->HighFloat; + break; + case GL_LOW_INT: + p = &limits->LowInt; + break; + case GL_MEDIUM_INT: + p = &limits->MediumInt; + break; + case GL_HIGH_INT: + p = &limits->HighInt; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetShaderPrecisionFormat(precisiontype)"); + return; + } + + range[0] = p->RangeMin; + range[1] = p->RangeMax; + precision[0] = p->Precision; } diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c index ada6e356419..f984e2f1402 100644 --- a/src/mesa/program/register_allocate.c +++ b/src/mesa/program/register_allocate.c @@ -38,8 +38,10 @@ #include "register_allocate.h" struct ra_reg { - char *name; GLboolean *conflicts; + unsigned int *conflict_list; + unsigned int conflict_list_size; 
+ unsigned int num_conflicts; }; struct ra_regs { @@ -68,6 +70,7 @@ struct ra_class { struct ra_node { GLboolean *adjacency; + unsigned int *adjacency_list; unsigned int class; unsigned int adjacency_count; unsigned int reg; @@ -100,16 +103,39 @@ ra_alloc_reg_set(unsigned int count) for (i = 0; i < count; i++) { regs->regs[i].conflicts = talloc_zero_array(regs->regs, GLboolean, count); regs->regs[i].conflicts[i] = GL_TRUE; + + regs->regs[i].conflict_list = talloc_array(regs->regs, unsigned int, 4); + regs->regs[i].conflict_list_size = 4; + regs->regs[i].conflict_list[0] = i; + regs->regs[i].num_conflicts = 1; } return regs; } +static void +ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) +{ + struct ra_reg *reg1 = &regs->regs[r1]; + + if (reg1->conflict_list_size == reg1->num_conflicts) { + reg1->conflict_list_size *= 2; + reg1->conflict_list = talloc_realloc(regs, + reg1->conflict_list, + unsigned int, + reg1->conflict_list_size); + } + reg1->conflict_list[reg1->num_conflicts++] = r2; + reg1->conflicts[r2] = GL_TRUE; +} + void ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) { - regs->regs[r1].conflicts[r2] = GL_TRUE; - regs->regs[r2].conflicts[r1] = GL_TRUE; + if (!regs->regs[r1].conflicts[r2]) { + ra_add_conflict_list(regs, r1, r2); + ra_add_conflict_list(regs, r2, r1); + } } unsigned int @@ -160,15 +186,15 @@ ra_set_finalize(struct ra_regs *regs) int max_conflicts = 0; for (rc = 0; rc < regs->count; rc++) { - unsigned int rb; int conflicts = 0; + int i; if (!regs->classes[c]->regs[rc]) continue; - for (rb = 0; rb < regs->count; rb++) { - if (regs->classes[b]->regs[rb] && - regs->regs[rb].conflicts[rc]) + for (i = 0; i < regs->regs[rc].num_conflicts; i++) { + unsigned int rb = regs->regs[rc].conflict_list[i]; + if (regs->classes[b]->regs[rb]) conflicts++; } max_conflicts = MAX2(max_conflicts, conflicts); @@ -178,6 +204,14 @@ ra_set_finalize(struct ra_regs *regs) } } +static void +ra_add_node_adjacency(struct 
ra_graph *g, unsigned int n1, unsigned int n2) +{ + g->nodes[n1].adjacency[n2] = GL_TRUE; + g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2; + g->nodes[n1].adjacency_count++; +} + struct ra_graph * ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) { @@ -193,7 +227,9 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) for (i = 0; i < count; i++) { g->nodes[i].adjacency = talloc_zero_array(g, GLboolean, count); - g->nodes[i].adjacency[i] = GL_TRUE; + g->nodes[i].adjacency_list = talloc_array(g, unsigned int, count); + g->nodes[i].adjacency_count = 0; + ra_add_node_adjacency(g, i, i); g->nodes[i].reg = ~0; } @@ -211,13 +247,10 @@ void ra_add_node_interference(struct ra_graph *g, unsigned int n1, unsigned int n2) { - if (g->nodes[n1].adjacency[n2]) - return; - - g->nodes[n1].adjacency[n2] = GL_TRUE; - g->nodes[n2].adjacency_count++; - g->nodes[n2].adjacency[n1] = GL_TRUE; - g->nodes[n2].adjacency_count++; + if (!g->nodes[n1].adjacency[n2]) { + ra_add_node_adjacency(g, n1, n2); + ra_add_node_adjacency(g, n2, n1); + } } static GLboolean pq_test(struct ra_graph *g, unsigned int n) @@ -226,13 +259,12 @@ static GLboolean pq_test(struct ra_graph *g, unsigned int n) unsigned int q = 0; int n_class = g->nodes[n].class; - for (j = 0; j < g->count; j++) { - if (j == n || g->nodes[j].in_stack) - continue; + for (j = 0; j < g->nodes[n].adjacency_count; j++) { + unsigned int n2 = g->nodes[n].adjacency_list[j]; + unsigned int n2_class = g->nodes[n2].class; - if (g->nodes[n].adjacency[j]) { - unsigned int j_class = g->nodes[j].class; - q += g->regs->classes[n_class]->q[j_class]; + if (n != n2 && !g->nodes[n2].in_stack) { + q += g->regs->classes[n_class]->q[n2_class]; } } @@ -303,14 +335,15 @@ ra_select(struct ra_graph *g) continue; /* Check if any of our neighbors conflict with this register choice. 
*/ - for (i = 0; i < g->count; i++) { - if (g->nodes[n].adjacency[i] && - !g->nodes[i].in_stack && - g->regs->regs[r].conflicts[g->nodes[i].reg]) { + for (i = 0; i < g->nodes[n].adjacency_count; i++) { + unsigned int n2 = g->nodes[n].adjacency_list[i]; + + if (!g->nodes[n2].in_stack && + g->regs->regs[r].conflicts[g->nodes[n2].reg]) { break; } } - if (i == g->count) + if (i == g->nodes[n].adjacency_count) break; } if (r == g->regs->count) @@ -368,17 +401,17 @@ ra_get_spill_benefit(struct ra_graph *g, unsigned int n) float benefit = 0; int n_class = g->nodes[n].class; - /* Define the benefit of eliminating an interference between n, j + /* Define the benefit of eliminating an interference between n, n2 * through spilling as q(C, B) / p(C). This is similar to the * "count number of edges" approach of traditional graph coloring, * but takes classes into account. */ - for (j = 0; j < g->count; j++) { - if (j != n && g->nodes[n].adjacency[j]) { - unsigned int j_class = g->nodes[j].class; - benefit += ((float)g->regs->classes[n_class]->q[j_class] / + for (j = 0; j < g->nodes[n].adjacency_count; j++) { + unsigned int n2 = g->nodes[n].adjacency_list[j]; + if (n != n2) { + unsigned int n2_class = g->nodes[n2].class; + benefit += ((float)g->regs->classes[n_class]->q[n2_class] / g->regs->classes[n_class]->p); - break; } } diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 422ae43585b..fd03669e660 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -45,6 +45,7 @@ #include "util/u_inlines.h" #include "cso_cache/cso_context.h" + /** * Combine depth texture mode with "swizzle" so that depth mode swizzling * takes place before texture swizzling, and return the resulting swizzle. @@ -54,8 +55,8 @@ * \param swizzle Texture swizzle, a bitmask computed using MAKE_SWIZZLE4. * \param depthmode One of GL_LUMINANCE, GL_INTENSITY, GL_ALPHA, GL_RED. 
*/ -static GLuint apply_depthmode(enum pipe_format format, - GLuint swizzle, GLenum depthmode) +static GLuint +apply_depthmode(enum pipe_format format, GLuint swizzle, GLenum depthmode) { const struct util_format_description *desc = util_format_description(format); @@ -109,6 +110,7 @@ static GLuint apply_depthmode(enum pipe_format format, return MAKE_SWIZZLE4(swiz[0], swiz[1], swiz[2], swiz[3]); } + /** * Return TRUE if the swizzling described by "swizzle" and * "depthmode" (for depth textures only) is different from the swizzling @@ -118,8 +120,9 @@ static GLuint apply_depthmode(enum pipe_format format, * \param swizzle Texture swizzle, a bitmask computed using MAKE_SWIZZLE4. * \param depthmode One of GL_LUMINANCE, GL_INTENSITY, GL_ALPHA. */ -static boolean check_sampler_swizzle(struct pipe_sampler_view *sv, - GLuint swizzle, GLenum depthmode) +static boolean +check_sampler_swizzle(struct pipe_sampler_view *sv, + GLuint swizzle, GLenum depthmode) { swizzle = apply_depthmode(sv->texture->format, swizzle, depthmode); @@ -127,15 +130,15 @@ static boolean check_sampler_swizzle(struct pipe_sampler_view *sv, (sv->swizzle_g != GET_SWZ(swizzle, 1)) || (sv->swizzle_b != GET_SWZ(swizzle, 2)) || (sv->swizzle_a != GET_SWZ(swizzle, 3))) - return true; - return false; + return TRUE; + return FALSE; } + static INLINE struct pipe_sampler_view * st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe, struct st_texture_object *stObj, enum pipe_format format) - { struct pipe_sampler_view templ; GLuint swizzle = apply_depthmode(stObj->pt->format, @@ -161,19 +164,20 @@ static INLINE struct pipe_sampler_view * st_get_texture_sampler_view_from_stobj(struct st_texture_object *stObj, struct pipe_context *pipe, enum pipe_format format) - { if (!stObj || !stObj->pt) { return NULL; } if (!stObj->sampler_view) { - stObj->sampler_view = st_create_texture_sampler_view_from_stobj(pipe, stObj, format); + stObj->sampler_view = + st_create_texture_sampler_view_from_stobj(pipe, stObj, 
format); } return stObj->sampler_view; } + static void update_textures(struct st_context *st) { @@ -214,21 +218,29 @@ update_textures(struct st_context *st) continue; } + /* Determine the format of the texture sampler view */ st_view_format = stObj->pt->format; { - struct st_texture_image *firstImage; - enum pipe_format firstImageFormat; - firstImage = st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]); - - firstImageFormat = st_mesa_format_to_pipe_format(firstImage->base.TexFormat); - if ((stObj->base.sRGBDecode == GL_SKIP_DECODE_EXT) && (_mesa_get_format_color_encoding(firstImage->base.TexFormat) == GL_SRGB)) { - firstImageFormat = st_mesa_format_to_pipe_format(_mesa_get_srgb_format_linear(firstImage->base.TexFormat)); + const struct st_texture_image *firstImage = + st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]); + const gl_format texFormat = firstImage->base.TexFormat; + enum pipe_format firstImageFormat = + st_mesa_format_to_pipe_format(texFormat); + + if ((stObj->base.sRGBDecode == GL_SKIP_DECODE_EXT) && + (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) { + /* don't do sRGB->RGB conversion. Interpret the texture + * texture data as linear values. 
+ */ + const gl_format linearFormat = + _mesa_get_srgb_format_linear(texFormat); + firstImageFormat = st_mesa_format_to_pipe_format(linearFormat); } if (firstImageFormat != stObj->pt->format) st_view_format = firstImageFormat; - } + st->state.num_textures = su + 1; /* if sampler view has changed dereference it */ diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index abaf8235416..974fd78d7c8 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -432,6 +432,10 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_draw_buffers2 = GL_TRUE; } + if (screen->get_param(screen, PIPE_CAP_INDEP_BLEND_FUNC)) { + ctx->Extensions.ARB_draw_buffers_blend = GL_TRUE; + } + /* GL_ARB_half_float_vertex */ if (screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_FLOAT, PIPE_BUFFER, 0, @@ -439,10 +443,6 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.ARB_half_float_vertex = GL_TRUE; } - if (screen->get_param(screen, PIPE_CAP_INDEP_BLEND_FUNC)) { - ctx->Extensions.ARB_draw_buffers_blend = GL_TRUE; - } - if (screen->get_shader_param(screen, PIPE_SHADER_GEOMETRY, PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { #if 0 /* XXX re-enable when GLSL compiler again supports geometry shaders */ ctx->Extensions.ARB_geometry_shader4 = GL_TRUE; diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index c78901c0360..bca856d7142 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -125,9 +125,7 @@ st_create_texture_sampler_view(struct pipe_context *pipe, { struct pipe_sampler_view templ; - u_sampler_view_default_template(&templ, - texture, - texture->format); + u_sampler_view_default_template(&templ, texture, texture->format); return pipe->create_sampler_view(pipe, texture, &templ); } @@ -140,17 +138,15 @@ st_create_texture_sampler_view_format(struct pipe_context *pipe, { struct pipe_sampler_view templ; - 
u_sampler_view_default_template(&templ, - texture, - format); + u_sampler_view_default_template(&templ, texture, format); return pipe->create_sampler_view(pipe, texture, &templ); } + static INLINE struct pipe_sampler_view * st_get_texture_sampler_view(struct st_texture_object *stObj, struct pipe_context *pipe) - { if (!stObj || !stObj->pt) { return NULL; diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index bdb893eba22..858b8281da3 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -468,6 +468,8 @@ void _tnl_draw_prims( struct gl_context *ctx, break; } + assert(prim[i].num_instances > 0); + /* Binding inputs may imply mapping some vertex buffer objects. * They will need to be unmapped below. */ diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index e221538bad4..37940efdc11 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -38,7 +38,7 @@ struct gl_client_array; struct gl_context; struct _mesa_prim { - GLuint mode:8; + GLuint mode:8; /**< GL_POINTS, GL_LINES, GL_QUAD_STRIP, etc */ GLuint indexed:1; GLuint begin:1; GLuint end:1; diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index dd36cc32a70..80085c17c5c 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -629,15 +629,15 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) */ static void GLAPIENTRY vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, - GLsizei primcount) + GLsizei numInstances) { GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n", - _mesa_lookup_enum_by_nr(mode), start, count, primcount); + _mesa_lookup_enum_by_nr(mode), start, count, numInstances); - if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, primcount)) + if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances)) return; FLUSH_CURRENT( ctx, 0 ); @@ -649,7 +649,7 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint 
start, GLsizei count, if (0) check_draw_arrays_data(ctx, start, count); - vbo_draw_arrays(ctx, mode, start, count, primcount); + vbo_draw_arrays(ctx, mode, start, count, numInstances); if (0) print_draw_arrays(ctx, mode, start, count); @@ -724,7 +724,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices, - GLint basevertex, GLint primcount) + GLint basevertex, GLint numInstances) { struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = &vbo->exec; @@ -757,7 +757,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, prim[0].count = count; prim[0].indexed = 1; prim[0].basevertex = basevertex; - prim[0].num_instances = primcount; + prim[0].num_instances = numInstances; /* Need to give special consideration to rendering a range of * indices starting somewhere above zero. Typically the @@ -977,21 +977,21 @@ vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, */ static void GLAPIENTRY vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices, GLsizei primcount) + const GLvoid *indices, GLsizei numInstances) { GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n", _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices, primcount); + _mesa_lookup_enum_by_nr(type), indices, numInstances); if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices, - primcount)) + numInstances)) return; vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, - count, type, indices, 0, primcount); + count, type, indices, 0, numInstances); } diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c index 26d0046e83d..8c981f93e5c 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -222,6 +222,7 @@ begin( struct copy_context *copy, GLenum mode, 
GLboolean begin_flag ) prim->mode = mode; prim->begin = begin_flag; + prim->num_instances = 1; } diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c index 789cf31364b..f6aa576b6c8 100644 --- a/src/mesa/vbo/vbo_split_inplace.c +++ b/src/mesa/vbo/vbo_split_inplace.c @@ -178,6 +178,7 @@ static void split_prims( struct split_context *split) outprim->end = (nr == remaining && prim->end); outprim->start = prim->start + j; outprim->count = nr; + outprim->num_instances = prim->num_instances; update_index_bounds(split, outprim); @@ -221,6 +222,7 @@ static void split_prims( struct split_context *split) tmpprim.indexed = 1; tmpprim.start = 0; tmpprim.count = count; + tmpprim.num_instances = 1; flush_vertex(split); |