diff options
Diffstat (limited to 'src/mesa/program')
-rw-r--r-- | src/mesa/program/hash_table.c | 19 | ||||
-rw-r--r-- | src/mesa/program/hash_table.h | 7 | ||||
-rw-r--r-- | src/mesa/program/ir_to_mesa.cpp | 5 | ||||
-rw-r--r-- | src/mesa/program/prog_print.c | 7 | ||||
-rw-r--r-- | src/mesa/program/prog_statevars.c | 4 | ||||
-rw-r--r-- | src/mesa/program/prog_statevars.h | 2 | ||||
-rw-r--r-- | src/mesa/program/program_parse.y | 17 | ||||
-rw-r--r-- | src/mesa/program/register_allocate.c | 94 | ||||
-rw-r--r-- | src/mesa/program/register_allocate.h | 1 |
9 files changed, 140 insertions, 16 deletions
diff --git a/src/mesa/program/hash_table.c b/src/mesa/program/hash_table.c index f7ef366c1a0..877a9e2ffc3 100644 --- a/src/mesa/program/hash_table.c +++ b/src/mesa/program/hash_table.c @@ -160,6 +160,25 @@ hash_table_remove(struct hash_table *ht, const void *key) } } +void +hash_table_call_foreach(struct hash_table *ht, + void (*callback)(const void *key, + void *data, + void *closure), + void *closure) +{ + int bucket; + + for (bucket = 0; bucket < ht->num_buckets; bucket++) { + struct node *node, *temp; + foreach_s(node, temp, &ht->buckets[bucket]) { + struct hash_node *hn = (struct hash_node *) node; + + callback(hn->key, hn->data, closure); + } + } +} + unsigned hash_table_string_hash(const void *key) { diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h index f1c4fdcd1fa..e715bb1cc62 100644 --- a/src/mesa/program/hash_table.h +++ b/src/mesa/program/hash_table.h @@ -144,6 +144,13 @@ hash_table_pointer_hash(const void *key); int hash_table_pointer_compare(const void *key1, const void *key2); +void +hash_table_call_foreach(struct hash_table *ht, + void (*callback)(const void *key, + void *data, + void *closure), + void *closure); + #ifdef __cplusplus } #endif diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 3c9b9733832..bc10b455b8b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1481,7 +1481,6 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir) if (index) { src.index += index->value.i[0] * element_size; } else { - src_reg array_base = this->result; /* Variable index array dereference. It eats the "vec4" of the * base of the array and an index that offsets the Mesa register * index. 
@@ -2163,7 +2162,7 @@ ir_to_mesa_visitor::visit(ir_discard *ir) void ir_to_mesa_visitor::visit(ir_if *ir) { - ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL; + ir_to_mesa_instruction *cond_inst, *if_inst; ir_to_mesa_instruction *prev_inst; prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); @@ -2195,7 +2194,7 @@ ir_to_mesa_visitor::visit(ir_if *ir) visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - else_inst = emit(ir->condition, OPCODE_ELSE); + emit(ir->condition, OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index 484596af760..7c3b4909e73 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -647,6 +647,7 @@ _mesa_fprint_instruction_opt(FILE *f, case OPCODE_TXP: case OPCODE_TXL: case OPCODE_TXB: + case OPCODE_TXD: fprintf(f, "%s", _mesa_opcode_string(inst->Opcode)); if (inst->SaturateMode == SATURATE_ZERO_ONE) fprintf(f, "_SAT"); @@ -654,6 +655,12 @@ _mesa_fprint_instruction_opt(FILE *f, fprint_dst_reg(f, &inst->DstReg, mode, prog); fprintf(f, ", "); fprint_src_reg(f, &inst->SrcReg[0], mode, prog); + if (inst->Opcode == OPCODE_TXD) { + fprintf(f, ", "); + fprint_src_reg(f, &inst->SrcReg[1], mode, prog); + fprintf(f, ", "); + fprint_src_reg(f, &inst->SrcReg[2], mode, prog); + } fprintf(f, ", texture[%d], ", inst->TexSrcUnit); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: fprintf(f, "1D"); break; diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 1fd26f44d56..d94d7fe5dfb 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -602,11 +602,11 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[], value[0] = 1.0F; value[1] = 0.0F; value[2] = -1.0F; - value[3] = (GLfloat) (ctx->DrawBuffer->Height - 1); + value[3] = (GLfloat) ctx->DrawBuffer->Height; } else { /* Flipping Y upside 
down (XY) followed by identity (ZW). */ value[0] = -1.0F; - value[1] = (GLfloat) (ctx->DrawBuffer->Height - 1); + value[1] = (GLfloat) ctx->DrawBuffer->Height; value[2] = 1.0F; value[3] = 0.0F; } diff --git a/src/mesa/program/prog_statevars.h b/src/mesa/program/prog_statevars.h index 9fe8d81b3dd..04af3f4cf3a 100644 --- a/src/mesa/program/prog_statevars.h +++ b/src/mesa/program/prog_statevars.h @@ -120,7 +120,7 @@ typedef enum gl_state_index_ { STATE_PT_BIAS, /**< Pixel transfer RGBA bias */ STATE_SHADOW_AMBIENT, /**< ARB_shadow_ambient fail value; token[2] is texture unit index */ STATE_FB_SIZE, /**< (width-1, height-1, 0, 0) */ - STATE_FB_WPOS_Y_TRANSFORM, /**< (1, 0, -1, height-1) if a FBO is bound, (-1, height-1, 1, 0) otherwise */ + STATE_FB_WPOS_Y_TRANSFORM, /**< (1, 0, -1, height) if a FBO is bound, (-1, height, 1, 0) otherwise */ STATE_ROT_MATRIX_0, /**< ATI_envmap_bumpmap, rot matrix row 0 */ STATE_ROT_MATRIX_1, /**< ATI_envmap_bumpmap, rot matrix row 1 */ STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */ diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index b35bc5a7cae..dbf5abaa617 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -1258,7 +1258,11 @@ optArraySize: | INTEGER { if (($1 < 1) || ((unsigned) $1 > state->limits->MaxParameters)) { - yyerror(& @1, state, "invalid parameter array size"); + char msg[100]; + _mesa_snprintf(msg, sizeof(msg), + "invalid parameter array size (size=%d max=%u)", + $1, state->limits->MaxParameters); + yyerror(& @1, state, msg); YYERROR; } else { $$ = $1; @@ -2060,9 +2064,14 @@ resultColBinding: COLOR optResultFaceType optResultColorType optResultFaceType: { - $$ = (state->mode == ARB_vertex) - ? 
VERT_RESULT_COL0 - : FRAG_RESULT_COLOR; + if (state->mode == ARB_vertex) { + $$ = VERT_RESULT_COL0; + } else { + if (state->option.DrawBuffers) + $$ = FRAG_RESULT_DATA0; + else + $$ = FRAG_RESULT_COLOR; + } } | '[' INTEGER ']' { diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c index 95a9bde401a..de96eb42c9b 100644 --- a/src/mesa/program/register_allocate.c +++ b/src/mesa/program/register_allocate.c @@ -28,6 +28,46 @@ /** @file register_allocate.c * * Graph-coloring register allocator. + * + * The basic idea of graph coloring is to make a node in a graph for + * every thing that needs a register (color) number assigned, and make + * edges in the graph between nodes that interfere (can't be allocated + * to the same register at the same time). + * + * During the "simplify" process, any node with fewer edges than + * there are registers means that that node can get assigned a + * register regardless of what its neighbors choose, so that node is + * pushed on a stack and removed (with its edges) from the graph. + * That likely causes other nodes to become trivially colorable as well. + * + * Then during the "select" process, nodes are popped off of that + * stack, their edges restored, and assigned a color different from + * their neighbors. Because they were pushed on the stack only when + * they were trivially colorable, any color chosen won't interfere + * with the registers to be popped later. + * + * The downside to most graph coloring is that real hardware often has + * limitations, like registers that need to be allocated to a node in + * pairs, or aligned on some boundary. This implementation follows + * the paper "Retargetable Graph-Coloring Register Allocation for + * Irregular Architectures" by Johan Runeson and Sven-Olof Nyström. + * + * In this system, there are register classes each containing various + * registers, and registers may interfere with other registers. 
For + * example, one might have a class of base registers, and a class of + * aligned register pairs that would each interfere with their pair of + * the base registers. Each node has a register class it needs to be + * assigned to. Define p(B) to be the size of register class B, and + * q(B,C) to be the number of registers in B that the worst choice + * register in C could conflict with. Then, this system replaces the + * basic graph coloring test of "fewer edges from this node than there + * are registers" with "For this node of class B, the sum of q(B,C) + * for each neighbor node of class C is less than p(B)". + * + * A nice feature of the pq test is that q(B,C) can be computed once + * up front and stored in a 2-dimensional array, so that the cost of + * coloring a node is constant with the number of registers. We do + * this during ra_set_finalize(). */ #include <ralloc.h> @@ -37,6 +77,8 @@ #include "main/mtypes.h" #include "register_allocate.h" +#define NO_REG ~0 + struct ra_reg { GLboolean *conflicts; unsigned int *conflict_list; @@ -56,25 +98,47 @@ struct ra_class { GLboolean *regs; /** - * p_B in Runeson/Nyström paper. + * p(B) in Runeson/Nyström paper. * * This is "how many regs are in the set." */ unsigned int p; /** - * q_B,C in Runeson/Nyström paper. + * q(B,C) (indexed by C, B is this register class) in + * Runeson/Nyström paper. This is "how many registers of B could + * the worst choice register from C conflict with". */ unsigned int *q; }; struct ra_node { + /** @{ + * + * List of which nodes this node interferes with. This should be + * symmetric with the other node. + */ GLboolean *adjacency; unsigned int *adjacency_list; - unsigned int class; unsigned int adjacency_count; + /** @} */ + + unsigned int class; + + /* Register, if assigned, or NO_REG. */ unsigned int reg; + + /** + * Set when the node is in the trivially colorable stack. 
When + * set, the adjacency to this node is ignored, to implement the + * "remove the edge from the graph" in simplification without + * having to actually modify the adjacency_list. + */ GLboolean in_stack; + + /* For an implementation that needs register spilling, this is the + * approximate cost of spilling this node. + */ float spill_cost; }; @@ -227,7 +291,7 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) g->nodes[i].adjacency_list = ralloc_array(g, unsigned int, count); g->nodes[i].adjacency_count = 0; ra_add_node_adjacency(g, i, i); - g->nodes[i].reg = ~0; + g->nodes[i].reg = NO_REG; } return g; @@ -287,7 +351,7 @@ ra_simplify(struct ra_graph *g) progress = GL_FALSE; for (i = g->count - 1; i >= 0; i--) { - if (g->nodes[i].in_stack) + if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG) continue; if (pq_test(g, i)) { @@ -367,7 +431,7 @@ ra_optimistic_color(struct ra_graph *g) unsigned int i; for (i = 0; i < g->count; i++) { - if (g->nodes[i].in_stack) + if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG) continue; g->stack[g->stack_count] = i; @@ -391,6 +455,24 @@ ra_get_node_reg(struct ra_graph *g, unsigned int n) return g->nodes[n].reg; } +/** + * Forces a node to a specific register. This can be used to avoid + * creating a register class containing one node when handling data + * that must live in a fixed location and is known to not conflict + * with other forced register assignment (as is common with shader + * input data). These nodes do not end up in the stack during + * ra_simplify(), and thus at ra_select() time it is as if they were + * the first popped off the stack and assigned their fixed locations. + * + * Must be called before ra_simplify(). 
+ */ +void +ra_set_node_reg(struct ra_graph *g, unsigned int n, unsigned int reg) +{ + g->nodes[n].reg = reg; + g->nodes[n].in_stack = GL_FALSE; +} + static float ra_get_spill_benefit(struct ra_graph *g, unsigned int n) { diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h index 198b89f2d7d..5b95833f394 100644 --- a/src/mesa/program/register_allocate.h +++ b/src/mesa/program/register_allocate.h @@ -65,6 +65,7 @@ GLboolean ra_select(struct ra_graph *g); GLboolean ra_allocate_no_spills(struct ra_graph *g); unsigned int ra_get_node_reg(struct ra_graph *g, unsigned int n); +void ra_set_node_reg(struct ra_graph * g, unsigned int n, unsigned int reg); void ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost); int ra_get_best_spill_node(struct ra_graph *g); /** @} */ |