summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/program
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/program')
-rw-r--r-- src/mesa/program/hash_table.c 19
-rw-r--r-- src/mesa/program/hash_table.h 7
-rw-r--r-- src/mesa/program/ir_to_mesa.cpp 5
-rw-r--r-- src/mesa/program/prog_print.c 7
-rw-r--r-- src/mesa/program/prog_statevars.c 4
-rw-r--r-- src/mesa/program/prog_statevars.h 2
-rw-r--r-- src/mesa/program/program_parse.y 17
-rw-r--r-- src/mesa/program/register_allocate.c 94
-rw-r--r-- src/mesa/program/register_allocate.h 1
9 files changed, 140 insertions, 16 deletions
diff --git a/src/mesa/program/hash_table.c b/src/mesa/program/hash_table.c
index f7ef366c1a0..877a9e2ffc3 100644
--- a/src/mesa/program/hash_table.c
+++ b/src/mesa/program/hash_table.c
@@ -160,6 +160,25 @@ hash_table_remove(struct hash_table *ht, const void *key)
}
}
+void
+hash_table_call_foreach(struct hash_table *ht,
+ void (*callback)(const void *key,
+ void *data,
+ void *closure),
+ void *closure)
+{
+ int bucket;
+
+ for (bucket = 0; bucket < ht->num_buckets; bucket++) {
+ struct node *node, *temp;
+ foreach_s(node, temp, &ht->buckets[bucket]) {
+ struct hash_node *hn = (struct hash_node *) node;
+
+ callback(hn->key, hn->data, closure);
+ }
+ }
+}
+
unsigned
hash_table_string_hash(const void *key)
{
diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h
index f1c4fdcd1fa..e715bb1cc62 100644
--- a/src/mesa/program/hash_table.h
+++ b/src/mesa/program/hash_table.h
@@ -144,6 +144,13 @@ hash_table_pointer_hash(const void *key);
int
hash_table_pointer_compare(const void *key1, const void *key2);
+void
+hash_table_call_foreach(struct hash_table *ht,
+ void (*callback)(const void *key,
+ void *data,
+ void *closure),
+ void *closure);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 3c9b9733832..bc10b455b8b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1481,7 +1481,6 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir)
if (index) {
src.index += index->value.i[0] * element_size;
} else {
- src_reg array_base = this->result;
/* Variable index array dereference. It eats the "vec4" of the
* base of the array and an index that offsets the Mesa register
* index.
@@ -2163,7 +2162,7 @@ ir_to_mesa_visitor::visit(ir_discard *ir)
void
ir_to_mesa_visitor::visit(ir_if *ir)
{
- ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL;
+ ir_to_mesa_instruction *cond_inst, *if_inst;
ir_to_mesa_instruction *prev_inst;
prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
@@ -2195,7 +2194,7 @@ ir_to_mesa_visitor::visit(ir_if *ir)
visit_exec_list(&ir->then_instructions, this);
if (!ir->else_instructions.is_empty()) {
- else_inst = emit(ir->condition, OPCODE_ELSE);
+ emit(ir->condition, OPCODE_ELSE);
visit_exec_list(&ir->else_instructions, this);
}
diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index 484596af760..7c3b4909e73 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -647,6 +647,7 @@ _mesa_fprint_instruction_opt(FILE *f,
case OPCODE_TXP:
case OPCODE_TXL:
case OPCODE_TXB:
+ case OPCODE_TXD:
fprintf(f, "%s", _mesa_opcode_string(inst->Opcode));
if (inst->SaturateMode == SATURATE_ZERO_ONE)
fprintf(f, "_SAT");
@@ -654,6 +655,12 @@ _mesa_fprint_instruction_opt(FILE *f,
fprint_dst_reg(f, &inst->DstReg, mode, prog);
fprintf(f, ", ");
fprint_src_reg(f, &inst->SrcReg[0], mode, prog);
+ if (inst->Opcode == OPCODE_TXD) {
+ fprintf(f, ", ");
+ fprint_src_reg(f, &inst->SrcReg[1], mode, prog);
+ fprintf(f, ", ");
+ fprint_src_reg(f, &inst->SrcReg[2], mode, prog);
+ }
fprintf(f, ", texture[%d], ", inst->TexSrcUnit);
switch (inst->TexSrcTarget) {
case TEXTURE_1D_INDEX: fprintf(f, "1D"); break;
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 1fd26f44d56..d94d7fe5dfb 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -602,11 +602,11 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[],
value[0] = 1.0F;
value[1] = 0.0F;
value[2] = -1.0F;
- value[3] = (GLfloat) (ctx->DrawBuffer->Height - 1);
+ value[3] = (GLfloat) ctx->DrawBuffer->Height;
} else {
/* Flipping Y upside down (XY) followed by identity (ZW). */
value[0] = -1.0F;
- value[1] = (GLfloat) (ctx->DrawBuffer->Height - 1);
+ value[1] = (GLfloat) ctx->DrawBuffer->Height;
value[2] = 1.0F;
value[3] = 0.0F;
}
diff --git a/src/mesa/program/prog_statevars.h b/src/mesa/program/prog_statevars.h
index 9fe8d81b3dd..04af3f4cf3a 100644
--- a/src/mesa/program/prog_statevars.h
+++ b/src/mesa/program/prog_statevars.h
@@ -120,7 +120,7 @@ typedef enum gl_state_index_ {
STATE_PT_BIAS, /**< Pixel transfer RGBA bias */
STATE_SHADOW_AMBIENT, /**< ARB_shadow_ambient fail value; token[2] is texture unit index */
STATE_FB_SIZE, /**< (width-1, height-1, 0, 0) */
- STATE_FB_WPOS_Y_TRANSFORM, /**< (1, 0, -1, height-1) if a FBO is bound, (-1, height-1, 1, 0) otherwise */
+ STATE_FB_WPOS_Y_TRANSFORM, /**< (1, 0, -1, height) if a FBO is bound, (-1, height, 1, 0) otherwise */
STATE_ROT_MATRIX_0, /**< ATI_envmap_bumpmap, rot matrix row 0 */
STATE_ROT_MATRIX_1, /**< ATI_envmap_bumpmap, rot matrix row 1 */
STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */
diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y
index b35bc5a7cae..dbf5abaa617 100644
--- a/src/mesa/program/program_parse.y
+++ b/src/mesa/program/program_parse.y
@@ -1258,7 +1258,11 @@ optArraySize:
| INTEGER
{
if (($1 < 1) || ((unsigned) $1 > state->limits->MaxParameters)) {
- yyerror(& @1, state, "invalid parameter array size");
+ char msg[100];
+ _mesa_snprintf(msg, sizeof(msg),
+ "invalid parameter array size (size=%d max=%u)",
+ $1, state->limits->MaxParameters);
+ yyerror(& @1, state, msg);
YYERROR;
} else {
$$ = $1;
@@ -2060,9 +2064,14 @@ resultColBinding: COLOR optResultFaceType optResultColorType
optResultFaceType:
{
- $$ = (state->mode == ARB_vertex)
- ? VERT_RESULT_COL0
- : FRAG_RESULT_COLOR;
+ if (state->mode == ARB_vertex) {
+ $$ = VERT_RESULT_COL0;
+ } else {
+ if (state->option.DrawBuffers)
+ $$ = FRAG_RESULT_DATA0;
+ else
+ $$ = FRAG_RESULT_COLOR;
+ }
}
| '[' INTEGER ']'
{
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index 95a9bde401a..de96eb42c9b 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -28,6 +28,46 @@
/** @file register_allocate.c
*
* Graph-coloring register allocator.
+ *
+ * The basic idea of graph coloring is to make a node in a graph for
+ * every thing that needs a register (color) number assigned, and make
+ * edges in the graph between nodes that interfere (can't be allocated
+ * to the same register at the same time).
+ *
+ * During the "simplify" process, any node with fewer edges than
+ * there are registers can always be assigned a
+ * register regardless of what its neighbors choose, so that node is
+ * pushed on a stack and removed (with its edges) from the graph.
+ * That likely causes other nodes to become trivially colorable as well.
+ *
+ * Then during the "select" process, nodes are popped off of that
+ * stack, their edges restored, and assigned a color different from
+ * their neighbors. Because they were pushed on the stack only when
+ * they were trivially colorable, any color chosen won't interfere
+ * with the registers to be popped later.
+ *
+ * The downside to most graph coloring is that real hardware often has
+ * limitations, like registers that need to be allocated to a node in
+ * pairs, or aligned on some boundary. This implementation follows
+ * the paper "Retargetable Graph-Coloring Register Allocation for
+ * Irregular Architectures" by Johan Runeson and Sven-Olof Nyström.
+ *
+ * In this system, there are register classes each containing various
+ * registers, and registers may interfere with other registers. For
+ * example, one might have a class of base registers, and a class of
+ * aligned register pairs that would each interfere with their pair of
+ * the base registers. Each node has a register class it needs to be
+ * assigned to. Define p(B) to be the size of register class B, and
+ * q(B,C) to be the number of registers in B that the worst choice
+ * register in C could conflict with. Then, this system replaces the
+ * basic graph coloring test of "fewer edges from this node than there
+ * are registers" with "For this node of class B, the sum of q(B,C)
+ * for each neighbor node of class C is less than p(B)".
+ *
+ * A nice feature of the pq test is that q(B,C) can be computed once
+ * up front and stored in a 2-dimensional array, so that the cost of
+ * coloring a node is constant with the number of registers. We do
+ * this during ra_set_finalize().
*/
#include <ralloc.h>
@@ -37,6 +77,8 @@
#include "main/mtypes.h"
#include "register_allocate.h"
+#define NO_REG ~0
+
struct ra_reg {
GLboolean *conflicts;
unsigned int *conflict_list;
@@ -56,25 +98,47 @@ struct ra_class {
GLboolean *regs;
/**
- * p_B in Runeson/Nyström paper.
+ * p(B) in Runeson/Nyström paper.
*
* This is "how many regs are in the set."
*/
unsigned int p;
/**
- * q_B,C in Runeson/Nyström paper.
+ * q(B,C) (indexed by C, B is this register class) in
+ * Runeson/Nyström paper. This is "how many registers of B could
+ * the worst choice register from C conflict with".
*/
unsigned int *q;
};
struct ra_node {
+ /** @{
+ *
+ * List of which nodes this node interferes with. This should be
+ * symmetric with the other node.
+ */
GLboolean *adjacency;
unsigned int *adjacency_list;
- unsigned int class;
unsigned int adjacency_count;
+ /** @} */
+
+ unsigned int class;
+
+ /* Register, if assigned, or NO_REG. */
unsigned int reg;
+
+ /**
+ * Set when the node is in the trivially colorable stack. When
+ * set, the adjacency to this node is ignored, to implement the
+ * "remove the edge from the graph" in simplification without
+ * having to actually modify the adjacency_list.
+ */
GLboolean in_stack;
+
+ /* For an implementation that needs register spilling, this is the
+ * approximate cost of spilling this node.
+ */
float spill_cost;
};
@@ -227,7 +291,7 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
g->nodes[i].adjacency_list = ralloc_array(g, unsigned int, count);
g->nodes[i].adjacency_count = 0;
ra_add_node_adjacency(g, i, i);
- g->nodes[i].reg = ~0;
+ g->nodes[i].reg = NO_REG;
}
return g;
@@ -287,7 +351,7 @@ ra_simplify(struct ra_graph *g)
progress = GL_FALSE;
for (i = g->count - 1; i >= 0; i--) {
- if (g->nodes[i].in_stack)
+ if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG)
continue;
if (pq_test(g, i)) {
@@ -367,7 +431,7 @@ ra_optimistic_color(struct ra_graph *g)
unsigned int i;
for (i = 0; i < g->count; i++) {
- if (g->nodes[i].in_stack)
+ if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG)
continue;
g->stack[g->stack_count] = i;
@@ -391,6 +455,24 @@ ra_get_node_reg(struct ra_graph *g, unsigned int n)
return g->nodes[n].reg;
}
+/**
+ * Forces a node to a specific register. This can be used to avoid
+ * creating a register class containing one node when handling data
+ * that must live in a fixed location and is known to not conflict
+ * with other forced register assignments (as is common with shader
+ * input data). These nodes do not end up in the stack during
+ * ra_simplify(), and thus at ra_select() time it is as if they were
+ * the first popped off the stack and assigned their fixed locations.
+ *
+ * Must be called before ra_simplify().
+ */
+void
+ra_set_node_reg(struct ra_graph *g, unsigned int n, unsigned int reg)
+{
+ g->nodes[n].reg = reg;
+ g->nodes[n].in_stack = GL_FALSE;
+}
+
static float
ra_get_spill_benefit(struct ra_graph *g, unsigned int n)
{
diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h
index 198b89f2d7d..5b95833f394 100644
--- a/src/mesa/program/register_allocate.h
+++ b/src/mesa/program/register_allocate.h
@@ -65,6 +65,7 @@ GLboolean ra_select(struct ra_graph *g);
GLboolean ra_allocate_no_spills(struct ra_graph *g);
unsigned int ra_get_node_reg(struct ra_graph *g, unsigned int n);
+void ra_set_node_reg(struct ra_graph * g, unsigned int n, unsigned int reg);
void ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost);
int ra_get_best_spill_node(struct ra_graph *g);
/** @} */