summaryrefslogtreecommitdiffstats
path: root/src/mesa/program
diff options
context:
space:
mode:
authorStéphane Marchesin <[email protected]>2011-08-26 17:37:25 -0700
committerStéphane Marchesin <[email protected]>2011-08-26 17:37:25 -0700
commitf8e6d19f3f40931be741b44d3edf210c38e13f0f (patch)
treee99e4c619901412ac6448534b0f57ce1c4295c6b /src/mesa/program
parent974c49ed176de55aadb335a2956ef5dfec774a23 (diff)
parente3b0e3776646d0367206e4544229622eb22fe9f8 (diff)
Merge branch 'master' of git://anongit.freedesktop.org/mesa/mesa
Diffstat (limited to 'src/mesa/program')
-rw-r--r--src/mesa/program/ir_to_mesa.cpp317
-rw-r--r--src/mesa/program/nvfragparse.c23
-rw-r--r--src/mesa/program/prog_execute.c12
-rw-r--r--src/mesa/program/prog_opt_constant_fold.c451
-rw-r--r--src/mesa/program/prog_optimize.c19
-rw-r--r--src/mesa/program/prog_optimize.h3
-rw-r--r--src/mesa/program/prog_parameter.c78
-rw-r--r--src/mesa/program/prog_parameter.h31
-rw-r--r--src/mesa/program/prog_parameter_layout.c2
-rw-r--r--src/mesa/program/prog_print.c2
-rw-r--r--src/mesa/program/prog_statevars.c2
-rw-r--r--src/mesa/program/program.c8
-rw-r--r--src/mesa/program/program_parse.y56
-rw-r--r--src/mesa/program/program_parser.h3
-rw-r--r--src/mesa/program/register_allocate.c21
-rw-r--r--src/mesa/program/register_allocate.h2
-rw-r--r--src/mesa/program/sampler.cpp2
17 files changed, 872 insertions, 160 deletions
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d8e5a3a9772..6820e4c6ba7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -134,7 +134,7 @@ src_reg::src_reg(dst_reg reg)
this->index = reg.index;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
- this->reladdr = NULL;
+ this->reladdr = reg.reladdr;
}
dst_reg::dst_reg(src_reg reg)
@@ -297,11 +297,11 @@ public:
/**
* Emit the correct dot-product instruction for the type of arguments
*/
- void emit_dp(ir_instruction *ir,
- dst_reg dst,
- src_reg src0,
- src_reg src1,
- unsigned elements);
+ ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
+ dst_reg dst,
+ src_reg src0,
+ src_reg src1,
+ unsigned elements);
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, src_reg src0);
@@ -312,9 +312,11 @@ public:
void emit_scs(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, const src_reg &src);
- GLboolean try_emit_mad(ir_expression *ir,
+ bool try_emit_mad(ir_expression *ir,
int mul_operand);
- GLboolean try_emit_sat(ir_expression *ir);
+ bool try_emit_mad_for_and_not(ir_expression *ir,
+ int mul_operand);
+ bool try_emit_sat(ir_expression *ir);
void emit_swz(ir_expression *ir);
@@ -331,20 +333,6 @@ dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
-
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
- va_end(args);
-
- prog->LinkStatus = GL_FALSE;
-}
-
static int
swizzle_for_size(int size)
{
@@ -422,7 +410,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
}
-void
+ir_to_mesa_instruction *
ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
dst_reg dst, src_reg src0, src_reg src1,
unsigned elements)
@@ -431,7 +419,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
};
- emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+ return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
}
/**
@@ -593,13 +581,13 @@ ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
}
}
-struct src_reg
+src_reg
ir_to_mesa_visitor::src_reg_for_float(float val)
{
src_reg src(PROGRAM_CONSTANT, -1, NULL);
src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
- &val, 1, &src.swizzle);
+ (const gl_constant_value *)&val, 1, &src.swizzle);
return src;
}
@@ -655,8 +643,6 @@ src_reg
ir_to_mesa_visitor::get_temp(const glsl_type *type)
{
src_reg src;
- int swizzle[4];
- int i;
src.file = PROGRAM_TEMPORARY;
src.index = next_temp;
@@ -666,12 +652,7 @@ ir_to_mesa_visitor::get_temp(const glsl_type *type)
if (type->is_array() || type->is_record()) {
src.swizzle = SWIZZLE_NOOP;
} else {
- for (i = 0; i < type->vector_elements; i++)
- swizzle[i] = i;
- for (; i < 4; i++)
- swizzle[i] = type->vector_elements - 1;
- src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
- swizzle[2], swizzle[3]);
+ src.swizzle = swizzle_for_size(type->vector_elements);
}
src.negate = 0;
@@ -744,7 +725,7 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
}
}
- struct variable_storage *storage;
+ variable_storage *storage;
dst_reg dst;
if (i == ir->num_state_slots) {
/* We'll set the index later. */
@@ -789,10 +770,11 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
if (storage->file == PROGRAM_TEMPORARY &&
dst.index != storage->index + (int) ir->num_state_slots) {
- fail_link(this->shader_program,
- "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
- ir->name, dst.index - storage->index,
- type_size(ir->type));
+ linker_error(this->shader_program,
+ "failed to load builtin uniform `%s' "
+ "(%d/%d regs loaded)\n",
+ ir->name, dst.index - storage->index,
+ type_size(ir->type));
}
}
}
@@ -889,7 +871,7 @@ ir_to_mesa_visitor::visit(ir_function *ir)
}
}
-GLboolean
+bool
ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
{
int nonmul_operand = 1 - mul_operand;
@@ -912,7 +894,47 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
return true;
}
-GLboolean
+/**
+ * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false. Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As result, the logical expression (a & !b) can be rewritten as:
+ *
+ * - a * !b
+ * - a * (1 - b)
+ * - (a * 1) - (a * b)
+ * - a + -(a * b)
+ * - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+ const int other_operand = 1 - try_operand;
+ src_reg a, b;
+
+ ir_expression *expr = ir->operands[try_operand]->as_expression();
+ if (!expr || expr->operation != ir_unop_logic_not)
+ return false;
+
+ ir->operands[other_operand]->accept(this);
+ a = this->result;
+ expr->operands[0]->accept(this);
+ b = this->result;
+
+ b.negate = ~b.negate;
+
+ this->result = get_temp(ir->type);
+ emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
+
+ return true;
+}
+
+bool
ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
{
/* Saturates were only introduced to vertex programs in
@@ -928,10 +950,30 @@ ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
sat_src->accept(this);
src_reg src = this->result;
- this->result = get_temp(ir->type);
- ir_to_mesa_instruction *inst;
- inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
- inst->saturate = true;
+ /* If we generated an expression instruction into a temporary in
+ * processing the saturate's operand, apply the saturate to that
+ * instruction. Otherwise, generate a MOV to do the saturate.
+ *
+ * Note that we have to be careful to only do this optimization if
+ * the instruction in question was what generated src->result. For
+ * example, ir_dereference_array might generate a MUL instruction
+ * to create the reladdr, and return us a src reg using that
+ * reladdr. That MUL result is not the value we're trying to
+ * saturate.
+ */
+ ir_expression *sat_src_expr = sat_src->as_expression();
+ ir_to_mesa_instruction *new_inst;
+ new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
+ if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+ sat_src_expr->operation == ir_binop_add ||
+ sat_src_expr->operation == ir_binop_dot)) {
+ new_inst->saturate = true;
+ } else {
+ this->result = get_temp(ir->type);
+ ir_to_mesa_instruction *inst;
+ inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
+ inst->saturate = true;
+ }
return true;
}
@@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
if (try_emit_mad(ir, 0))
return;
}
+
+ /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
+ */
+ if (ir->operation == ir_binop_logic_and) {
+ if (try_emit_mad_for_and_not(ir, 1))
+ return;
+ if (try_emit_mad_for_and_not(ir, 0))
+ return;
+ }
+
if (try_emit_sat(ir))
return;
@@ -1135,7 +1187,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
switch (ir->operation) {
case ir_unop_logic_not:
- emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0));
+ /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
+ * older GPUs implement SEQ using multiple instructions (i915 uses two
+ * SGE instructions and a MUL instruction). Since our logic values are
+ * 0.0 and 1.0, 1-x also implements !x.
+ */
+ op[0].negate = ~op[0].negate;
+ emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
break;
case ir_unop_neg:
op[0].negate = ~op[0].negate;
@@ -1231,8 +1289,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
ir->operands[1]->type->is_vector()) {
src_reg temp = get_temp(glsl_type::vec4_type);
emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero becomes 1.0, and positive values become zero.
+ */
emit_dp(ir, result_dst, temp, temp, vector_elements);
- emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero becomes 1.0, and negative values become zero. This
+ * achieved using SGE.
+ */
+ src_reg sge_src = result_src;
+ sge_src.negate = ~sge_src.negate;
+ emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
} else {
emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
}
@@ -1243,29 +1312,83 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
ir->operands[1]->type->is_vector()) {
src_reg temp = get_temp(glsl_type::vec4_type);
emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
- emit_dp(ir, result_dst, temp, temp, vector_elements);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *const dp =
+ emit_dp(ir, result_dst, temp, temp, vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
} else {
emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
}
break;
- case ir_unop_any:
+ case ir_unop_any: {
assert(ir->operands[0]->type->is_vector());
- emit_dp(ir, result_dst, op[0], op[0],
- ir->operands[0]->type->vector_elements);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *const dp =
+ emit_dp(ir, result_dst, op[0], op[0],
+ ir->operands[0]->type->vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
break;
+ }
case ir_binop_logic_xor:
emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
break;
- case ir_binop_logic_or:
- /* This could be a saturated add and skip the SNE. */
- emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+ case ir_binop_logic_or: {
+ /* After the addition, the value will be an integer on the
+ * range [0,2]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *add =
+ emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ add->saturate = true;
+ } else {
+ /* Negating the result of the addition gives values on the range
+ * [-2, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
break;
+ }
case ir_binop_logic_and:
/* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
@@ -1496,6 +1619,18 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir)
this->result, src_reg_for_float(element_size));
}
+ /* If there was already a relative address register involved, add the
+ * new and the old together to get the new offset.
+ */
+ if (src.reladdr != NULL) {
+ src_reg accum_reg = get_temp(glsl_type::float_type);
+
+ emit(ir, OPCODE_ADD, dst_reg(accum_reg),
+ index_reg, *src.reladdr);
+
+ index_reg = accum_reg;
+ }
+
src.reladdr = ralloc(mem_ctx, src_reg);
memcpy(src.reladdr, &index_reg, sizeof(index_reg));
}
@@ -1796,7 +1931,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
src = src_reg(PROGRAM_CONSTANT, -1, NULL);
src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
- values,
+ (gl_constant_value *) values,
ir->type->vector_elements,
&src.swizzle);
emit(ir, OPCODE_MOV, mat_column, src);
@@ -1834,7 +1969,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
- values,
+ (gl_constant_value *) values,
ir->type->vector_elements,
&this->result.swizzle);
}
@@ -1969,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
ir_to_mesa_instruction *inst = NULL;
prog_opcode opcode = OPCODE_NOP;
- ir->coordinate->accept(this);
+ if (ir->op == ir_txs)
+ this->result = src_reg_for_float(0.0);
+ else
+ ir->coordinate->accept(this);
/* Put our coords in a temp. We'll need to modify them for shadow,
* projection, or LOD, so the only case we'd use it as is is if
@@ -1993,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
switch (ir->op) {
case ir_tex:
+ case ir_txs:
opcode = OPCODE_TEX;
break;
case ir_txb:
@@ -2401,29 +2540,32 @@ check_resources(const struct gl_context *ctx,
case GL_VERTEX_PROGRAM_ARB:
if (_mesa_bitcount(prog->SamplersUsed) >
ctx->Const.MaxVertexTextureImageUnits) {
- fail_link(shader_program, "Too many vertex shader texture samplers");
+ linker_error(shader_program,
+ "Too many vertex shader texture samplers");
}
if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
- fail_link(shader_program, "Too many vertex shader constants");
+ linker_error(shader_program, "Too many vertex shader constants");
}
break;
case MESA_GEOMETRY_PROGRAM:
if (_mesa_bitcount(prog->SamplersUsed) >
ctx->Const.MaxGeometryTextureImageUnits) {
- fail_link(shader_program, "Too many geometry shader texture samplers");
+ linker_error(shader_program,
+ "Too many geometry shader texture samplers");
}
if (prog->Parameters->NumParameters >
MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
- fail_link(shader_program, "Too many geometry shader constants");
+ linker_error(shader_program, "Too many geometry shader constants");
}
break;
case GL_FRAGMENT_PROGRAM_ARB:
if (_mesa_bitcount(prog->SamplersUsed) >
ctx->Const.MaxTextureImageUnits) {
- fail_link(shader_program, "Too many fragment shader texture samplers");
+ linker_error(shader_program,
+ "Too many fragment shader texture samplers");
}
if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
- fail_link(shader_program, "Too many fragment shader constants");
+ linker_error(shader_program, "Too many fragment shader constants");
}
break;
default:
@@ -2531,16 +2673,17 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
*/
if (file == PROGRAM_SAMPLER) {
for (unsigned int j = 0; j < size / 4; j++)
- prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+ prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
}
/* The location chosen in the Parameters list here (returned
* from _mesa_add_uniform) has to match what the linker chose.
*/
if (index != parameter_index) {
- fail_link(shader_program, "Allocation of uniform `%s' to target "
- "failed (%d vs %d)\n",
- uniform->Name, index, parameter_index);
+ linker_error(shader_program,
+ "Allocation of uniform `%s' to target failed "
+ "(%d vs %d)\n",
+ uniform->Name, index, parameter_index);
}
}
}
@@ -2573,8 +2716,8 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
int loc = _mesa_get_uniform_location(ctx, shader_program, name);
if (loc == -1) {
- fail_link(shader_program,
- "Couldn't find uniform for initializer %s\n", name);
+ linker_error(shader_program,
+ "Couldn't find uniform for initializer %s\n", name);
return;
}
@@ -2974,11 +3117,31 @@ get_mesa_program(struct gl_context *ctx,
if (mesa_inst->SrcReg[src].RelAddr)
prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
- if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
- fail_link(shader_program, "Couldn't flatten if statement\n");
- }
-
switch (mesa_inst->Opcode) {
+ case OPCODE_IF:
+ if (options->EmitNoIfs) {
+ linker_warning(shader_program,
+ "Couldn't flatten if-statement. "
+ "This will likely result in software "
+ "rasterization.\n");
+ }
+ break;
+ case OPCODE_BGNLOOP:
+ if (options->EmitNoLoops) {
+ linker_warning(shader_program,
+ "Couldn't unroll loop. "
+ "This will likely result in software "
+ "rasterization.\n");
+ }
+ break;
+ case OPCODE_CONT:
+ if (options->EmitNoCont) {
+ linker_warning(shader_program,
+ "Couldn't lower continue-statement. "
+ "This will likely result in software "
+ "rasterization.\n");
+ }
+ break;
case OPCODE_BGNSUB:
inst->function->inst = i;
mesa_inst->Comment = strdup(inst->function->sig->function_name());
@@ -3246,7 +3409,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
for (i = 0; i < prog->NumShaders; i++) {
if (!prog->Shaders[i]->CompileStatus) {
- fail_link(prog, "linking with uncompiled shader");
+ linker_error(prog, "linking with uncompiled shader");
prog->LinkStatus = GL_FALSE;
}
}
diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c
index 8516b5fc1ff..ce72c610d89 100644
--- a/src/mesa/program/nvfragparse.c
+++ b/src/mesa/program/nvfragparse.c
@@ -472,8 +472,9 @@ Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number)
const GLfloat *constant;
if (!Parse_Identifier(parseState, ident))
RETURN_ERROR1("Expected an identifier");
- constant = _mesa_lookup_parameter_value(parseState->parameters,
- -1, (const char *) ident);
+ constant = (GLfloat *)_mesa_lookup_parameter_value(parseState->parameters,
+ -1,
+ (const char *) ident);
/* XXX Check that it's a constant and not a parameter */
if (!constant) {
RETURN_ERROR1("Undefined symbol");
@@ -1039,7 +1040,8 @@ Parse_VectorSrc(struct parse_state *parseState,
if (!Parse_ScalarConstant(parseState, values))
RETURN_ERROR;
paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
- values, 4, NULL);
+ (gl_constant_value *) values,
+ 4, NULL);
srcReg->File = PROGRAM_NAMED_PARAM;
srcReg->Index = paramIndex;
}
@@ -1051,7 +1053,8 @@ Parse_VectorSrc(struct parse_state *parseState,
if (!Parse_VectorConstant(parseState, values))
RETURN_ERROR;
paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
- values, 4, NULL);
+ (gl_constant_value *) values,
+ 4, NULL);
srcReg->File = PROGRAM_NAMED_PARAM;
srcReg->Index = paramIndex;
}
@@ -1145,7 +1148,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState,
if (!Parse_VectorConstant(parseState, values))
RETURN_ERROR;
paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
- values, 4, NULL);
+ (gl_constant_value *) values,
+ 4, NULL);
srcReg->File = PROGRAM_NAMED_PARAM;
srcReg->Index = paramIndex;
}
@@ -1170,7 +1174,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState,
if (!Parse_ScalarConstant(parseState, values))
RETURN_ERROR;
paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
- values, 4, NULL);
+ (gl_constant_value *) values,
+ 4, NULL);
srcReg->Index = paramIndex;
srcReg->File = PROGRAM_NAMED_PARAM;
needSuffix = GL_FALSE;
@@ -1296,7 +1301,8 @@ Parse_InstructionSequence(struct parse_state *parseState,
RETURN_ERROR2(id, "already defined");
}
_mesa_add_named_parameter(parseState->parameters,
- (const char *) id, value);
+ (const char *) id,
+ (gl_constant_value *) value);
}
else if (Parse_String(parseState, "DECLARE")) {
GLubyte id[100];
@@ -1315,7 +1321,8 @@ Parse_InstructionSequence(struct parse_state *parseState,
RETURN_ERROR2(id, "already declared");
}
_mesa_add_named_parameter(parseState->parameters,
- (const char *) id, value);
+ (const char *) id,
+ (gl_constant_value *) value);
}
else if (Parse_String(parseState, "END")) {
inst->Opcode = OPCODE_END;
diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index e7553c69dbe..77f842a1630 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -157,7 +157,7 @@ get_src_register_pointer(const struct prog_src_register *source,
case PROGRAM_NAMED_PARAM:
if (reg >= (GLint) prog->Parameters->NumParameters)
return ZeroVec;
- return prog->Parameters->ParameterValues[reg];
+ return (GLfloat *) prog->Parameters->ParameterValues[reg];
case PROGRAM_SYSTEM_VALUE:
assert(reg < Elements(machine->SystemValues));
@@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx,
struct gl_program_machine *machine)
{
const GLuint numInst = program->NumInstructions;
- const GLuint maxExec = 10000;
+ const GLuint maxExec = 65536;
GLuint pc, numExec = 0;
machine->CurProgram = program;
@@ -1651,6 +1651,14 @@ _mesa_execute_program(struct gl_context * ctx,
GLfloat texcoord[4], color[4];
fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+ /* For TEX, texcoord.Q should not be used and its value should not
+ * matter (at most, we pass coord.xyz to texture3D() in GLSL).
+ * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
+ * which is effectively what happens when the texcoord swizzle
+ * is .xyzz
+ */
+ texcoord[3] = 1.0f;
+
fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
if (DEBUG_PROG) {
diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c
new file mode 100644
index 00000000000..e2418b55451
--- /dev/null
+++ b/src/mesa/program/prog_opt_constant_fold.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_instruction.h"
+#include "prog_optimize.h"
+#include "prog_parameter.h"
+#include <stdbool.h>
+
+static bool
+src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
+{
+ unsigned i;
+
+ for (i = 0; i < num_srcs; i++) {
+ if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
+ return false;
+ }
+
+ return true;
+}
+
+static struct prog_src_register
+src_reg_for_float(struct gl_program *prog, float val)
+{
+ struct prog_src_register src;
+ unsigned swiz;
+
+ memset(&src, 0, sizeof(src));
+
+ src.File = PROGRAM_CONSTANT;
+ src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+ (gl_constant_value *) &val, 1, &swiz);
+ src.Swizzle = swiz;
+ return src;
+}
+
+static struct prog_src_register
+src_reg_for_vec4(struct gl_program *prog, const float *val)
+{
+ struct prog_src_register src;
+ unsigned swiz;
+
+ memset(&src, 0, sizeof(src));
+
+ src.File = PROGRAM_CONSTANT;
+ src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+ (gl_constant_value *) val, 4, &swiz);
+ src.Swizzle = swiz;
+ return src;
+}
+
+static bool
+src_regs_are_same(const struct prog_src_register *a,
+ const struct prog_src_register *b)
+{
+ return (a->File == b->File)
+ && (a->Index == b->Index)
+ && (a->Swizzle == b->Swizzle)
+ && (a->Abs == b->Abs)
+ && (a->Negate == b->Negate)
+ && (a->RelAddr == 0)
+ && (b->RelAddr == 0);
+}
+
+static void
+get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
+{
+ const gl_constant_value *const value =
+ prog->Parameters->ParameterValues[r->Index];
+
+ data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
+ data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
+ data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
+ data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
+
+ if (r->Abs) {
+ data[0] = fabsf(data[0]);
+ data[1] = fabsf(data[1]);
+ data[2] = fabsf(data[2]);
+ data[3] = fabsf(data[3]);
+ }
+
+ if (r->Negate & 0x01) {
+ data[0] = -data[0];
+ }
+
+ if (r->Negate & 0x02) {
+ data[1] = -data[1];
+ }
+
+ if (r->Negate & 0x04) {
+ data[2] = -data[2];
+ }
+
+ if (r->Negate & 0x08) {
+ data[3] = -data[3];
+ }
+}
+
+/**
+ * Try to replace instructions that produce a constant result with simple moves
+ *
+ * The hope is that a following copy propagation pass will eliminate the
+ * unnecessary move instructions.
+ */
+GLboolean
+_mesa_constant_fold(struct gl_program *prog)
+{
+ bool progress = false;
+ unsigned i;
+
+ for (i = 0; i < prog->NumInstructions; i++) {
+ struct prog_instruction *const inst = &prog->Instructions[i];
+
+ switch (inst->Opcode) {
+ case OPCODE_ADD:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = a[0] + b[0];
+ result[1] = a[1] + b[1];
+ result[2] = a[2] + b[2];
+ result[3] = a[3] + b[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_CMP:
+ /* FINISHME: We could also optimize CMP instructions where the first
+ * FINISHME: source is a constant that is either all < 0.0 or all
+ * FINISHME: >= 0.0.
+ */
+ if (src_regs_are_constant(inst, 3)) {
+ float a[4];
+ float b[4];
+ float c[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+ get_value(prog, &inst->SrcReg[2], c);
+
+ result[0] = a[0] < 0.0f ? b[0] : c[0];
+ result[1] = a[1] < 0.0f ? b[1] : c[1];
+ result[2] = a[2] < 0.0f ? b[2] : c[2];
+ result[3] = a[3] < 0.0f ? b[3] : c[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+ inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_DP2:
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result;
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ /* It seems like a loop could be used here, but we cleverly put
+ * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from
+ * the opcode results in various failures of the loop control.
+ */
+ result = (a[0] * b[0]) + (a[1] * b[1]);
+
+ if (inst->Opcode >= OPCODE_DP3)
+ result += a[2] * b[2];
+
+ if (inst->Opcode == OPCODE_DP4)
+ result += a[3] * b[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_MUL:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = a[0] * b[0];
+ result[1] = a[1] * b[1];
+ result[2] = a[2] * b[2];
+ result[3] = a[3] * b[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SEQ:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SGE:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SGT:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SLE:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SLT:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SNE:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 8a40fa69eca..25d9684b137 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -472,8 +472,7 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov)
mov->SrcReg[0].HasIndex2 == 0 &&
mov->SrcReg[0].RelAddr2 == 0 &&
mov->DstReg.RelAddr == 0 &&
- mov->DstReg.CondMask == COND_TR &&
- mov->SaturateMode == SATURATE_OFF;
+ mov->DstReg.CondMask == COND_TR;
}
@@ -482,7 +481,8 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov)
{
return
can_downward_mov_be_modifed(mov) &&
- mov->DstReg.File == PROGRAM_TEMPORARY;
+ mov->DstReg.File == PROGRAM_TEMPORARY &&
+ mov->SaturateMode == SATURATE_OFF;
}
@@ -657,6 +657,8 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst,
if (mask != (inst->DstReg.WriteMask & mask))
return GL_FALSE;
+ inst->SaturateMode |= mov->SaturateMode;
+
/* Depending on the instruction, we may need to recompute the swizzles.
* Also, some other instructions (like TEX) are not linear. We will only
* consider completely active sources and destinations
@@ -1319,6 +1321,15 @@ _mesa_simplify_cmp(struct gl_program * program)
inst->Opcode = OPCODE_MOV;
inst->SrcReg[0] = inst->SrcReg[1];
+
+ /* Unused operands are expected to have the file set to
+ * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes
+ * all of the sources.
+ */
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+ inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
}
}
if (dbg) {
@@ -1347,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
any_change = GL_TRUE;
if (_mesa_remove_dead_code_local(program))
any_change = GL_TRUE;
+
+ any_change = _mesa_constant_fold(program) || any_change;
_mesa_reallocate_registers(program);
} while (any_change);
}
diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 463f5fc51c4..9854fb7a491 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -44,4 +44,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions,
extern void
_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program);
+extern GLboolean
+_mesa_constant_fold(struct gl_program *prog);
+
#endif
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index 3570cab118b..49b3ffbdd5c 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -56,8 +56,8 @@ _mesa_new_parameter_list_sized(unsigned size)
p->Parameters = (struct gl_program_parameter *)
calloc(1, size * sizeof(struct gl_program_parameter));
- p->ParameterValues = (GLfloat (*)[4])
- _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16);
+ p->ParameterValues = (gl_constant_value (*)[4])
+ _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16);
if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) {
@@ -101,14 +101,15 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList)
* \param name the parameter name, will be duplicated/copied!
* \param size number of elements in 'values' vector (1..4, or more)
* \param datatype GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE.
- * \param values initial parameter value, up to 4 GLfloats, or NULL
+ * \param values initial parameter value, up to 4 gl_constant_values, or NULL
* \param state state indexes, or NULL
* \return index of new parameter in the list, or -1 if error (out of mem)
*/
GLint
_mesa_add_parameter(struct gl_program_parameter_list *paramList,
gl_register_file type, const char *name,
- GLuint size, GLenum datatype, const GLfloat *values,
+ GLuint size, GLenum datatype,
+ const gl_constant_value *values,
const gl_state_index state[STATE_LENGTH],
GLbitfield flags)
{
@@ -127,10 +128,10 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
oldNum * sizeof(struct gl_program_parameter),
paramList->Size * sizeof(struct gl_program_parameter));
- paramList->ParameterValues = (GLfloat (*)[4])
+ paramList->ParameterValues = (gl_constant_value (*)[4])
_mesa_align_realloc(paramList->ParameterValues, /* old buf */
- oldNum * 4 * sizeof(GLfloat), /* old size */
- paramList->Size * 4 *sizeof(GLfloat), /* new sz */
+ oldNum * 4 * sizeof(gl_constant_value),/* old sz */
+ paramList->Size*4*sizeof(gl_constant_value),/*new*/
16);
}
@@ -142,7 +143,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
return -1;
}
else {
- GLuint i;
+ GLuint i, j;
paramList->NumParameters = oldNum + sz4;
@@ -163,7 +164,8 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
}
else {
/* silence valgrind */
- ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0);
+ for (j = 0; j < 4; j++)
+ paramList->ParameterValues[oldNum + i][j].f = 0;
}
size -= 4;
}
@@ -184,7 +186,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
*/
GLint
_mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
- const char *name, const GLfloat values[4])
+ const char *name, const gl_constant_value values[4])
{
return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name,
4, GL_NONE, values, NULL, 0x0);
@@ -204,17 +206,17 @@ _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
*/
GLint
_mesa_add_named_constant(struct gl_program_parameter_list *paramList,
- const char *name, const GLfloat values[4],
+ const char *name, const gl_constant_value values[4],
GLuint size)
{
/* first check if this is a duplicate constant */
GLint pos;
for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) {
- const GLfloat *pvals = paramList->ParameterValues[pos];
- if (pvals[0] == values[0] &&
- pvals[1] == values[1] &&
- pvals[2] == values[2] &&
- pvals[3] == values[3] &&
+ const gl_constant_value *pvals = paramList->ParameterValues[pos];
+ if (pvals[0].u == values[0].u &&
+ pvals[1].u == values[1].u &&
+ pvals[2].u == values[2].u &&
+ pvals[3].u == values[3].u &&
strcmp(paramList->Parameters[pos].Name, name) == 0) {
/* Same name and value is already in the param list - reuse it */
return pos;
@@ -239,9 +241,9 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
* \return index/position of the new parameter in the parameter list.
*/
GLint
-_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
- const GLfloat values[4], GLuint size,
- GLuint *swizzleOut)
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+ const gl_constant_value values[4], GLuint size,
+ GLenum datatype, GLuint *swizzleOut)
{
GLint pos;
ASSERT(size >= 1);
@@ -262,7 +264,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
struct gl_program_parameter *p = paramList->Parameters + pos;
if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) {
/* ok, found room */
- GLfloat *pVal = paramList->ParameterValues[pos];
+ gl_constant_value *pVal = paramList->ParameterValues[pos];
GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */
pVal[p->Size] = values[0];
p->Size++;
@@ -274,7 +276,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
/* add a new parameter to store this constant */
pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL,
- size, GL_NONE, values, NULL, 0x0);
+ size, datatype, values, NULL, 0x0);
if (pos >= 0 && swizzleOut) {
if (size == 1)
*swizzleOut = SWIZZLE_XXXX;
@@ -285,6 +287,28 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
}
/**
+ * Add a new unnamed constant to the parameter list. This will be used
+ * when a fragment/vertex program contains something like this:
+ * MOV r, { 0, 1, 2, 3 };
+ * If swizzleOut is non-null we'll search the parameter list for an
+ * existing instance of the constant which matches with a swizzle.
+ *
+ * \param paramList the parameter list
+ * \param values four float values
+ * \param swizzleOut returns swizzle mask for accessing the constant
+ * \return index/position of the new parameter in the parameter list.
+ * \sa _mesa_add_typed_unnamed_constant
+ */
+GLint
+_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
+ const gl_constant_value values[4], GLuint size,
+ GLuint *swizzleOut)
+{
+ return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE,
+ swizzleOut);
+}
+
+/**
* Add parameter representing a varying variable.
*/
GLint
@@ -401,7 +425,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
* Lookup a parameter value by name in the given parameter list.
* \return pointer to the float[4] values.
*/
-GLfloat *
+gl_constant_value *
_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
GLsizei nameLen, const char *name)
{
@@ -465,7 +489,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
*/
GLboolean
_mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
- const GLfloat v[], GLuint vSize,
+ const gl_constant_value v[], GLuint vSize,
GLint *posOut, GLuint *swizzleOut)
{
GLuint i;
@@ -484,7 +508,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
/* swizzle not allowed */
GLuint j, match = 0;
for (j = 0; j < vSize; j++) {
- if (v[j] == list->ParameterValues[i][j])
+ if (v[j].u == list->ParameterValues[i][j].u)
match++;
}
if (match == vSize) {
@@ -498,7 +522,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
/* look for v[0] anywhere within float[4] value */
GLuint j;
for (j = 0; j < list->Parameters[i].Size; j++) {
- if (list->ParameterValues[i][j] == v[0]) {
+ if (list->ParameterValues[i][j].u == v[0].u) {
/* found it */
*posOut = i;
*swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
@@ -511,13 +535,13 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
GLuint swz[4];
GLuint match = 0, j, k;
for (j = 0; j < vSize; j++) {
- if (v[j] == list->ParameterValues[i][j]) {
+ if (v[j].u == list->ParameterValues[i][j].u) {
swz[j] = j;
match++;
}
else {
for (k = 0; k < list->Parameters[i].Size; k++) {
- if (v[j] == list->ParameterValues[i][k]) {
+ if (v[j].u == list->ParameterValues[i][k].u) {
swz[j] = k;
match++;
break;
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index 10cbbe57a6c..1a5ed343937 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -47,6 +47,17 @@
/*@}*/
+/**
+ * Actual data for constant values of parameters.
+ */
+typedef union gl_constant_value
+{
+ GLfloat f;
+ GLboolean b;
+ GLint i;
+ GLuint u;
+} gl_constant_value;
+
/**
* Program parameter.
@@ -81,7 +92,7 @@ struct gl_program_parameter_list
GLuint Size; /**< allocated size of Parameters, ParameterValues */
GLuint NumParameters; /**< number of parameters in arrays */
struct gl_program_parameter *Parameters; /**< Array [Size] */
- GLfloat (*ParameterValues)[4]; /**< Array [Size] of GLfloat[4] */
+ gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */
GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes
might invalidate ParameterValues[] */
};
@@ -112,22 +123,28 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list)
extern GLint
_mesa_add_parameter(struct gl_program_parameter_list *paramList,
gl_register_file type, const char *name,
- GLuint size, GLenum datatype, const GLfloat *values,
+ GLuint size, GLenum datatype,
+ const gl_constant_value *values,
const gl_state_index state[STATE_LENGTH],
GLbitfield flags);
extern GLint
_mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
- const char *name, const GLfloat values[4]);
+ const char *name, const gl_constant_value values[4]);
extern GLint
_mesa_add_named_constant(struct gl_program_parameter_list *paramList,
- const char *name, const GLfloat values[4],
+ const char *name, const gl_constant_value values[4],
GLuint size);
extern GLint
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+ const gl_constant_value values[4], GLuint size,
+ GLenum datatype, GLuint *swizzleOut);
+
+extern GLint
_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
- const GLfloat values[4], GLuint size,
+ const gl_constant_value values[4], GLuint size,
GLuint *swizzleOut);
extern GLint
@@ -143,7 +160,7 @@ extern GLint
_mesa_add_state_reference(struct gl_program_parameter_list *paramList,
const gl_state_index stateTokens[STATE_LENGTH]);
-extern GLfloat *
+extern gl_constant_value *
_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
GLsizei nameLen, const char *name);
@@ -153,7 +170,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
extern GLboolean
_mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
- const GLfloat v[], GLuint vSize,
+ const gl_constant_value v[], GLuint vSize,
GLint *posOut, GLuint *swizzleOut);
extern GLuint
diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c
index 90a9771080c..28fca3b92d9 100644
--- a/src/mesa/program/prog_parameter_layout.c
+++ b/src/mesa/program/prog_parameter_layout.c
@@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state)
switch (p->Type) {
case PROGRAM_CONSTANT: {
- const float *const v =
+ const gl_constant_value *const v =
state->prog->Parameters->ParameterValues[idx];
inst->Base.SrcReg[i].Index =
diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index 7c3b4909e73..70412b1fa6a 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f,
fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags);
for (i = 0; i < list->NumParameters; i++){
struct gl_program_parameter *param = list->Parameters + i;
- const GLfloat *v = list->ParameterValues[i];
+ const GLfloat *v = (GLfloat *) list->ParameterValues[i];
fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}",
i, param->Size,
_mesa_register_file_name(list->Parameters[i].Type),
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 16f9690e865..6aa2409e85e 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -1111,7 +1111,7 @@ _mesa_load_state_parameters(struct gl_context *ctx,
if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
_mesa_fetch_state(ctx,
paramList->Parameters[i].StateIndexes,
- paramList->ParameterValues[i]);
+ &paramList->ParameterValues[i][0].f);
}
}
}
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index adca094ee89..ecff2344a44 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
if (prog->String)
free(prog->String);
- _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
-
+ if (prog->Instructions) {
+ _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
+ }
if (prog->Parameters) {
_mesa_free_parameter_list(prog->Parameters);
}
@@ -1031,7 +1032,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog)
GLuint i;
GLuint whiteSwizzle;
GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters,
- white, 4, &whiteSwizzle);
+ (gl_constant_value *) white,
+ 4, &whiteSwizzle);
(void) whiteIndex;
diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y
index dbf5abaa617..dec35038be5 100644
--- a/src/mesa/program/program_parse.y
+++ b/src/mesa/program/program_parse.y
@@ -1854,64 +1854,64 @@ paramConstUse: paramConstScalarUse | paramConstVector;
paramConstScalarDecl: signedFloatConstant
{
$$.count = 4;
- $$.data[0] = $1;
- $$.data[1] = $1;
- $$.data[2] = $1;
- $$.data[3] = $1;
+ $$.data[0].f = $1;
+ $$.data[1].f = $1;
+ $$.data[2].f = $1;
+ $$.data[3].f = $1;
}
;
paramConstScalarUse: REAL
{
$$.count = 1;
- $$.data[0] = $1;
- $$.data[1] = $1;
- $$.data[2] = $1;
- $$.data[3] = $1;
+ $$.data[0].f = $1;
+ $$.data[1].f = $1;
+ $$.data[2].f = $1;
+ $$.data[3].f = $1;
}
| INTEGER
{
$$.count = 1;
- $$.data[0] = (float) $1;
- $$.data[1] = (float) $1;
- $$.data[2] = (float) $1;
- $$.data[3] = (float) $1;
+ $$.data[0].f = (float) $1;
+ $$.data[1].f = (float) $1;
+ $$.data[2].f = (float) $1;
+ $$.data[3].f = (float) $1;
}
;
paramConstVector: '{' signedFloatConstant '}'
{
$$.count = 4;
- $$.data[0] = $2;
- $$.data[1] = 0.0f;
- $$.data[2] = 0.0f;
- $$.data[3] = 1.0f;
+ $$.data[0].f = $2;
+ $$.data[1].f = 0.0f;
+ $$.data[2].f = 0.0f;
+ $$.data[3].f = 1.0f;
}
| '{' signedFloatConstant ',' signedFloatConstant '}'
{
$$.count = 4;
- $$.data[0] = $2;
- $$.data[1] = $4;
- $$.data[2] = 0.0f;
- $$.data[3] = 1.0f;
+ $$.data[0].f = $2;
+ $$.data[1].f = $4;
+ $$.data[2].f = 0.0f;
+ $$.data[3].f = 1.0f;
}
| '{' signedFloatConstant ',' signedFloatConstant ','
signedFloatConstant '}'
{
$$.count = 4;
- $$.data[0] = $2;
- $$.data[1] = $4;
- $$.data[2] = $6;
- $$.data[3] = 1.0f;
+ $$.data[0].f = $2;
+ $$.data[1].f = $4;
+ $$.data[2].f = $6;
+ $$.data[3].f = 1.0f;
}
| '{' signedFloatConstant ',' signedFloatConstant ','
signedFloatConstant ',' signedFloatConstant '}'
{
$$.count = 4;
- $$.data[0] = $2;
- $$.data[1] = $4;
- $$.data[2] = $6;
- $$.data[3] = $8;
+ $$.data[0].f = $2;
+ $$.data[1].f = $4;
+ $$.data[2].f = $6;
+ $$.data[3].f = $8;
}
;
diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h
index 8e5aaee95e5..5637598f3b3 100644
--- a/src/mesa/program/program_parser.h
+++ b/src/mesa/program/program_parser.h
@@ -23,6 +23,7 @@
#pragma once
#include "main/config.h"
+#include "program/prog_parameter.h"
struct gl_context;
@@ -96,7 +97,7 @@ struct asm_symbol {
struct asm_vector {
unsigned count;
- float data[4];
+ gl_constant_value data[4];
};
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index de96eb42c9b..f5b5174fc18 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
}
}
+/**
+ * Adds a conflict between base_reg and reg, and also between reg and
+ * anything that base_reg conflicts with.
+ *
+ * This can simplify code for setting up multiple register classes
+ * which are aggregates of some base hardware registers, compared to
+ * explicitly using ra_add_reg_conflict.
+ */
+void
+ra_add_transitive_reg_conflict(struct ra_regs *regs,
+ unsigned int base_reg, unsigned int reg)
+{
+ int i;
+
+ ra_add_reg_conflict(regs, reg, base_reg);
+
+ for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
+ ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
+ }
+}
+
unsigned int
ra_alloc_reg_class(struct ra_regs *regs)
{
diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h
index 5b95833f394..ee2e58a4756 100644
--- a/src/mesa/program/register_allocate.h
+++ b/src/mesa/program/register_allocate.h
@@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count);
unsigned int ra_alloc_reg_class(struct ra_regs *regs);
void ra_add_reg_conflict(struct ra_regs *regs,
unsigned int r1, unsigned int r2);
+void ra_add_transitive_reg_conflict(struct ra_regs *regs,
+ unsigned int base_reg, unsigned int reg);
void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
void ra_set_finalize(struct ra_regs *regs);
/** @} */
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index 1457d1199fa..e8d34c670a9 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
index += getname.offset;
- return prog->Parameters->ParameterValues[index][0];
+ return prog->Parameters->ParameterValues[index][0].f;
}
}