diff options
Diffstat (limited to 'src/mesa/shader')
-rw-r--r-- | src/mesa/shader/arbprogparse.c | 14 | ||||
-rw-r--r-- | src/mesa/shader/nvfragparse.c | 2 | ||||
-rw-r--r-- | src/mesa/shader/nvprogram.c | 7 | ||||
-rw-r--r-- | src/mesa/shader/nvvertexec.c | 349 | ||||
-rw-r--r-- | src/mesa/shader/nvvertexec.h | 34 | ||||
-rw-r--r-- | src/mesa/shader/nvvertparse.c | 2 | ||||
-rw-r--r-- | src/mesa/shader/program.c | 98 | ||||
-rw-r--r-- | src/mesa/shader/program_instruction.h | 2 | ||||
-rw-r--r-- | src/mesa/shader/programopt.c | 7 |
9 files changed, 259 insertions, 256 deletions
diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index b8e5e4bd8ae..43e3bc183d4 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 6.5.1 + * Version: 6.5.2 * * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. * @@ -2571,8 +2571,6 @@ parse_fp_vector_src_reg(GLcontext * ctx, const GLubyte ** inst, reg->File = file; reg->Index = index; - reg->Abs = 0; /* NV only */ - reg->NegateAbs = 0; /* NV only */ reg->NegateBase = negate; reg->Swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); return 0; @@ -2595,8 +2593,6 @@ parse_fp_dst_reg(GLcontext * ctx, const GLubyte ** inst, if (parse_masked_dst_reg (ctx, inst, vc_head, Program, &file, &idx, &mask)) return 1; - reg->CondMask = 0; /* NV only */ - reg->CondSwizzle = 0; /* NV only */ reg->File = file; reg->Index = idx; reg->WriteMask = mask; @@ -2632,8 +2628,6 @@ parse_fp_scalar_src_reg (GLcontext * ctx, const GLubyte ** inst, reg->File = File; reg->Index = Index; - reg->Abs = 0; /* NV only */ - reg->NegateAbs = 0; /* NV only */ reg->NegateBase = Negate; reg->Swizzle = (Swizzle[0] << 0); @@ -2656,7 +2650,7 @@ parse_fp_instruction (GLcontext * ctx, const GLubyte ** inst, GLubyte instClass, type, code; GLboolean rel; - _mesa_init_instruction(fp); + _mesa_init_instructions(fp, 1); /* Record the position in the program string for debugging */ fp->StringPos = Program->Position; @@ -3148,7 +3142,7 @@ parse_vp_instruction (GLcontext * ctx, const GLubyte ** inst, /* The actual opcode name */ code = *(*inst)++; - _mesa_init_instruction(vp); + _mesa_init_instructions(vp, 1); /* Record the position in the program string for debugging */ vp->StringPos = Program->Position; @@ -3690,7 +3684,7 @@ parse_instructions(GLcontext * ctx, const GLubyte * inst, /* Finally, tag on an OPCODE_END instruction */ { const GLuint numInst = Program->Base.NumInstructions; - _mesa_init_instruction(Program->Base.Instructions + numInst); + _mesa_init_instructions(Program->Base.Instructions + numInst, 1); Program->Base.Instructions[numInst].Opcode = OPCODE_END; /* YYY Wrong Position in program, whatever, at least not random -> crash Program->Position = parse_position (&inst); diff --git a/src/mesa/shader/nvfragparse.c b/src/mesa/shader/nvfragparse.c index 49ce220944e..5f3a30b7416 100644 --- a/src/mesa/shader/nvfragparse.c +++ b/src/mesa/shader/nvfragparse.c @@ -1273,7 +1273,7 @@ Parse_InstructionSequence(struct parse_state *parseState, GLubyte token[100]; /* Initialize the instruction */ - _mesa_init_instruction(inst); + _mesa_init_instructions(inst, 1); /* special instructions */ if (Parse_String(parseState, "DEFINE")) { diff --git a/src/mesa/shader/nvprogram.c b/src/mesa/shader/nvprogram.c index 47d2b61a628..0e9a01dcab8 100644 --- a/src/mesa/shader/nvprogram.c +++ b/src/mesa/shader/nvprogram.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 6.5.1 + * Version: 6.5.2 * * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. * @@ -77,10 +77,7 @@ _mesa_ExecuteProgramNV(GLenum target, GLuint id, const GLfloat *params) return; } - _mesa_init_vp_per_vertex_registers(ctx); - _mesa_init_vp_per_primitive_registers(ctx); - COPY_4V(ctx->VertexProgram.Machine.Inputs[VERT_ATTRIB_POS], params); - _mesa_exec_vertex_program(ctx, vprog); + _mesa_exec_vertex_state_program(ctx, vprog, params); } diff --git a/src/mesa/shader/nvvertexec.c b/src/mesa/shader/nvvertexec.c index 19855936593..10962d7e14a 100644 --- a/src/mesa/shader/nvvertexec.c +++ b/src/mesa/shader/nvvertexec.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 6.5.2 * * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. * @@ -47,23 +47,23 @@ static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; * per-vertex. */ void -_mesa_init_vp_per_vertex_registers(GLcontext *ctx) +_mesa_init_vp_per_vertex_registers(GLcontext *ctx, struct vp_machine *machine) { /* Input registers get initialized from the current vertex attribs */ - MEMCPY(ctx->VertexProgram.Machine.Inputs, ctx->Current.Attrib, + MEMCPY(machine->Inputs, ctx->Current.Attrib, MAX_VERTEX_PROGRAM_ATTRIBS * 4 * sizeof(GLfloat)); if (ctx->VertexProgram.Current->IsNVProgram) { GLuint i; /* Output/result regs are initialized to [0,0,0,1] */ for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) { - ASSIGN_4V(ctx->VertexProgram.Machine.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F); + ASSIGN_4V(machine->Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F); } /* Temp regs are initialized to [0,0,0,0] */ for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) { - ASSIGN_4V(ctx->VertexProgram.Machine.Temporaries[i], 0.0F, 0.0F, 0.0F, 0.0F); + ASSIGN_4V(machine->Temporaries[i], 0.0F, 0.0F, 0.0F, 0.0F); } - ASSIGN_4V(ctx->VertexProgram.Machine.AddressReg, 0, 0, 0, 0); + ASSIGN_4V(machine->AddressReg, 0, 0, 0, 0); } } @@ -139,7 +139,7 @@ _mesa_init_vp_per_primitive_registers(GLcontext *ctx) continue; } - /* load the matrix */ + /* load the matrix values into sequential registers */ if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) { load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m); } @@ -176,36 +176,37 @@ _mesa_init_vp_per_primitive_registers(GLcontext *ctx) * For debugging. Dump the current vertex program machine registers. */ void -_mesa_dump_vp_state( const struct gl_vertex_program_state *state ) +_mesa_dump_vp_state( const struct gl_vertex_program_state *state, + const struct vp_machine *machine) { int i; _mesa_printf("VertexIn:\n"); for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) { _mesa_printf("%d: %f %f %f %f ", i, - state->Machine.Inputs[i][0], - state->Machine.Inputs[i][1], - state->Machine.Inputs[i][2], - state->Machine.Inputs[i][3]); + machine->Inputs[i][0], + machine->Inputs[i][1], + machine->Inputs[i][2], + machine->Inputs[i][3]); } _mesa_printf("\n"); _mesa_printf("VertexOut:\n"); for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) { _mesa_printf("%d: %f %f %f %f ", i, - state->Machine.Outputs[i][0], - state->Machine.Outputs[i][1], - state->Machine.Outputs[i][2], - state->Machine.Outputs[i][3]); + machine->Outputs[i][0], + machine->Outputs[i][1], + machine->Outputs[i][2], + machine->Outputs[i][3]); } _mesa_printf("\n"); _mesa_printf("Registers:\n"); for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) { _mesa_printf("%d: %f %f %f %f ", i, - state->Machine.Temporaries[i][0], - state->Machine.Temporaries[i][1], - state->Machine.Temporaries[i][2], - state->Machine.Temporaries[i][3]); + machine->Temporaries[i][0], + machine->Temporaries[i][1], + machine->Temporaries[i][2], + machine->Temporaries[i][3]); } _mesa_printf("\n"); @@ -227,41 +228,45 @@ _mesa_dump_vp_state( const struct gl_vertex_program_state *state ) * source register. */ static INLINE const GLfloat * -get_register_pointer( const struct prog_src_register *source, - const struct gl_vertex_program_state *state ) +get_register_pointer( GLcontext *ctx, + const struct prog_src_register *source, + struct vp_machine *machine, + const struct gl_vertex_program *program ) { if (source->RelAddr) { - const GLint reg = source->Index + state->Machine.AddressReg[0]; + const GLint reg = source->Index + machine->AddressReg[0]; ASSERT( (source->File == PROGRAM_ENV_PARAM) || (source->File == PROGRAM_STATE_VAR) ); if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS) return ZeroVec; else if (source->File == PROGRAM_ENV_PARAM) - return state->Parameters[reg]; - else - return state->Current->Base.Parameters->ParameterValues[reg]; + return ctx->VertexProgram.Parameters[reg]; + else { + ASSERT(source->File == PROGRAM_LOCAL_PARAM); + return program->Base.Parameters->ParameterValues[reg]; + } } else { switch (source->File) { case PROGRAM_TEMPORARY: ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_TEMPS); - return state->Machine.Temporaries[source->Index]; + return machine->Temporaries[source->Index]; case PROGRAM_INPUT: ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_INPUTS); - return state->Machine.Inputs[source->Index]; + return machine->Inputs[source->Index]; case PROGRAM_OUTPUT: /* This is only needed for the PRINT instruction */ ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_OUTPUTS); - return state->Machine.Outputs[source->Index]; + return machine->Outputs[source->Index]; case PROGRAM_LOCAL_PARAM: ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS); - return state->Current->Base.LocalParams[source->Index]; + return program->Base.LocalParams[source->Index]; case PROGRAM_ENV_PARAM: ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_PARAMS); - return state->Parameters[source->Index]; + return ctx->VertexProgram.Parameters[source->Index]; case PROGRAM_STATE_VAR: - ASSERT(source->Index < state->Current->Base.Parameters->NumParameters); - return state->Current->Base.Parameters->ParameterValues[source->Index]; + ASSERT(source->Index < program->Base.Parameters->NumParameters); + return program->Base.Parameters->ParameterValues[source->Index]; default: _mesa_problem(NULL, "Bad source register file in get_register_pointer"); @@ -277,23 +282,23 @@ get_register_pointer( const struct prog_src_register *source, * Apply swizzling and negating as needed. */ static INLINE void -fetch_vector4( const struct prog_src_register *source, - const struct gl_vertex_program_state *state, +fetch_vector4( GLcontext *ctx, + const struct prog_src_register *source, + struct vp_machine *machine, + const struct gl_vertex_program *program, GLfloat result[4] ) { - const GLfloat *src = get_register_pointer(source, state); - + const GLfloat *src = get_register_pointer(ctx, source, machine, program); + ASSERT(src); + result[0] = src[GET_SWZ(source->Swizzle, 0)]; + result[1] = src[GET_SWZ(source->Swizzle, 1)]; + result[2] = src[GET_SWZ(source->Swizzle, 2)]; + result[3] = src[GET_SWZ(source->Swizzle, 3)]; if (source->NegateBase) { - result[0] = -src[GET_SWZ(source->Swizzle, 0)]; - result[1] = -src[GET_SWZ(source->Swizzle, 1)]; - result[2] = -src[GET_SWZ(source->Swizzle, 2)]; - result[3] = -src[GET_SWZ(source->Swizzle, 3)]; - } - else { - result[0] = src[GET_SWZ(source->Swizzle, 0)]; - result[1] = src[GET_SWZ(source->Swizzle, 1)]; - result[2] = src[GET_SWZ(source->Swizzle, 2)]; - result[3] = src[GET_SWZ(source->Swizzle, 3)]; + result[0] = -result[0]; + result[1] = -result[1]; + result[2] = -result[2]; + result[3] = -result[3]; } } @@ -303,17 +308,17 @@ fetch_vector4( const struct prog_src_register *source, * As above, but only return result[0] element. */ static INLINE void -fetch_vector1( const struct prog_src_register *source, - const struct gl_vertex_program_state *state, +fetch_vector1( GLcontext *ctx, + const struct prog_src_register *source, + struct vp_machine *machine, + const struct gl_vertex_program *program, GLfloat result[4] ) { - const GLfloat *src = get_register_pointer(source, state); - + const GLfloat *src = get_register_pointer(ctx, source, machine, program); + ASSERT(src); + result[0] = src[GET_SWZ(source->Swizzle, 0)]; if (source->NegateBase) { - result[0] = -src[GET_SWZ(source->Swizzle, 0)]; - } - else { - result[0] = src[GET_SWZ(source->Swizzle, 0)]; + result[0] = -result[0]; } } @@ -322,19 +327,21 @@ fetch_vector1( const struct prog_src_register *source, * Store 4 floats into a register. */ static void -store_vector4( const struct prog_dst_register *dest, - struct gl_vertex_program_state *state, +store_vector4( const struct prog_instruction *inst, + struct vp_machine *machine, const GLfloat value[4] ) { + const struct prog_dst_register *dest = &(inst->DstReg); GLfloat *dst; switch (dest->File) { - case PROGRAM_TEMPORARY: - dst = state->Machine.Temporaries[dest->Index]; - break; case PROGRAM_OUTPUT: - dst = state->Machine.Outputs[dest->Index]; + dst = machine->Outputs[dest->Index]; + break; + case PROGRAM_TEMPORARY: + dst = machine->Temporaries[dest->Index]; break; case PROGRAM_ENV_PARAM: + /* Only for VP state programs */ { /* a slight hack */ GET_CURRENT_CONTEXT(ctx); @@ -379,9 +386,10 @@ store_vector4( const struct prog_dst_register *dest, * Execute the given vertex program */ void -_mesa_exec_vertex_program(GLcontext *ctx, const struct gl_vertex_program *program) +_mesa_exec_vertex_program(GLcontext *ctx, + struct vp_machine *machine, + const struct gl_vertex_program *program) { - struct gl_vertex_program_state *state = &ctx->VertexProgram; const struct prog_instruction *inst; ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */ @@ -390,9 +398,9 @@ _mesa_exec_vertex_program(GLcontext *ctx, const struct gl_vertex_program *progra * by the MVP matrix and store in the vertex position result register. */ if (ctx->VertexProgram.Current->IsPositionInvariant) { - TRANSFORM_POINT( ctx->VertexProgram.Machine.Outputs[VERT_RESULT_HPOS], + TRANSFORM_POINT( machine->Outputs[VERT_RESULT_HPOS], ctx->_ModelProjectMatrix.m, - ctx->VertexProgram.Machine.Inputs[VERT_ATTRIB_POS]); + machine->Inputs[VERT_ATTRIB_POS]); /* XXX: This could go elsewhere */ ctx->VertexProgram.Current->Base.OutputsWritten |= VERT_BIT_POS; @@ -411,15 +419,15 @@ _mesa_exec_vertex_program(GLcontext *ctx, const struct gl_vertex_program *progra case OPCODE_MOV: { GLfloat t[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - store_vector4( &inst->DstReg, state, t ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_LIT: { const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */ GLfloat t[4], lit[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); t[0] = MAX2(t[0], 0.0F); t[1] = MAX2(t[1], 0.0F); t[3] = CLAMP(t[3], -(128.0F - epsilon), (128.0F - epsilon)); @@ -427,32 +435,32 @@ _mesa_exec_vertex_program(GLcontext *ctx, const struct gl_vertex_program *progra lit[1] = t[0]; lit[2] = (t[0] > 0.0) ? (GLfloat) _mesa_pow(t[1], t[3]) : 0.0F; lit[3] = 1.0; - store_vector4( &inst->DstReg, state, lit ); + store_vector4( inst, machine, lit ); } break; case OPCODE_RCP: { GLfloat t[4]; - fetch_vector1( &inst->SrcReg[0], state, t ); + fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t ); if (t[0] != 1.0F) t[0] = 1.0F / t[0]; /* div by zero is infinity! */ t[1] = t[2] = t[3] = t[0]; - store_vector4( &inst->DstReg, state, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_RSQ: { GLfloat t[4]; - fetch_vector1( &inst->SrcReg[0], state, t ); + fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t ); t[0] = INV_SQRTF(FABSF(t[0])); t[1] = t[2] = t[3] = t[0]; - store_vector4( &inst->DstReg, state, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_EXP: { GLfloat t[4], q[4], floor_t0; - fetch_vector1( &inst->SrcReg[0], state, t ); + fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t ); floor_t0 = FLOORF(t[0]); if (floor_t0 > FLT_MAX_EXP) { SET_POS_INFINITY(q[0]); @@ -475,13 +483,13 @@ _mesa_exec_vertex_program(GLcontext *ctx, const struct gl_vertex_program *progra } q[1] = t[0] - floor_t0; q[3] = 1.0F; - store_vector4( &inst->DstReg, state, q ); + store_vector4( inst, machine, q ); } break; case OPCODE_LOG: { GLfloat t[4], q[4], abs_t0; - fetch_vector1( &inst->SrcReg[0], state, t ); + fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t ); abs_t0 = FABSF(t[0]); if (abs_t0 != 0.0F) { /* Since we really can't handle infinite values on VMS @@ -512,147 +520,147 @@ _mesa_exec_vertex_program(GLcontext *ctx, const struct gl_vertex_program *progra SET_NEG_INFINITY(q[2]); } q[3] = 1.0; - store_vector4( &inst->DstReg, state, q ); + store_vector4( inst, machine, q ); } break; case OPCODE_MUL: { GLfloat t[4], u[4], prod[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); prod[0] = t[0] * u[0]; prod[1] = t[1] * u[1]; prod[2] = t[2] * u[2]; prod[3] = t[3] * u[3]; - store_vector4( &inst->DstReg, state, prod ); + store_vector4( inst, machine, prod ); } break; case OPCODE_ADD: { GLfloat t[4], u[4], sum[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); sum[0] = t[0] + u[0]; sum[1] = t[1] + u[1]; sum[2] = t[2] + u[2]; sum[3] = t[3] + u[3]; - store_vector4( &inst->DstReg, state, sum ); + store_vector4( inst, machine, sum ); } break; case OPCODE_DP3: { GLfloat t[4], u[4], dot[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2]; dot[1] = dot[2] = dot[3] = dot[0]; - store_vector4( &inst->DstReg, state, dot ); + store_vector4( inst, machine, dot ); } break; case OPCODE_DP4: { GLfloat t[4], u[4], dot[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3]; dot[1] = dot[2] = dot[3] = dot[0]; - store_vector4( &inst->DstReg, state, dot ); + store_vector4( inst, machine, dot ); } break; case OPCODE_DST: { GLfloat t[4], u[4], dst[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); dst[0] = 1.0F; dst[1] = t[1] * u[1]; dst[2] = t[2]; dst[3] = u[3]; - store_vector4( &inst->DstReg, state, dst ); + store_vector4( inst, machine, dst ); } break; case OPCODE_MIN: { GLfloat t[4], u[4], min[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); min[0] = (t[0] < u[0]) ? t[0] : u[0]; min[1] = (t[1] < u[1]) ? t[1] : u[1]; min[2] = (t[2] < u[2]) ? t[2] : u[2]; min[3] = (t[3] < u[3]) ? t[3] : u[3]; - store_vector4( &inst->DstReg, state, min ); + store_vector4( inst, machine, min ); } break; case OPCODE_MAX: { GLfloat t[4], u[4], max[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); max[0] = (t[0] > u[0]) ? t[0] : u[0]; max[1] = (t[1] > u[1]) ? t[1] : u[1]; max[2] = (t[2] > u[2]) ? t[2] : u[2]; max[3] = (t[3] > u[3]) ? t[3] : u[3]; - store_vector4( &inst->DstReg, state, max ); + store_vector4( inst, machine, max ); } break; case OPCODE_SLT: { GLfloat t[4], u[4], slt[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F; slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F; slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F; slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F; - store_vector4( &inst->DstReg, state, slt ); + store_vector4( inst, machine, slt ); } break; case OPCODE_SGE: { GLfloat t[4], u[4], sge[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F; sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F; sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F; sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F; - store_vector4( &inst->DstReg, state, sge ); + store_vector4( inst, machine, sge ); } break; case OPCODE_MAD: { GLfloat t[4], u[4], v[4], sum[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); - fetch_vector4( &inst->SrcReg[2], state, v ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); + fetch_vector4( ctx, &inst->SrcReg[2], machine, program, v ); sum[0] = t[0] * u[0] + v[0]; sum[1] = t[1] * u[1] + v[1]; sum[2] = t[2] * u[2] + v[2]; sum[3] = t[3] * u[3] + v[3]; - store_vector4( &inst->DstReg, state, sum ); + store_vector4( inst, machine, sum ); } break; case OPCODE_ARL: { GLfloat t[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - state->Machine.AddressReg[0] = (GLint) FLOORF(t[0]); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + machine->AddressReg[0] = (GLint) FLOORF(t[0]); } break; case OPCODE_DPH: { GLfloat t[4], u[4], dot[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3]; dot[1] = dot[2] = dot[3] = dot[0]; - store_vector4( &inst->DstReg, state, dot ); + store_vector4( inst, machine, dot ); } break; case OPCODE_RCC: { GLfloat t[4], u; - fetch_vector1( &inst->SrcReg[0], state, t ); + fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t ); if (t[0] == 1.0F) u = 1.0F; else @@ -674,115 +682,120 @@ _mesa_exec_vertex_program(GLcontext *ctx, const struct gl_vertex_program *progra } } t[0] = t[1] = t[2] = t[3] = u; - store_vector4( &inst->DstReg, state, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_SUB: /* GL_NV_vertex_program1_1 */ { GLfloat t[4], u[4], sum[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); sum[0] = t[0] - u[0]; sum[1] = t[1] - u[1]; sum[2] = t[2] - u[2]; sum[3] = t[3] - u[3]; - store_vector4( &inst->DstReg, state, sum ); + store_vector4( inst, machine, sum ); } break; case OPCODE_ABS: /* GL_NV_vertex_program1_1 */ { GLfloat t[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); if (t[0] < 0.0) t[0] = -t[0]; if (t[1] < 0.0) t[1] = -t[1]; if (t[2] < 0.0) t[2] = -t[2]; if (t[3] < 0.0) t[3] = -t[3]; - store_vector4( &inst->DstReg, state, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_FLR: /* GL_ARB_vertex_program */ { GLfloat t[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); t[0] = FLOORF(t[0]); t[1] = FLOORF(t[1]); t[2] = FLOORF(t[2]); t[3] = FLOORF(t[3]); - store_vector4( &inst->DstReg, state, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_FRC: /* GL_ARB_vertex_program */ { GLfloat t[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); t[0] = t[0] - FLOORF(t[0]); t[1] = t[1] - FLOORF(t[1]); t[2] = t[2] - FLOORF(t[2]); t[3] = t[3] - FLOORF(t[3]); - store_vector4( &inst->DstReg, state, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_EX2: /* GL_ARB_vertex_program */ { GLfloat t[4]; - fetch_vector1( &inst->SrcReg[0], state, t ); + fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t ); t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]); - store_vector4( &inst->DstReg, state, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_LG2: /* GL_ARB_vertex_program */ { GLfloat t[4]; - fetch_vector1( &inst->SrcReg[0], state, t ); + fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t ); t[0] = t[1] = t[2] = t[3] = LOG2(t[0]); - store_vector4( &inst->DstReg, state, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_POW: /* GL_ARB_vertex_program */ { GLfloat t[4], u[4]; - fetch_vector1( &inst->SrcReg[0], state, t ); - fetch_vector1( &inst->SrcReg[1], state, u ); + fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector1( ctx, &inst->SrcReg[1], machine, program, u ); t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]); - store_vector4( &inst->DstReg, state, t ); + store_vector4( inst, machine, t ); } break; case OPCODE_XPD: /* GL_ARB_vertex_program */ { GLfloat t[4], u[4], cross[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); - fetch_vector4( &inst->SrcReg[1], state, u ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); + fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u ); cross[0] = t[1] * u[2] - t[2] * u[1]; cross[1] = t[2] * u[0] - t[0] * u[2]; cross[2] = t[0] * u[1] - t[1] * u[0]; - store_vector4( &inst->DstReg, state, cross ); + store_vector4( inst, machine, cross ); } break; case OPCODE_SWZ: /* GL_ARB_vertex_program */ { const struct prog_src_register *source = &inst->SrcReg[0]; - const GLfloat *src = get_register_pointer(source, state); + const GLfloat *src = get_register_pointer(ctx, source, + machine, program); GLfloat result[4]; GLuint i; /* do extended swizzling here */ for (i = 0; i < 4; i++) { - if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ZERO) + const GLuint swz = GET_SWZ(source->Swizzle, i); + if (swz == SWIZZLE_ZERO) result[i] = 0.0; - else if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ONE) + else if (swz == SWIZZLE_ONE) result[i] = 1.0; - else - result[i] = src[GET_SWZ(source->Swizzle, i)]; + else { + ASSERT(swz >= 0); + ASSERT(swz <= 3); + result[i] = src[swz]; + } if (source->NegateBase & (1 << i)) result[i] = -result[i]; } - store_vector4( &inst->DstReg, state, result ); + store_vector4( inst, machine, result ); } break; case OPCODE_PRINT: if (inst->SrcReg[0].File) { GLfloat t[4]; - fetch_vector4( &inst->SrcReg[0], state, t ); + fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t ); _mesa_printf("%s%g, %g, %g, %g\n", (char *) inst->Data, t[0], t[1], t[2], t[3]); } @@ -805,48 +818,18 @@ _mesa_exec_vertex_program(GLcontext *ctx, const struct gl_vertex_program *progra } - /** -Thoughts on vertex program optimization: - -The obvious thing to do is to compile the vertex program into X86/SSE/3DNow! -assembly code. That will probably be a lot of work. - -Another approach might be to replace the vp_instruction->Opcode field with -a pointer to a specialized C function which executes the instruction. -In particular we can write functions which skip swizzling, negating, -masking, relative addressing, etc. when they're not needed. - -For example: - -void simple_add( struct prog_instruction *inst ) + * Execute a vertex state program. + * \sa _mesa_ExecuteProgramNV + */ +void +_mesa_exec_vertex_state_program(GLcontext *ctx, + struct gl_vertex_program *vprog, + const GLfloat *params) { - GLfloat *sum = machine->Registers[inst->DstReg.Register]; - GLfloat *a = machine->Registers[inst->SrcReg[0].Register]; - GLfloat *b = machine->Registers[inst->SrcReg[1].Register]; - sum[0] = a[0] + b[0]; - sum[1] = a[1] + b[1]; - sum[2] = a[2] + b[2]; - sum[3] = a[3] + b[3]; + struct vp_machine machine; + _mesa_init_vp_per_vertex_registers(ctx, &machine); + _mesa_init_vp_per_primitive_registers(ctx); + COPY_4V(machine.Inputs[VERT_ATTRIB_POS], params); + _mesa_exec_vertex_program(ctx, &machine, vprog); } - -*/ - -/* - -KW: - -A first step would be to 'vectorize' the programs in the same way as -the normal transformation code in the tnl module. Thus each opcode -takes zero or more input vectors (registers) and produces one or more -output vectors. - -These operations would intially be coded in C, with machine-specific -assembly following, as is currently the case for matrix -transformations in the math/ directory. The preprocessing scheme for -selecting simpler operations Brian describes above would also work -here. - -This should give reasonable performance without excessive effort. - -*/ diff --git a/src/mesa/shader/nvvertexec.h b/src/mesa/shader/nvvertexec.h index e0fd46a7662..b1cf31bd3c4 100644 --- a/src/mesa/shader/nvvertexec.h +++ b/src/mesa/shader/nvvertexec.h @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.1 + * Version: 6.5.2 * - * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,16 +28,40 @@ #ifndef NVVERTEXEC_H #define NVVERTEXEC_H + +/** + * Virtual vertex program machine state. + * Only used during program execution. + */ +struct vp_machine +{ + GLfloat Temporaries[MAX_NV_VERTEX_PROGRAM_TEMPS][4]; + GLfloat Inputs[MAX_NV_VERTEX_PROGRAM_INPUTS][4]; + GLuint InputsSize[MAX_NV_VERTEX_PROGRAM_INPUTS]; + GLfloat Outputs[MAX_NV_VERTEX_PROGRAM_OUTPUTS][4]; + GLint AddressReg[4]; +}; + + + extern void -_mesa_init_vp_per_vertex_registers(GLcontext *ctx); +_mesa_init_vp_per_vertex_registers(GLcontext *ctx, struct vp_machine *machine); extern void _mesa_init_vp_per_primitive_registers(GLcontext *ctx); extern void -_mesa_exec_vertex_program(GLcontext *ctx, const struct gl_vertex_program *program); +_mesa_exec_vertex_program(GLcontext *ctx, + struct vp_machine *machine, + const struct gl_vertex_program *program); + +extern void +_mesa_exec_vertex_state_program(GLcontext *ctx, + struct gl_vertex_program *vprog, + const GLfloat *params); extern void -_mesa_dump_vp_state( const struct gl_vertex_program_state *state ); +_mesa_dump_vp_state( const struct gl_vertex_program_state *state, + const struct vp_machine *machine); #endif diff --git a/src/mesa/shader/nvvertparse.c b/src/mesa/shader/nvvertparse.c index f3821d7f43a..ecfe8ec334f 100644 --- a/src/mesa/shader/nvvertparse.c +++ b/src/mesa/shader/nvvertparse.c @@ -1143,7 +1143,7 @@ Parse_InstructionSequence(struct parse_state *parseState, struct prog_instruction *inst = program + parseState->numInst; /* Initialize the instruction */ - _mesa_init_instruction(inst); + _mesa_init_instructions(inst, 1); if (Parse_String(parseState, "MOV")) { if (!Parse_UnaryOpInstruction(parseState, inst, OPCODE_MOV)) diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c index 8ac38ae1199..681584941e7 100644 --- a/src/mesa/shader/program.c +++ b/src/mesa/shader/program.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 6.5.1 + * Version: 6.5.2 * * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. * @@ -45,7 +45,7 @@ static const char * make_state_string(const GLint stateTokens[6]); -static GLuint +static GLbitfield make_state_flags(const GLint state[]); @@ -485,8 +485,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, paramList->Parameters[index].StateIndexes[i] = (enum state_index) stateTokens[i]; } - paramList->StateFlags |= - make_state_flags(stateTokens); + paramList->StateFlags |= make_state_flags(stateTokens); } /* free name string here since we duplicated it in add_parameter() */ @@ -580,37 +579,29 @@ _mesa_fetch_state(GLcontext *ctx, const enum state_index state[], { /* state[1] is either 0=front or 1=back side */ const GLuint face = (GLuint) state[1]; + const struct gl_material *mat = &ctx->Light.Material; + ASSERT(face == 0 || face == 1); + /* we rely on tokens numbered so that _BACK_ == _FRONT_+ 1 */ + ASSERT(MAT_ATTRIB_FRONT_AMBIENT + 1 == MAT_ATTRIB_BACK_AMBIENT); + /* XXX we could get rid of this switch entirely with a little + * work in arbprogparse.c's parse_state_single_item(). + */ /* state[2] is the material attribute */ switch (state[2]) { case STATE_AMBIENT: - if (face == 0) - COPY_4V(value, ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]); - else - COPY_4V(value, ctx->Light.Material.Attrib[MAT_ATTRIB_BACK_AMBIENT]); + COPY_4V(value, mat->Attrib[MAT_ATTRIB_FRONT_AMBIENT + face]); return; case STATE_DIFFUSE: - if (face == 0) - COPY_4V(value, ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE]); - else - COPY_4V(value, ctx->Light.Material.Attrib[MAT_ATTRIB_BACK_DIFFUSE]); + COPY_4V(value, mat->Attrib[MAT_ATTRIB_FRONT_DIFFUSE + face]); return; case STATE_SPECULAR: - if (face == 0) - COPY_4V(value, ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SPECULAR]); - else - COPY_4V(value, ctx->Light.Material.Attrib[MAT_ATTRIB_BACK_SPECULAR]); + COPY_4V(value, mat->Attrib[MAT_ATTRIB_FRONT_SPECULAR + face]); return; case STATE_EMISSION: - if (face == 0) - COPY_4V(value, ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]); - else - COPY_4V(value, ctx->Light.Material.Attrib[MAT_ATTRIB_BACK_EMISSION]); + COPY_4V(value, mat->Attrib[MAT_ATTRIB_FRONT_EMISSION + face]); return; case STATE_SHININESS: - if (face == 0) - value[0] = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0]; - else - value[0] = ctx->Light.Material.Attrib[MAT_ATTRIB_BACK_SHININESS][0]; + value[0] = mat->Attrib[MAT_ATTRIB_FRONT_SHININESS + face][0]; value[1] = 0.0F; value[2] = 0.0F; value[3] = 1.0F; @@ -941,10 +932,14 @@ _mesa_fetch_state(GLcontext *ctx, const enum state_index state[], /** - * Return a bit mask of the Mesa state flags under which a parameter's - * value might change. + * Return a bitmask of the Mesa state flags (_NEW_* values) which would + * indicate that the given context state may have changed. + * The bitmask is used during validation to determine if we need to update + * vertex/fragment program parameters (like "state.material.color") when + * some GL state has changed. */ -static GLuint make_state_flags(const GLint state[]) +static GLbitfield +make_state_flags(const GLint state[]) { switch (state[0]) { case STATE_MATERIAL: @@ -1307,26 +1302,32 @@ _mesa_load_state_parameters(GLcontext *ctx, /** * Initialize program instruction fields to defaults. + * \param inst first instruction to initialize + * \param count number of instructions to initialize */ void -_mesa_init_instruction(struct prog_instruction *inst) +_mesa_init_instructions(struct prog_instruction *inst, GLuint count) { - _mesa_bzero(inst, sizeof(struct prog_instruction)); - - inst->SrcReg[0].File = PROGRAM_UNDEFINED; - inst->SrcReg[0].Swizzle = SWIZZLE_NOOP; - inst->SrcReg[1].File = PROGRAM_UNDEFINED; - inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; - inst->SrcReg[2].File = PROGRAM_UNDEFINED; - inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; - - inst->DstReg.File = PROGRAM_UNDEFINED; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; - inst->DstReg.CondSwizzle = SWIZZLE_NOOP; - - inst->SaturateMode = SATURATE_OFF; - inst->Precision = FLOAT32; + GLuint i; + + _mesa_bzero(inst, count * sizeof(struct prog_instruction)); + + for (i = 0; i < count; i++) { + inst[i].SrcReg[0].File = PROGRAM_UNDEFINED; + inst[i].SrcReg[0].Swizzle = SWIZZLE_NOOP; + inst[i].SrcReg[1].File = PROGRAM_UNDEFINED; + inst[i].SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst[i].SrcReg[2].File = PROGRAM_UNDEFINED; + inst[i].SrcReg[2].Swizzle = SWIZZLE_NOOP; + + inst[i].DstReg.File = PROGRAM_UNDEFINED; + inst[i].DstReg.WriteMask = WRITEMASK_XYZW; + inst[i].DstReg.CondMask = COND_TR; + inst[i].DstReg.CondSwizzle = SWIZZLE_NOOP; + + inst[i].SaturateMode = SATURATE_OFF; + inst[i].Precision = FLOAT32; + } } @@ -2102,7 +2103,9 @@ _mesa_GetProgramRegisterfvMESA(GLenum target, "glGetProgramRegisterfvMESA(registerName)"); return; } - COPY_4V(v, ctx->VertexProgram.Machine.Temporaries[i]); +#if 0 /* FIX ME */ + ctx->Driver.GetVertexProgramRegister(ctx, PROGRAM_TEMPORARY, i, v); +#endif } else if (reg[0] == 'v' && reg[1] == '[') { /* Vertex Input attribute */ @@ -2113,7 +2116,10 @@ _mesa_GetProgramRegisterfvMESA(GLenum target, _mesa_sprintf(number, "%d", i); if (_mesa_strncmp(reg + 2, name, 4) == 0 || _mesa_strncmp(reg + 2, number, _mesa_strlen(number)) == 0) { - COPY_4V(v, ctx->VertexProgram.Machine.Inputs[i]); +#if 0 /* FIX ME */ + ctx->Driver.GetVertexProgramRegister(ctx, PROGRAM_INPUT, + i, v); +#endif return; } } diff --git a/src/mesa/shader/program_instruction.h b/src/mesa/shader/program_instruction.h index 93bcfc240a9..cdec0ceb2a5 100644 --- a/src/mesa/shader/program_instruction.h +++ b/src/mesa/shader/program_instruction.h @@ -343,7 +343,7 @@ struct prog_instruction extern void -_mesa_init_instruction(struct prog_instruction *inst); +_mesa_init_instructions(struct prog_instruction *inst, GLuint count); extern GLuint _mesa_num_inst_src_regs(enum prog_opcode opcode); diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c index 55991dcce34..172d373b578 100644 --- a/src/mesa/shader/programopt.c +++ b/src/mesa/shader/programopt.c @@ -85,8 +85,8 @@ _mesa_insert_mvp_code(GLcontext *ctx, struct gl_vertex_program *vprog) * newInst[2] = DP4 result.position.z, mvp.row[2], vertex.position; * newInst[3] = DP4 result.position.w, mvp.row[3], vertex.position; */ + _mesa_init_instructions(newInst, 4); for (i = 0; i < 4; i++) { - _mesa_init_instruction(newInst + i); newInst[i].Opcode = OPCODE_DP4; newInst[i].DstReg.File = PROGRAM_OUTPUT; newInst[i].DstReg.Index = VERT_RESULT_HPOS; @@ -137,7 +137,7 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) GLfloat fogVals[4]; GLuint fogConsts; /* constant values for EXP, EXP2 mode */ - if (fprog->FogOption != GL_NONE) { + if (fprog->FogOption == GL_NONE) { _mesa_problem(ctx, "_mesa_append_fog_code() called for fragment program" " with FogOption == GL_NONE"); return; @@ -191,8 +191,7 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) } assert(inst->Opcode == OPCODE_END); /* we'll overwrite this inst */ - for (i = 0; i < 6; i++) - _mesa_init_instruction(inst + i); + _mesa_init_instructions(inst, 6); /* emit instructions to compute fog blending factor */ if (fprog->FogOption == GL_LINEAR) { |