diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/tnl/t_vp_build.c | 112 |
1 files changed, 85 insertions, 27 deletions
diff --git a/src/mesa/tnl/t_vp_build.c b/src/mesa/tnl/t_vp_build.c index 35cea8454a5..6ffebb32a4a 100644 --- a/src/mesa/tnl/t_vp_build.c +++ b/src/mesa/tnl/t_vp_build.c @@ -47,7 +47,13 @@ * generated program with line/function references for each * instruction back into this file: */ -#define DISASSEM 0 +#define DISASSEM 1 + +/* Should be tunable by the driver - do we want to do matrix + * multiplications with DP4's or with MUL/MAD's? SSE works better + * with the latter, drivers may differ. + */ +#define PREFER_DP4 1 #define MAX_INSN 200 @@ -423,17 +429,34 @@ static void emit_normalize_vec3( struct tnl_program *p, release_temp(p, tmp); } +static void emit_passthrough( struct tnl_program *p, + GLuint input, + GLuint output ) +{ + struct ureg out = register_output(p, output); + emit_op1(p, VP_OPCODE_MOV, out, 0, register_input(p, input)); +} + static struct ureg get_eye_position( struct tnl_program *p ) { if (is_undef(p->eye_position)) { struct ureg pos = register_input( p, VERT_ATTRIB_POS ); struct ureg modelview[4]; - register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, - STATE_MATRIX_TRANSPOSE, modelview ); p->eye_position = reserve_temp(p); - emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); + if (PREFER_DP4) { + register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, + STATE_MATRIX, modelview ); + + emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); + } + else { + register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, + STATE_MATRIX_TRANSPOSE, modelview ); + + emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); + } } return p->eye_position; @@ -492,9 +515,16 @@ static void build_hpos( struct tnl_program *p ) struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); struct ureg mvp[4]; - register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, - STATE_MATRIX_TRANSPOSE, mvp ); - emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); + if (PREFER_DP4) { + register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, + STATE_MATRIX, mvp ); + emit_matrix_transform_vec4( p, hpos, mvp, pos ); + } + else { + register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, + STATE_MATRIX_TRANSPOSE, mvp ); + emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); + } } @@ -695,6 +725,35 @@ static void build_lighting( struct tnl_program *p ) _bfc1 = _bfc0; } + + /* If no lights, still need to emit the scenecolor. + */ + if (nr_lights == 0) { + { + struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); + emit_op1(p, VP_OPCODE_MOV, res0, 0, _col0); + } + + if (separate) { + struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); + emit_op1(p, VP_OPCODE_MOV, res1, 0, _col1); + } + + if (twoside) { + struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); + emit_op1(p, VP_OPCODE_MOV, res0, 0, _bfc0); + } + + if (twoside && separate) { + struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); + emit_op1(p, VP_OPCODE_MOV, res1, 0, _bfc1); + } + + release_temps(p); + return; + } + + for (i = 0; i < MAX_LIGHTS; i++) { struct gl_light *light = &ctx->Light.Light[i]; @@ -968,9 +1027,9 @@ static void build_texture_transform( struct tnl_program *p ) for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; GLuint texmat_enabled = ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i); - struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); if (texUnit->TexGenEnabled || texmat_enabled) { + struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); struct ureg out_texgen = undef; if (texUnit->TexGenEnabled) { @@ -1053,12 +1112,25 @@ static void build_texture_transform( struct tnl_program *p ) struct ureg in = (!is_undef(out_texgen) ? out_texgen : register_input(p, VERT_ATTRIB_TEX0+i)); - register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, - 0, 3, 0, texmat ); - emit_matrix_transform_vec4( p, out, texmat, in ); + if (PREFER_DP4) { + register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, + 0, 3, STATE_MATRIX, texmat ); + emit_matrix_transform_vec4( p, out, texmat, in ); + } + else { + register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, + 0, 3, STATE_MATRIX_TRANSPOSE, texmat ); + emit_matrix_transform_vec4( p, out, texmat, in ); + } } release_temps(p); + } + else if (texUnit->_ReallyEnabled) { + /* KW: _ReallyEnabled isn't sufficient? Need to know whether + * this texture unit is referenced by the fragment shader. + */ + emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); } } } @@ -1093,11 +1165,6 @@ static void build_pointsize( struct tnl_program *p ) } -static void build_passthrough( struct tnl_program *p, GLuint inputs ) -{ -} - - static GLboolean programs_eq( struct vertex_program *a, struct vertex_program *b ) { @@ -1123,7 +1190,6 @@ static GLboolean programs_eq( struct vertex_program *a, void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ) { - TNLcontext *tnl = TNL_CONTEXT(ctx); struct tnl_program p; if (ctx->VertexProgram._Enabled) @@ -1170,6 +1236,8 @@ void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ) */ if (ctx->Light.Enabled) build_lighting(&p); + else + emit_passthrough(&p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); if (ctx->Fog.Enabled) build_fog(&p); @@ -1180,16 +1248,6 @@ void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ) if (ctx->Point._Attenuated) build_pointsize(&p); - /* Is there a need to copy inputs to outputs? The software - * implementation might do this more efficiently by just assigning - * the missing results to point at input arrays. - */ - if (/* tnl->vp_copy_inputs && */ - (tnl->render_inputs & ~p.program->OutputsWritten)) { - build_passthrough(&p, tnl->render_inputs); - } - - /* Finish up: */ emit_op1(&p, VP_OPCODE_END, undef, 0, undef); |