diff options
author | Keith Whitwell <[email protected]> | 2008-10-10 15:19:05 +0100 |
---|---|---|
committer | Keith Whitwell <[email protected]> | 2008-10-10 15:23:36 +0100 |
commit | d7f1cb5b5a134b63227d5746a2dd1f05597c5c2f (patch) | |
tree | 7446e243da8b50f6cb323b7917bdb94fbd528368 /src | |
parent | 7ac1fc77661faf0897507fef0437fe69d0ba53ac (diff) | |
parent | f7556fdd40ed2719beaba271eee4a7551e212ad1 (diff) |
Merge commit 'origin/gallium-0.1' into gallium-0.2
Conflicts:
src/gallium/auxiliary/gallivm/instructionssoa.cpp
src/gallium/auxiliary/gallivm/soabuiltins.c
src/gallium/auxiliary/rtasm/rtasm_x86sse.c
src/gallium/auxiliary/rtasm/rtasm_x86sse.h
src/mesa/main/texenvprogram.c
src/mesa/shader/arbprogparse.c
src/mesa/shader/prog_statevars.c
src/mesa/state_tracker/st_draw.c
src/mesa/vbo/vbo_exec_draw.c
Diffstat (limited to 'src')
36 files changed, 455 insertions, 122 deletions
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 36751c26211..41a4cba1ddd 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -272,6 +272,14 @@ draw_enable_point_sprites(struct draw_context *draw, boolean enable) } +void +draw_set_force_passthrough( struct draw_context *draw, boolean enable ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->force_passthrough = enable; +} + + /** * Ask the draw module for the location/slot of the given vertex attribute in * a post-transformed vertex. diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 0ab3681b647..3eeb4535311 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -160,6 +160,9 @@ void draw_set_render( struct draw_context *draw, void draw_set_driver_clipping( struct draw_context *draw, boolean bypass_clipping ); +void draw_set_force_passthrough( struct draw_context *draw, + boolean enable ); + /******************************************************************************* * Draw pipeline */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index c0cf4269dbb..9825e116c32 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -231,9 +231,9 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) unsigned emit_sz = 0; unsigned src_buffer = 0; unsigned output_format; - unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) ); + unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) ); - switch (vbuf->vinfo->emit[i]) { + switch (vbuf->vinfo->attrib[i].emit) { case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 40a72c9dcfc..5d531146c5f 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -163,12 +163,15 @@ struct draw_context struct { boolean bypass_clipping; + boolean bypass_vs; } driver; boolean flushing; /**< debugging/sanity */ boolean suspend_flushing; /**< internally set */ boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */ + boolean force_passthrough; /**< never clip or shade */ + /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 669c11c993c..87ec6ae20c2 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -69,26 +69,26 @@ draw_pt_arrays(struct draw_context *draw, return TRUE; } - - if (!draw->render) { - opt |= PT_PIPELINE; - } - - if (draw_need_pipeline(draw, - draw->rasterizer, - prim)) { - opt |= PT_PIPELINE; - } - - if (!draw->bypass_clipping && !draw->pt.test_fse) { - opt |= PT_CLIPTEST; + if (!draw->force_passthrough) { + if (!draw->render) { + opt |= PT_PIPELINE; + } + + if (draw_need_pipeline(draw, + draw->rasterizer, + prim)) { + opt |= PT_PIPELINE; + } + + if (!draw->bypass_clipping && !draw->pt.test_fse) { + opt |= PT_CLIPTEST; + } + + if (!draw->rasterizer->bypass_vs) { + opt |= PT_SHADE; + } } - - if (!draw->rasterizer->bypass_vs) { - opt |= PT_SHADE; - } - - + if (opt == 0) middle = draw->pt.middle.fetch_emit; else if (opt == PT_SHADE && !draw->pt.no_fse) diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d4eca80588b..d520b05869b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -84,11 +84,11 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned emit_sz = 0; unsigned src_buffer = 0; unsigned output_format; - unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) ); + unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) ); - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 5a4db6cfe56..3966ad48ba7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -121,7 +121,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, memset(&key, 0, sizeof(key)); for (i = 0; i < vinfo->num_attribs; i++) { - const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]]; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->attrib[i].src_index]; unsigned emit_sz = 0; unsigned input_format = src->src_format; @@ -129,7 +129,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned input_offset = src->src_offset; unsigned output_format; - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index a0e08dd10aa..f7e6a1a8eeb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -133,7 +133,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, for (i = 0; i < vinfo->num_attribs; i++) { unsigned emit_sz = 0; - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_4F: emit_sz = 4 * sizeof(float); break; @@ -161,8 +161,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle, * numbers, not to positions in the hw vertex description -- * that's handled by the output_offset field. */ - fse->key.element[i].out.format = vinfo->emit[i]; - fse->key.element[i].out.vs_output = vinfo->src_index[i]; + fse->key.element[i].out.format = vinfo->attrib[i].emit; + fse->key.element[i].out.vs_output = vinfo->attrib[i].src_index; fse->key.element[i].out.offset = dst_offset; dst_offset += emit_sz; diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 1446f785c51..3214213e445 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -49,7 +49,7 @@ draw_compute_vertex_size(struct vertex_info *vinfo) vinfo->size = 0; for (i = 0; i < vinfo->num_attribs; i++) { - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_OMIT: break; case EMIT_4UB: @@ -81,8 +81,8 @@ draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) unsigned i, j; for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { + j = vinfo->attrib[i].src_index; + switch (vinfo->attrib[i].emit) { case EMIT_OMIT: debug_printf("EMIT_OMIT:"); break; diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 16c65c43175..a943607d7ed 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -75,12 +75,41 @@ struct vertex_info { uint num_attribs; uint hwfmt[4]; /**< hardware format info for this format */ - enum interp_mode interp_mode[PIPE_MAX_SHADER_INPUTS]; - enum attrib_emit emit[PIPE_MAX_SHADER_INPUTS]; /**< EMIT_x */ - uint src_index[PIPE_MAX_SHADER_INPUTS]; /**< map to post-xform attribs */ uint size; /**< total vertex size in dwords */ + + /* Keep this small and at the end of the struct to allow quick + * memcmp() comparisons. + */ + struct { + ubyte interp_mode:4; /**< INTERP_x */ + ubyte emit:4; /**< EMIT_x */ + ubyte src_index; /**< map to post-xform attribs */ + } attrib[PIPE_MAX_SHADER_INPUTS]; }; +static INLINE int +draw_vinfo_size( const struct vertex_info *a ) +{ + return ((const char *)&a->attrib[a->num_attribs] - + (const char *)a); +} + +static INLINE int +draw_vinfo_compare( const struct vertex_info *a, + const struct vertex_info *b ) +{ + unsigned sizea = draw_vinfo_size( a ); + return memcmp( a, b, sizea ); +} + +static INLINE void +draw_vinfo_copy( struct vertex_info *dst, + const struct vertex_info *src ) +{ + unsigned size = draw_vinfo_size( src ); + memcpy( dst, src, size ); +} + /** @@ -91,14 +120,15 @@ struct vertex_info */ static INLINE uint draw_emit_vertex_attr(struct vertex_info *vinfo, - enum attrib_emit emit, enum interp_mode interp, + enum attrib_emit emit, + enum interp_mode interp, /* only used by softpipe??? */ uint src_index) { const uint n = vinfo->num_attribs; assert(n < PIPE_MAX_SHADER_INPUTS); - vinfo->emit[n] = emit; - vinfo->interp_mode[n] = interp; - vinfo->src_index[n] = src_index; + vinfo->attrib[n].emit = emit; + vinfo->attrib[n].interp_mode = interp; + vinfo->attrib[n].src_index = src_index; vinfo->num_attribs++; return n; } diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index b20f3c49630..cb85e1734ec 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -167,6 +167,7 @@ void min(float4 *res, res[3] = minvec(tmp0w, tmp1w); } + void max(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index a5abbcde49f..99ee74cf14b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -371,7 +371,11 @@ void x86_jcc( struct x86_function *p, DUMP_I(cc); if (offset < 0) { - assert(p->csr - p->store > -offset); + /*assert(p->csr - p->store > -offset);*/ + if (p->csr - p->store <= -offset) { + /* probably out of memory (using the error_overflow buffer) */ + return; + } } if (offset <= 127 && offset >= -128) { @@ -699,6 +703,18 @@ void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) emit_modrm_noreg(p, 2, ptr); } +void sse_movntps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, src ); + + assert(dst.mod != mod_REG); + assert(src.mod == mod_REG); + emit_2ub(p, 0x0f, 0x2b); + emit_modrm(p, src, dst); +} + diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 86091e7f6b1..1b5eaaca850 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -190,6 +190,8 @@ void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); +void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); + void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index d194c2fb158..8f1f58b2dd1 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -77,9 +77,9 @@ emit_hw_vertex( struct i915_context *i915, assert(!i915->dirty); for (i = 0; i < vinfo->num_attribs; i++) { - const uint j = vinfo->src_index[i]; + const uint j = vinfo->attrib[i].src_index; const float *attrib = vertex->data[j]; - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_1F: OUT_BATCH( fui(attrib[0]) ); count++; diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 488615067c5..178d4e8781d 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -88,12 +88,12 @@ static void calculate_vertex_layout( struct i915_context *i915 ) if (needW) { draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; - vinfo.emit[0] = EMIT_4F; + vinfo.attrib[0].emit = EMIT_4F; } else { draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZ; - vinfo.emit[0] = EMIT_3F; + vinfo.attrib[0].emit = EMIT_3F; } /* hardware point size */ diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index bc8263c33e3..13d80173937 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -773,10 +773,10 @@ static void setup_tri_coefficients( struct setup_context *setup ) /* setup interpolation for all the remaining attributes: */ for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; + const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; - switch (vinfo->interp_mode[fragSlot]) { + switch (vinfo->attrib[fragSlot].interp_mode) { case INTERP_CONSTANT: for (j = 0; j < NUM_CHANNELS; j++) const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); @@ -1084,10 +1084,10 @@ setup_line_coefficients(struct setup_context *setup, /* setup interpolation for all the remaining attributes: */ for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; + const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; - switch (vinfo->interp_mode[fragSlot]) { + switch (vinfo->attrib[fragSlot].interp_mode) { case INTERP_CONSTANT: for (j = 0; j < NUM_CHANNELS; j++) const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); @@ -1331,10 +1331,10 @@ setup_point( struct setup_context *setup, const_coeff(setup, &setup->posCoef, 0, 3); for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; + const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; - switch (vinfo->interp_mode[fragSlot]) { + switch (vinfo->attrib[fragSlot].interp_mode) { case INTERP_CONSTANT: /* fall-through */ case INTERP_LINEAR: diff --git a/src/mesa/main/api_exec.c b/src/mesa/main/api_exec.c index 0c3c9c4de49..bae3bf11cb5 100644 --- a/src/mesa/main/api_exec.c +++ b/src/mesa/main/api_exec.c @@ -58,7 +58,7 @@ #include "colortab.h" #endif #include "context.h" -#if FEATURE_convolution +#if FEATURE_convolve #include "convolve.h" #endif #include "depth.h" @@ -402,7 +402,7 @@ _mesa_init_exec_table(struct _glapi_table *exec) SET_GetColorTableParameteriv(exec, _mesa_GetColorTableParameteriv); #endif -#if FEATURE_convolution +#if FEATURE_convolve SET_ConvolutionFilter1D(exec, _mesa_ConvolutionFilter1D); SET_ConvolutionFilter2D(exec, _mesa_ConvolutionFilter2D); SET_ConvolutionParameterf(exec, _mesa_ConvolutionParameterf); diff --git a/src/mesa/main/mfeatures.h b/src/mesa/main/mfeatures.h index ed78f57edf6..3819da3d680 100644 --- a/src/mesa/main/mfeatures.h +++ b/src/mesa/main/mfeatures.h @@ -39,7 +39,7 @@ #define FEATURE_accum _HAVE_FULL_GL #define FEATURE_attrib_stack _HAVE_FULL_GL #define FEATURE_colortable _HAVE_FULL_GL -#define FEATURE_convolution _HAVE_FULL_GL +#define FEATURE_convolve _HAVE_FULL_GL #define FEATURE_dispatch _HAVE_FULL_GL #define FEATURE_dlist _HAVE_FULL_GL #define FEATURE_draw_read_buffer _HAVE_FULL_GL diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 8e4f6a2e663..8bf6858c1e3 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2722,6 +2722,7 @@ struct gl_matrix_stack #define _NEW_MULTISAMPLE 0x2000000 /**< __GLcontextRec::Multisample */ #define _NEW_TRACK_MATRIX 0x4000000 /**< __GLcontextRec::VertexProgram */ #define _NEW_PROGRAM 0x8000000 /**< __GLcontextRec::VertexProgram */ +#define _NEW_CURRENT_ATTRIB 0x10000000 /**< __GLcontextRec::Current */ #define _NEW_ALL ~0 /*@}*/ @@ -3049,6 +3050,8 @@ struct __GLcontextRec GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */ GLbitfield NewState; /**< bitwise-or of _NEW_* flags */ + GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */ + /** \name Derived state */ /*@{*/ /** Bitwise-or of DD_* flags. Note that this bitfield may be used before diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c index a1e32e70ba9..2d060300300 100644 --- a/src/mesa/main/queryobj.c +++ b/src/mesa/main/queryobj.c @@ -95,7 +95,7 @@ _mesa_wait_query(GLcontext *ctx, struct gl_query_object *q) * XXX maybe add Delete() method to gl_query_object class and call that instead */ void -_mesa_delete_query(struct gl_query_object *q) +_mesa_delete_query(GLcontext *ctx, struct gl_query_object *q) { _mesa_free(q); } diff --git a/src/mesa/main/queryobj.h b/src/mesa/main/queryobj.h index c05a1f3da82..9a9774641bb 100644 --- a/src/mesa/main/queryobj.h +++ b/src/mesa/main/queryobj.h @@ -37,7 +37,7 @@ extern void _mesa_free_query_data(GLcontext *ctx); extern void -_mesa_delete_query(struct gl_query_object *q); +_mesa_delete_query(GLcontext *ctx, struct gl_query_object *q); extern void _mesa_begin_query(GLcontext *ctx, struct gl_query_object *q); diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 5913019bc12..7e44310d8d3 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -447,6 +447,9 @@ _mesa_update_state_locked( GLcontext *ctx ) GLbitfield new_state = ctx->NewState; GLbitfield prog_flags = _NEW_PROGRAM; + if (new_state == _NEW_CURRENT_ATTRIB) + goto out; + if (MESA_VERBOSE & VERBOSE_STATE) _mesa_print_state("_mesa_update_state", new_state); @@ -510,7 +513,8 @@ _mesa_update_state_locked( GLcontext *ctx ) _mesa_update_tnl_spaces( ctx, new_state ); if (ctx->FragmentProgram._MaintainTexEnvProgram) { - prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); + prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE_MATRIX | _NEW_LIGHT | + _NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); } if (ctx->VertexProgram._MaintainTnlProgram) { prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX | @@ -532,6 +536,7 @@ _mesa_update_state_locked( GLcontext *ctx ) * Set ctx->NewState to zero to avoid recursion if * Driver.UpdateState() has to call FLUSH_VERTICES(). (fixed?) */ + out: new_state = ctx->NewState; ctx->NewState = 0; ctx->Driver.UpdateState(ctx, new_state); @@ -548,3 +553,38 @@ _mesa_update_state( GLcontext *ctx ) _mesa_update_state_locked(ctx); _mesa_unlock_context_textures(ctx); } + + + + +/* Want to figure out which fragment program inputs are actually + * constant/current values from ctx->Current. These should be + * referenced as a tracked state variable rather than a fragment + * program input, to save the overhead of putting a constant value in + * every submitted vertex, transferring it to hardware, interpolating + * it across the triangle, etc... + * + * When there is a VP bound, just use vp->outputs. But when we're + * generating vp from fixed function state, basically want to + * calculate: + * + * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) | + * potential_vp_outputs ) + * + * Where potential_vp_outputs is calculated by looking at enabled + * texgen, etc. + * + * The generated fragment program should then only declare inputs that + * may vary or otherwise differ from the ctx->Current values. + * Otherwise, the fp should track them as state values instead. + */ +void +_mesa_set_varying_vp_inputs( GLcontext *ctx, + GLbitfield varying_inputs ) +{ + if (ctx->varying_vp_inputs != varying_inputs) { + ctx->varying_vp_inputs = varying_inputs; + ctx->NewState |= _NEW_ARRAY; + //_mesa_printf("%s %x\n", __FUNCTION__, varying_inputs); + } +} diff --git a/src/mesa/main/state.h b/src/mesa/main/state.h index bb7cb8f32a3..79f2f6beb0c 100644 --- a/src/mesa/main/state.h +++ b/src/mesa/main/state.h @@ -37,5 +37,8 @@ _mesa_update_state( GLcontext *ctx ); extern void _mesa_update_state_locked( GLcontext *ctx ); +void +_mesa_set_varying_vp_inputs( GLcontext *ctx, + GLbitfield varying_inputs ); #endif diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index f6bbbcfaede..c23173014de 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -55,15 +55,17 @@ struct texenvprog_cache_item #define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM) struct mode_opt { - GLuint Source:4; - GLuint Operand:3; + GLubyte Source:4; + GLubyte Operand:3; }; struct state_key { - GLbitfield enabled_units; + GLuint nr_enabled_units:8; + GLuint enabled_units:8; GLuint separate_specular:1; GLuint fog_enabled:1; GLuint fog_mode:2; + GLuint inputs_available:12; struct { GLuint enabled:1; @@ -74,10 +76,10 @@ struct state_key { GLuint NumArgsRGB:2; GLuint ModeRGB:4; - struct mode_opt OptRGB[3]; - GLuint NumArgsA:2; GLuint ModeA:4; + + struct mode_opt OptRGB[3]; struct mode_opt OptA[3]; } unit[8]; }; @@ -199,6 +201,66 @@ static GLuint translate_tex_src_bit( GLbitfield bit ) } } +#define VERT_BIT_TEX_ANY (0xff << VERT_ATTRIB_TEX0) +#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0) + +/** + * Identify all possible varying inputs. The fragment program will + * never reference non-varying inputs, but will track them via state + * constants instead. + * + * This function figures out all the inputs that the fragment program + * has access to. The bitmask is later reduced to just those which + * are actually referenced. + */ +static GLbitfield get_fp_input_mask( GLcontext *ctx ) +{ + GLbitfield fp_inputs = 0x0; + + if (!ctx->VertexProgram._Enabled || + !ctx->VertexProgram._Current) { + + /* Fixed function logic */ + GLbitfield varying_inputs = ctx->varying_vp_inputs; + + /* First look at what values may be computed by the generated + * vertex program: + */ + if (ctx->Light.Enabled) { + fp_inputs |= FRAG_BIT_COL0; + + if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) + fp_inputs |= FRAG_BIT_COL1; + } + + fp_inputs |= (ctx->Texture._TexGenEnabled | + ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0; + + /* Then look at what might be varying as a result of enabled + * arrays, etc: + */ + if (varying_inputs & VERT_BIT_COLOR0) fp_inputs |= FRAG_BIT_COL0; + if (varying_inputs & VERT_BIT_COLOR1) fp_inputs |= FRAG_BIT_COL1; + + fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0) + << FRAG_ATTRIB_TEX0); + + } + else { + /* calculate from vp->outputs */ + GLbitfield vp_outputs = ctx->VertexProgram._Current->Base.OutputsWritten; + + if (vp_outputs & (1 << VERT_RESULT_COL0)) fp_inputs |= FRAG_BIT_COL0; + if (vp_outputs & (1 << VERT_RESULT_COL1)) fp_inputs |= FRAG_BIT_COL1; + + fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY) >> VERT_RESULT_TEX0) + << FRAG_ATTRIB_TEX0); + } + + return fp_inputs; +} + + /** * Examine current texture environment state and generate a unique * key to identify it. @@ -206,7 +268,9 @@ static GLuint translate_tex_src_bit( GLbitfield bit ) static void make_state_key( GLcontext *ctx, struct state_key *key ) { GLuint i, j; - + GLbitfield inputs_referenced = FRAG_BIT_COL0; + GLbitfield inputs_available = get_fp_input_mask( ctx ); + memset(key, 0, sizeof(*key)); for (i=0;i<MAX_TEXTURE_UNITS;i++) { @@ -217,6 +281,8 @@ static void make_state_key( GLcontext *ctx, struct state_key *key ) key->unit[i].enabled = 1; key->enabled_units |= (1<<i); + key->nr_enabled_units = i+1; + inputs_referenced |= FRAG_BIT_TEX(i); key->unit[i].source_index = translate_tex_src_bit(texUnit->_ReallyEnabled); @@ -245,16 +311,22 @@ static void make_state_key( GLcontext *ctx, struct state_key *key ) } } - if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) + if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { key->separate_specular = 1; + inputs_referenced |= FRAG_BIT_COL1; + } if (ctx->Fog.Enabled) { key->fog_enabled = 1; key->fog_mode = translate_fog_mode(ctx->Fog.Mode); + inputs_referenced |= FRAG_BIT_FOGC; /* maybe */ } + + key->inputs_available = (inputs_available & inputs_referenced); } -/* Use uregs to represent registers internally, translate to Mesa's +/** + * Use uregs to represent registers internally, translate to Mesa's * expected formats on emit. * * NOTE: These are passed by value extensively in this file rather @@ -287,16 +359,16 @@ static const struct ureg undef = { }; -/* State used to build the fragment program: +/** State used to build the fragment program: */ struct texenv_fragment_program { struct gl_fragment_program *program; GLcontext *ctx; struct state_key *state; - GLbitfield alu_temps; /* Track texture indirections, see spec. */ - GLbitfield temps_output; /* Track texture indirections, see spec. */ - GLbitfield temp_in_use; /* Tracks temporary regs which are in use. */ + GLbitfield alu_temps; /**< Track texture indirections, see spec. */ + GLbitfield temps_output; /**< Track texture indirections, see spec. */ + GLbitfield temp_in_use; /**< Tracks temporary regs which are in use. */ GLboolean error; struct ureg src_texture[MAX_TEXTURE_UNITS]; @@ -304,11 +376,11 @@ struct texenv_fragment_program { * else undef. */ - struct ureg src_previous; /* Reg containing color from previous + struct ureg src_previous; /**< Reg containing color from previous * stage. May need to be decl'd. */ - GLuint last_tex_stage; /* Number of last enabled texture unit */ + GLuint last_tex_stage; /**< Number of last enabled texture unit */ struct ureg half; struct ureg one; @@ -457,11 +529,29 @@ static struct ureg register_param5( struct texenv_fragment_program *p, #define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) +static GLuint frag_to_vert_attrib( GLuint attrib ) +{ + switch (attrib) { + case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0; + case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1; + default: + assert(attrib >= FRAG_ATTRIB_TEX0); + assert(attrib <= FRAG_ATTRIB_TEX7); + return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0; + } +} + static struct ureg register_input( struct texenv_fragment_program *p, GLuint input ) { - p->program->Base.InputsRead |= (1 << input); - return make_ureg(PROGRAM_INPUT, input); + if (p->state->inputs_available & (1<<input)) { + p->program->Base.InputsRead |= (1 << input); + return make_ureg(PROGRAM_INPUT, input); + } + else { + GLuint idx = frag_to_vert_attrib( input ); + return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx ); + } } diff --git a/src/mesa/shader/prog_cache.c b/src/mesa/shader/prog_cache.c index 36a25377c55..9437e596138 100644 --- a/src/mesa/shader/prog_cache.c +++ b/src/mesa/shader/prog_cache.c @@ -44,6 +44,7 @@ struct cache_item struct gl_program_cache { struct cache_item **items; + struct cache_item *last; GLuint size, n_items; }; @@ -83,6 +84,8 @@ rehash(struct gl_program_cache *cache) struct cache_item *c, *next; GLuint size, i; + cache->last = NULL; + size = cache->size * 3; items = (struct cache_item**) _mesa_malloc(size * sizeof(*items)); _mesa_memset(items, 0, size * sizeof(*items)); @@ -105,6 +108,8 @@ clear_cache(GLcontext *ctx, struct gl_program_cache *cache) { struct cache_item *c, *next; GLuint i; + + cache->last = NULL; for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { @@ -149,18 +154,26 @@ _mesa_delete_program_cache(GLcontext *ctx, struct gl_program_cache *cache) struct gl_program * -_mesa_search_program_cache(const struct gl_program_cache *cache, +_mesa_search_program_cache(struct gl_program_cache *cache, const void *key, GLuint keysize) { - const GLuint hash = hash_key(key, keysize); - struct cache_item *c; - - for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->hash == hash && memcmp(c->key, key, keysize) == 0) - return c->program; + if (cache->last && + memcmp(cache->last->key, key, keysize) == 0) { + return cache->last->program; } + else { + const GLuint hash = hash_key(key, keysize); + struct cache_item *c; + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (c->hash == hash && memcmp(c->key, key, keysize) == 0) { + cache->last = c; + return c->program; + } + } - return NULL; + return NULL; + } } diff --git a/src/mesa/shader/prog_cache.h b/src/mesa/shader/prog_cache.h index a8c91fba011..4e1ccac03ff 100644 --- a/src/mesa/shader/prog_cache.h +++ b/src/mesa/shader/prog_cache.h @@ -42,7 +42,7 @@ _mesa_delete_program_cache(GLcontext *ctx, struct gl_program_cache *pc); extern struct gl_program * -_mesa_search_program_cache(const struct gl_program_cache *cache, +_mesa_search_program_cache(struct gl_program_cache *cache, const void *key, GLuint keysize); extern void diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c index d4e31207e84..34c47413506 100644 --- a/src/mesa/shader/prog_statevars.c +++ b/src/mesa/shader/prog_statevars.c @@ -395,6 +395,12 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], case STATE_INTERNAL: switch (state[1]) { + case STATE_CURRENT_ATTRIB: { + const GLuint idx = (GLuint) state[2]; + COPY_4V(value, ctx->Current.Attrib[idx]); + return; + } + case STATE_NORMAL_SCALE: ASSIGN_4V(value, ctx->_ModelViewInvScale, @@ -501,6 +507,9 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], } return; + /* XXX: make sure new tokens added here are also handled in the + * _mesa_program_state_flags() switch, below. + */ default: /* unknown state indexes are silently ignored * should be handled by the driver. @@ -574,11 +583,29 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH]) case STATE_INTERNAL: switch (state[1]) { + case STATE_CURRENT_ATTRIB: + return _NEW_CURRENT_ATTRIB; + + case STATE_NORMAL_SCALE: + return _NEW_MODELVIEW; + case STATE_TEXRECT_SCALE: case STATE_SHADOW_AMBIENT: return _NEW_TEXTURE; case STATE_FOG_PARAMS_OPTIMIZED: return _NEW_FOG; + case STATE_LIGHT_SPOT_DIR_NORMALIZED: + case STATE_LIGHT_POSITION: + case STATE_LIGHT_POSITION_NORMALIZED: + case STATE_LIGHT_HALF_VECTOR: + return _NEW_LIGHT; + + case STATE_PT_SCALE: + case STATE_PT_BIAS: + case STATE_PCM_SCALE: + case STATE_PCM_BIAS: + return _NEW_PIXEL; + default: /* unknown state indexes are silently ignored and * no flag set, since it is handled by the driver. diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h index 20643ca7947..72e51f40314 100644 --- a/src/mesa/shader/prog_statevars.h +++ b/src/mesa/shader/prog_statevars.h @@ -104,6 +104,7 @@ typedef enum gl_state_index_ { STATE_LOCAL, STATE_INTERNAL, /* Mesa additions */ + STATE_CURRENT_ATTRIB, /* ctx->Current vertex attrib value */ STATE_NORMAL_SCALE, STATE_TEXRECT_SCALE, STATE_FOG_PARAMS_OPTIMIZED, /* for faster fog calc */ diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c index f120c20bdf2..5d21cff6721 100644 --- a/src/mesa/shader/program.c +++ b/src/mesa/shader/program.c @@ -686,17 +686,47 @@ _mesa_combine_programs(GLcontext *ctx, if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprogA, *fprogB, *newFprog; + GLbitfield progB_inputsRead = progB->InputsRead; + GLint progB_colorFile, progB_colorIndex; + fprogA = (struct gl_fragment_program *) progA; fprogB = (struct gl_fragment_program *) progB; newFprog = (struct gl_fragment_program *) newProg; newFprog->UsesKill = fprogA->UsesKill || fprogB->UsesKill; + /* We'll do a search and replace for instances + * of progB_colorFile/progB_colorIndex below... + */ + progB_colorFile = PROGRAM_INPUT; + progB_colorIndex = FRAG_ATTRIB_COL0; + + /* + * The fragment program may get color from a state var rather than + * a fragment input (vertex output) if it's constant. + * See the texenvprogram.c code. + * So, search the program's parameter list now to see if the program + * gets color from a state var instead of a conventional fragment + * input register. + */ + for (i = 0; i < progB->Parameters->NumParameters; i++) { + struct gl_program_parameter *p = &progB->Parameters->Parameters[i]; + if (p->Type == PROGRAM_STATE_VAR && + p->StateIndexes[0] == STATE_INTERNAL && + p->StateIndexes[1] == STATE_CURRENT_ATTRIB && + p->StateIndexes[2] == VERT_ATTRIB_COLOR0) { + progB_inputsRead |= FRAG_BIT_COL0; + progB_colorFile = PROGRAM_STATE_VAR; + progB_colorIndex = i; + break; + } + } + /* Connect color outputs of fprogA to color inputs of fprogB, via a * new temporary register. */ if ((progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) && - (progB->InputsRead & (1 << FRAG_ATTRIB_COL0))) { + (progB_inputsRead & FRAG_BIT_COL0)) { GLint tempReg = _mesa_find_free_register(newProg, PROGRAM_TEMPORARY); if (tempReg < 0) { _mesa_problem(ctx, "No free temp regs found in " @@ -707,13 +737,14 @@ _mesa_combine_programs(GLcontext *ctx, replace_registers(newInst, lenA, PROGRAM_OUTPUT, FRAG_RESULT_COLR, PROGRAM_TEMPORARY, tempReg); - /* replace reads from input.color[0] with tempReg */ + /* replace reads from the input color with tempReg */ replace_registers(newInst + lenA, lenB, - PROGRAM_INPUT, FRAG_ATTRIB_COL0, - PROGRAM_TEMPORARY, tempReg); + progB_colorFile, progB_colorIndex, /* search for */ + PROGRAM_TEMPORARY, tempReg /* replace with */ ); } - inputsB = progB->InputsRead; + /* compute combined program's InputsRead */ + inputsB = progB_inputsRead; if (progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) { inputsB &= ~(1 << FRAG_ATTRIB_COL0); } diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index fc47896c242..5eef4ebe92e 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -215,6 +215,9 @@ static void update_raster_state( struct st_context *st ) raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE; } } + + /* ST_NEW_VERTEX_PROGRAM + */ if (vertProg) { if (vertProg->Base.Id == 0) { if (vertProg->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { @@ -277,7 +280,7 @@ const struct st_tracked_state st_update_rasterizer = { _NEW_POLYGON | _NEW_PROGRAM | _NEW_SCISSOR), /* mesa state dependencies*/ - 0, /* state tracker dependencies */ + ST_NEW_VERTEX_PROGRAM, /* state tracker dependencies */ }, update_raster_state /* update function */ }; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index e545e00eb2a..acaf1de8823 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "main/imports.h" -#if FEATURE_convolution +#if FEATURE_convolve #include "main/convolve.h" #endif #include "main/enums.h" @@ -409,7 +409,7 @@ st_TexImage(GLcontext * ctx, stImage->face = _mesa_tex_target_to_face(target); stImage->level = level; -#if FEATURE_convolution +#if FEATURE_convolve if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) { _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth, &postConvHeight); diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index f9016923dcd..61949a93884 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -526,7 +526,7 @@ st_draw_vbo(GLcontext *ctx, num_vbuffers = 1; num_velements = vp->num_inputs; if (num_velements == 0) - num_vbuffers = 0; + num_vbuffers = 0; } else { /*printf("Draw non-interleaved\n");*/ diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index fdb0c5a9a4d..a6ce26ffed8 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -143,29 +143,37 @@ static void vbo_exec_copy_to_current( struct vbo_exec_context *exec ) for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) { if (exec->vtx.attrsz[i]) { - GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; - /* Note: the exec->vtx.current[i] pointers point into the * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays. */ - COPY_CLEAN_4V(current, - exec->vtx.attrsz[i], - exec->vtx.attrptr[i]); + GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; + GLfloat tmp[4]; + + COPY_CLEAN_4V(tmp, + exec->vtx.attrsz[i], + exec->vtx.attrptr[i]); + + if (memcmp(current, tmp, sizeof(tmp)) != 0) + { + memcpy(current, tmp, sizeof(tmp)); - /* Given that we explicitly state size here, there is no need - * for the COPY_CLEAN above, could just copy 16 bytes and be - * done. The only problem is when Mesa accesses ctx->Current - * directly. - */ - vbo->currval[i].Size = exec->vtx.attrsz[i]; - - /* This triggers rather too much recalculation of Mesa state - * that doesn't get used (eg light positions). - */ - if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT && - i <= VBO_ATTRIB_MAT_BACK_INDEXES) - ctx->NewState |= _NEW_LIGHT; + /* Given that we explicitly state size here, there is no need + * for the COPY_CLEAN above, could just copy 16 bytes and be + * done. The only problem is when Mesa accesses ctx->Current + * directly. + */ + vbo->currval[i].Size = exec->vtx.attrsz[i]; + + /* This triggers rather too much recalculation of Mesa state + * that doesn't get used (eg light positions). + */ + if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT && + i <= VBO_ATTRIB_MAT_BACK_INDEXES) + ctx->NewState |= _NEW_LIGHT; + + ctx->NewState |= _NEW_CURRENT_ATTRIB; + } } } diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 0f9d8da3568..8871e10cf60 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -127,6 +127,7 @@ static void recalculate_input_bindings( GLcontext *ctx ) struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = &vbo->exec; const struct gl_client_array **inputs = &exec->array.inputs[0]; + GLbitfield const_inputs = 0x0; GLuint i; exec->array.program_mode = get_program_mode(ctx); @@ -141,19 +142,24 @@ static void recalculate_input_bindings( GLcontext *ctx ) for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; - else + else { inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } } for (i = 0; i < MAT_ATTRIB_MAX; i++) { inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); } /* Could use just about anything, just to fill in the empty * slots: */ - for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) + for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) { inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } break; case VP_NV: @@ -166,15 +172,19 @@ static void recalculate_input_bindings( GLcontext *ctx ) inputs[i] = exec->array.generic_array[i]; else if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; - else + else { inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } } /* Could use just about anything, just to fill in the empty * slots: */ - for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) + for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) { inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0]; + const_inputs |= 1 << i; + } break; case VP_ARB: @@ -189,25 +199,34 @@ static void recalculate_input_bindings( GLcontext *ctx ) inputs[0] = exec->array.generic_array[0]; else if (exec->array.legacy_array[0]->Enabled) inputs[0] = exec->array.legacy_array[0]; - else + else { inputs[0] = &vbo->legacy_currval[0]; + const_inputs |= 1 << 0; + } for (i = 1; i <= VERT_ATTRIB_TEX7; i++) { if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; - else + else { inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } } for (i = 0; i < 16; i++) { if (exec->array.generic_array[i]->Enabled) inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i]; - else + else { inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } + } break; } + + _mesa_set_varying_vp_inputs( ctx, ~const_inputs ); } static void bind_arrays( GLcontext *ctx ) @@ -257,6 +276,11 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) bind_arrays( ctx ); + /* Again... + */ + if (ctx->NewState) + _mesa_update_state( ctx ); + prim[0].begin = 1; prim[0].end = 1; prim[0].weak = 0; @@ -297,6 +321,9 @@ vbo_exec_DrawRangeElements(GLenum mode, bind_arrays( ctx ); + if (ctx->NewState) + _mesa_update_state( ctx ); + ib.count = count; ib.type = type; ib.obj = ctx->Array.ElementArrayBufferObj; diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 92356ba9773..5bf3d836db5 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -150,6 +150,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) GLubyte *data = exec->vtx.buffer_map; const GLuint *map; GLuint attr; + GLbitfield varying_inputs = 0x0; /* Install the default (ie Current) attributes first, then overlay * all active ones. @@ -211,8 +212,11 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) arrays[attr]._MaxElement = count; /* ??? */ data += exec->vtx.attrsz[src] * sizeof(GLfloat); + varying_inputs |= 1<<attr; } } + + _mesa_set_varying_vp_inputs( ctx, varying_inputs ); } @@ -242,6 +246,9 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec ) */ vbo_exec_bind_arrays( ctx ); + if (ctx->NewState) + _mesa_update_state( ctx ); + /* if using a real VBO, unmap it before drawing */ if (exec->vtx.bufferobj->Name) { ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj); diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index ed82f09958d..0488c5d7182 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -64,18 +64,26 @@ static void _playback_copy_to_current( GLcontext *ctx, for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) { if (node->attrsz[i]) { GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; + GLfloat tmp[4]; - COPY_CLEAN_4V(current, - node->attrsz[i], - data); + COPY_CLEAN_4V(tmp, + node->attrsz[i], + data); + + if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0) + { + memcpy(current, tmp, 4 * sizeof(GLfloat)); - vbo->currval[i].Size = node->attrsz[i]; + vbo->currval[i].Size = node->attrsz[i]; - data += node->attrsz[i]; + if (i >= VBO_ATTRIB_FIRST_MATERIAL && + i <= VBO_ATTRIB_LAST_MATERIAL) + ctx->NewState |= _NEW_LIGHT; + + ctx->NewState |= _NEW_CURRENT_ATTRIB; + } - if (i >= VBO_ATTRIB_FIRST_MATERIAL && - i <= VBO_ATTRIB_LAST_MATERIAL) - ctx->NewState |= _NEW_LIGHT; + data += node->attrsz[i]; } } @@ -110,6 +118,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx, GLuint data = node->buffer_offset; const GLuint *map; GLuint attr; + GLbitfield varying_inputs = 0x0; /* Install the default (ie Current) attributes first, then overlay * all active ones. @@ -159,8 +168,11 @@ static void vbo_bind_vertex_list( GLcontext *ctx, assert(arrays[attr].BufferObj->Name); data += node->attrsz[src] * sizeof(GLfloat); + varying_inputs |= 1<<attr; } } + + _mesa_set_varying_vp_inputs( ctx, varying_inputs ); } static void vbo_save_loopback_vertex_list( GLcontext *ctx, @@ -229,6 +241,11 @@ void vbo_save_playback_vertex_list( GLcontext *ctx, void *data ) vbo_bind_vertex_list( ctx, node ); + /* Again... + */ + if (ctx->NewState) + _mesa_update_state( ctx ); + vbo_context(ctx)->draw_prims( ctx, save->inputs, node->prim, |