diff options
author | Dave Airlie <[email protected]> | 2009-03-20 10:52:17 +1000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2009-03-20 10:52:17 +1000 |
commit | 407e8ae5b167b0193e1e5b1266a5d61ed836dfb5 (patch) | |
tree | 2d9d05a5c3122f41a13aa8bd9ae921c1176e6b0d /src/mesa/drivers/dri | |
parent | bdaa0341caffc353fd26bbd91865c2d86eed11c1 (diff) | |
parent | 114bb54324f22cb53bcd14607234d0acd74d37bd (diff) |
Merge remote branch 'main/master' into radeon-rewrite
Conflicts:
src/mesa/drivers/dri/r300/r300_cmdbuf.c
src/mesa/drivers/dri/r300/r300_state.c
src/mesa/drivers/dri/r300/r300_swtcl.c
src/mesa/drivers/dri/r300/radeon_ioctl.c
src/mesa/drivers/dri/radeon/radeon_screen.c
Diffstat (limited to 'src/mesa/drivers/dri')
110 files changed, 1960 insertions, 1306 deletions
diff --git a/src/mesa/drivers/dri/common/extension_helper.h b/src/mesa/drivers/dri/common/extension_helper.h index 0d641f25f7c..8dcaaee3079 100644 --- a/src/mesa/drivers/dri/common/extension_helper.h +++ b/src/mesa/drivers/dri/common/extension_helper.h @@ -2640,6 +2640,13 @@ static const char GetProgramStringARB_names[] = ""; #endif +#if defined(need_GL_ATI_envmap_bumpmap) +static const char TexBumpParameterfvATI_names[] = + "ip\0" /* Parameter signature */ + "glTexBumpParameterfvATI\0" + ""; +#endif + #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) static const char CompileShaderARB_names[] = "i\0" /* Parameter signature */ @@ -2980,6 +2987,13 @@ static const char GetVertexAttribdvARB_names[] = ""; #endif +#if defined(need_GL_ATI_envmap_bumpmap) +static const char TexBumpParameterivATI_names[] = + "ip\0" /* Parameter signature */ + "glTexBumpParameterivATI\0" + ""; +#endif + #if defined(need_GL_EXT_convolution) static const char GetSeparableFilter_names[] = "iiippp\0" /* Parameter signature */ @@ -3891,6 +3905,13 @@ static const char VertexAttrib4dARB_names[] = ""; #endif +#if defined(need_GL_ATI_envmap_bumpmap) +static const char GetTexBumpParameterfvATI_names[] = + "ip\0" /* Parameter signature */ + "glGetTexBumpParameterfvATI\0" + ""; +#endif + #if defined(need_GL_NV_fragment_program) static const char ProgramNamedParameter4dNV_names[] = "iipdddd\0" /* Parameter signature */ @@ -4834,6 +4855,13 @@ static const char VertexAttribs1svNV_names[] = ""; #endif +#if defined(need_GL_ATI_envmap_bumpmap) +static const char GetTexBumpParameterivATI_names[] = + "ip\0" /* Parameter signature */ + "glGetTexBumpParameterivATI\0" + ""; +#endif + #if defined(need_GL_EXT_coordinate_frame) static const char Binormal3bEXT_names[] = "iii\0" /* Parameter signature */ @@ -5198,6 +5226,16 @@ static const struct dri_extension_function GL_ATI_draw_buffers_functions[] = { }; #endif +#if defined(need_GL_ATI_envmap_bumpmap) +static const struct dri_extension_function GL_ATI_envmap_bumpmap_functions[] = { + { TexBumpParameterfvATI_names, TexBumpParameterfvATI_remap_index, -1 }, + { TexBumpParameterivATI_names, TexBumpParameterivATI_remap_index, -1 }, + { GetTexBumpParameterfvATI_names, GetTexBumpParameterfvATI_remap_index, -1 }, + { GetTexBumpParameterivATI_names, GetTexBumpParameterivATI_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_ATI_fragment_shader) static const struct dri_extension_function GL_ATI_fragment_shader_functions[] = { { ColorFragmentOp2ATI_names, ColorFragmentOp2ATI_remap_index, -1 }, diff --git a/src/mesa/drivers/dri/common/texmem.c b/src/mesa/drivers/dri/common/texmem.c index ff174a251d2..b64618a03c8 100644 --- a/src/mesa/drivers/dri/common/texmem.c +++ b/src/mesa/drivers/dri/common/texmem.c @@ -1063,31 +1063,31 @@ void driInitTextureObjects( GLcontext *ctx, driTextureObject * swapped, ctx->Texture.CurrentUnit = i; if ( (targets & DRI_TEXMGR_DO_TEXTURE_1D) != 0 ) { - texObj = ctx->Texture.Unit[i].Current1D; + texObj = ctx->Texture.Unit[i].CurrentTex[TEXTURE_1D_INDEX]; ctx->Driver.BindTexture( ctx, GL_TEXTURE_1D, texObj ); move_to_tail( swapped, (driTextureObject *) texObj->DriverData ); } if ( (targets & DRI_TEXMGR_DO_TEXTURE_2D) != 0 ) { - texObj = ctx->Texture.Unit[i].Current2D; + texObj = ctx->Texture.Unit[i].CurrentTex[TEXTURE_2D_INDEX]; ctx->Driver.BindTexture( ctx, GL_TEXTURE_2D, texObj ); move_to_tail( swapped, (driTextureObject *) texObj->DriverData ); } if ( (targets & DRI_TEXMGR_DO_TEXTURE_3D) != 0 ) { - texObj = ctx->Texture.Unit[i].Current3D; + texObj = ctx->Texture.Unit[i].CurrentTex[TEXTURE_3D_INDEX]; ctx->Driver.BindTexture( ctx, GL_TEXTURE_3D, texObj ); move_to_tail( swapped, (driTextureObject *) texObj->DriverData ); } if ( (targets & DRI_TEXMGR_DO_TEXTURE_CUBE) != 0 ) { - texObj = ctx->Texture.Unit[i].CurrentCubeMap; + texObj = ctx->Texture.Unit[i].CurrentTex[TEXTURE_CUBE_INDEX]; ctx->Driver.BindTexture( ctx, GL_TEXTURE_CUBE_MAP_ARB, texObj ); move_to_tail( swapped, (driTextureObject *) texObj->DriverData ); } if ( (targets & DRI_TEXMGR_DO_TEXTURE_RECT) != 0 ) { - texObj = ctx->Texture.Unit[i].CurrentRect; + texObj = ctx->Texture.Unit[i].CurrentTex[TEXTURE_RECT_INDEX]; ctx->Driver.BindTexture( ctx, GL_TEXTURE_RECTANGLE_NV, texObj ); move_to_tail( swapped, (driTextureObject *) texObj->DriverData ); } diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index 6b44ed9a673..c9acd81be74 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -32,19 +32,14 @@ #include <string.h> #include <stdlib.h> #include "main/mtypes.h" +#include "main/cpuinfo.h" #include "main/extensions.h" #include "glapi/dispatch.h" #include "utils.h" -int driDispatchRemapTable[ driDispatchRemapTable_size ]; -#if defined(USE_X86_ASM) -#include "x86/common_x86_asm.h" -#endif +int driDispatchRemapTable[ driDispatchRemapTable_size ]; -#if defined(USE_PPC_ASM) -#include "ppc/common_ppc_features.h" -#endif unsigned driParseDebugString( const char * debug, @@ -93,12 +88,8 @@ unsigned driGetRendererString( char * buffer, const char * hardware_name, const char * driver_date, GLuint agp_mode ) { -#define MAX_INFO 4 - const char * cpu[MAX_INFO]; - unsigned next = 0; - unsigned i; - unsigned offset; - + unsigned offset; + char *cpu; offset = sprintf( buffer, "Mesa DRI %s %s", hardware_name, driver_date ); @@ -118,59 +109,10 @@ driGetRendererString( char * buffer, const char * hardware_name, /* Append any CPU-specific information. */ -#ifdef USE_X86_ASM - if ( _mesa_x86_cpu_features ) { - cpu[next] = " x86"; - next++; - } -# ifdef USE_MMX_ASM - if ( cpu_has_mmx ) { - cpu[next] = (cpu_has_mmxext) ? "/MMX+" : "/MMX"; - next++; - } -# endif -# ifdef USE_3DNOW_ASM - if ( cpu_has_3dnow ) { - cpu[next] = (cpu_has_3dnowext) ? "/3DNow!+" : "/3DNow!"; - next++; - } -# endif -# ifdef USE_SSE_ASM - if ( cpu_has_xmm ) { - cpu[next] = (cpu_has_xmm2) ? "/SSE2" : "/SSE"; - next++; - } -# endif - -#elif defined(USE_SPARC_ASM) - - cpu[0] = " SPARC"; - next = 1; - -#elif defined(USE_PPC_ASM) - if ( _mesa_ppc_cpu_features ) { - cpu[next] = (cpu_has_64) ? " PowerPC 64" : " PowerPC"; - next++; - } - -# ifdef USE_VMX_ASM - if ( cpu_has_vmx ) { - cpu[next] = "/Altivec"; - next++; - } -# endif - - if ( ! cpu_has_fpu ) { - cpu[next] = "/No FPU"; - next++; - } -#endif - - for ( i = 0 ; i < next ; i++ ) { - const size_t len = strlen( cpu[i] ); - - strncpy( & buffer[ offset ], cpu[i], len ); - offset += len; + cpu = _mesa_get_cpu_string(); + if (cpu) { + offset += sprintf(buffer + offset, " %s", cpu); + _mesa_free(cpu); } return offset; diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h index d5f4fc34917..d76595578c7 100644 --- a/src/mesa/drivers/dri/common/xmlpool/options.h +++ b/src/mesa/drivers/dri/common/xmlpool/options.h @@ -546,3 +546,23 @@ DRI_CONF_OPT_BEGIN(nv_vertex_program,bool,def) \ DRI_CONF_DESC(fr,"Activer l'extension GL_NV_vertex_program") \ DRI_CONF_DESC(sv,"Aktivera tillägget GL_NV_vertex_program") \ DRI_CONF_OPT_END + +#define DRI_CONF_ALWAYS_FLUSH_BATCH(def) \ +DRI_CONF_OPT_BEGIN(always_flush_batch,bool,def) \ + DRI_CONF_DESC(en,"Enable flushing batchbuffer after each draw call") \ + DRI_CONF_DESC(de,"Enable flushing batchbuffer after each draw call") \ + DRI_CONF_DESC(es,"Enable flushing batchbuffer after each draw call") \ + DRI_CONF_DESC(nl,"Enable flushing batchbuffer after each draw call") \ + DRI_CONF_DESC(fr,"Enable flushing batchbuffer after each draw call") \ + DRI_CONF_DESC(sv,"Enable flushing batchbuffer after each draw call") \ +DRI_CONF_OPT_END + +#define DRI_CONF_ALWAYS_FLUSH_CACHE(def) \ +DRI_CONF_OPT_BEGIN(always_flush_cache,bool,def) \ + DRI_CONF_DESC(en,"Enable flushing GPU caches with each draw call") \ + DRI_CONF_DESC(de,"Enable flushing GPU caches with each draw call") \ + DRI_CONF_DESC(es,"Enable flushing GPU caches with each draw call") \ + DRI_CONF_DESC(nl,"Enable flushing GPU caches with each draw call") \ + DRI_CONF_DESC(fr,"Enable flushing GPU caches with each draw call") \ + DRI_CONF_DESC(sv,"Enable flushing GPU caches with each draw call") \ +DRI_CONF_OPT_END diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h index 4df1916aad0..5fd6ec65bf8 100644 --- a/src/mesa/drivers/dri/common/xmlpool/t_options.h +++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h @@ -237,3 +237,13 @@ DRI_CONF_OPT_END DRI_CONF_OPT_BEGIN(nv_vertex_program,bool,def) \ DRI_CONF_DESC(en,gettext("Enable extension GL_NV_vertex_program")) \ DRI_CONF_OPT_END + +#define DRI_CONF_ALWAYS_FLUSH_BATCH(def) \ +DRI_CONF_OPT_BEGIN(always_flush_batch,bool,def) \ + DRI_CONF_DESC(en,gettext("Enable flushing batchbuffer after each draw call")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_ALWAYS_FLUSH_CACHE(def) \ +DRI_CONF_OPT_BEGIN(always_flush_cache,bool,def) \ + DRI_CONF_DESC(en,gettext("Enable flushing GPU caches with each draw call")) \ +DRI_CONF_OPT_END diff --git a/src/mesa/drivers/dri/fb/fb_dri.c b/src/mesa/drivers/dri/fb/fb_dri.c index f1194d7ce82..571b8922d5d 100644 --- a/src/mesa/drivers/dri/fb/fb_dri.c +++ b/src/mesa/drivers/dri/fb/fb_dri.c @@ -480,7 +480,7 @@ fbCreateBuffer( __DRIscreenPrivate *driScrnPriv, static void fbDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } diff --git a/src/mesa/drivers/dri/ffb/ffb_state.c b/src/mesa/drivers/dri/ffb/ffb_state.c index ee0fe4e0dbe..5eb8f417ffb 100644 --- a/src/mesa/drivers/dri/ffb/ffb_state.c +++ b/src/mesa/drivers/dri/ffb/ffb_state.c @@ -275,7 +275,7 @@ ffbDDStencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func, /* We will properly update sw/hw state when stenciling is * enabled. */ - if (! ctx->Stencil.Enabled) + if (! ctx->Stencil._Enabled) return; stencilctl = fmesa->stencilctl; @@ -333,7 +333,7 @@ ffbDDStencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, /* We will properly update sw/hw state when stenciling is * enabled. */ - if (! ctx->Stencil.Enabled) + if (! ctx->Stencil._Enabled) return; stencilctl = fmesa->stencilctl; diff --git a/src/mesa/drivers/dri/ffb/ffb_xmesa.c b/src/mesa/drivers/dri/ffb/ffb_xmesa.c index 00bdcec7a69..3b9f5c67591 100644 --- a/src/mesa/drivers/dri/ffb/ffb_xmesa.c +++ b/src/mesa/drivers/dri/ffb/ffb_xmesa.c @@ -394,7 +394,7 @@ ffbCreateBuffer(__DRIscreenPrivate *driScrnPriv, static void ffbDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } diff --git a/src/mesa/drivers/dri/gamma/gamma_tex.c b/src/mesa/drivers/dri/gamma/gamma_tex.c index 2ffb790f28a..ca33c1740ff 100644 --- a/src/mesa/drivers/dri/gamma/gamma_tex.c +++ b/src/mesa/drivers/dri/gamma/gamma_tex.c @@ -400,19 +400,19 @@ void gammaInitTextureObjects( GLcontext *ctx ) ctx->Texture.CurrentUnit = 0; - texObj = ctx->Texture.Unit[0].Current1D; + texObj = ctx->Texture.Unit[0].CurrentTex[TEXTURE_1D_INDEX]; gammaBindTexture( ctx, GL_TEXTURE_1D, texObj ); - texObj = ctx->Texture.Unit[0].Current2D; + texObj = ctx->Texture.Unit[0].CurrentTex[TEXTURE_2D_INDEX]; gammaBindTexture( ctx, GL_TEXTURE_2D, texObj ); #if 0 ctx->Texture.CurrentUnit = 1; - texObj = ctx->Texture.Unit[1].Current1D; + texObj = ctx->Texture.Unit[1].CurrentTex[TEXTURE_1D_INDEX]; gammaBindTexture( ctx, GL_TEXTURE_1D, texObj ); - texObj = ctx->Texture.Unit[1].Current2D; + texObj = ctx->Texture.Unit[1].CurrentTex[TEXTURE_2D_INDEX]; gammaBindTexture( ctx, GL_TEXTURE_2D, texObj ); #endif diff --git a/src/mesa/drivers/dri/gamma/gamma_xmesa.c b/src/mesa/drivers/dri/gamma/gamma_xmesa.c index 2a28902e1e9..7b5b53589c6 100644 --- a/src/mesa/drivers/dri/gamma/gamma_xmesa.c +++ b/src/mesa/drivers/dri/gamma/gamma_xmesa.c @@ -96,7 +96,7 @@ gammaCreateBuffer( __DRIscreenPrivate *driScrnPriv, static void gammaDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } static void diff --git a/src/mesa/drivers/dri/i810/i810screen.c b/src/mesa/drivers/dri/i810/i810screen.c index 9a5a39c1dbc..6e49f3466c3 100644 --- a/src/mesa/drivers/dri/i810/i810screen.c +++ b/src/mesa/drivers/dri/i810/i810screen.c @@ -341,7 +341,7 @@ i810CreateBuffer( __DRIscreenPrivate *driScrnPriv, static void i810DestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } const struct __DriverAPIRec driDriverAPI = { diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index fdce8af31f4..9c540cb2bb7 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -73,6 +73,8 @@ i830CreateContext(const __GLcontextModes * mesaVis, return GL_FALSE; } + _math_matrix_ctr(&intel->ViewportMatrix); + /* Initialize swrast, tnl driver tables: */ intelInitSpanFuncs(ctx); intelInitTriFuncs(ctx); diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 8fc8aa5f900..1a949210789 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -422,10 +422,10 @@ i830_emit_state(struct intel_context *intel) struct i830_hw_state *state = i830->current; int i, count; GLuint dirty; - GET_CURRENT_CONTEXT(ctx); - BATCH_LOCALS; dri_bo *aper_array[3 + I830_TEX_UNITS]; int aper_count; + GET_CURRENT_CONTEXT(ctx); + BATCH_LOCALS; /* We don't hold the lock at this point, so want to make sure that * there won't be a buffer wrap between the state emits and the primitive diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 6e2d41e19ab..7549029a1be 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -119,6 +119,8 @@ i915CreateContext(const __GLcontextModes * mesaVis, return GL_FALSE; } + _math_matrix_ctr(&intel->ViewportMatrix); + /* Initialize swrast, tnl driver tables: */ intelInitSpanFuncs(ctx); intelInitTriFuncs(ctx); diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index f091d600c35..52f09a4b1b2 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -180,9 +180,9 @@ get_result_vector(struct i915_fragment_program *p, switch (inst->DstReg.File) { case PROGRAM_OUTPUT: switch (inst->DstReg.Index) { - case FRAG_RESULT_COLR: + case FRAG_RESULT_COLOR: return UREG(REG_TYPE_OC, 0); - case FRAG_RESULT_DEPR: + case FRAG_RESULT_DEPTH: p->depth_written = 1; return UREG(REG_TYPE_OD, 0); default: diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index e80996580c7..1d39278cbf0 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -89,8 +89,8 @@ intel_flush_inline_primitive(struct intel_context *intel) static void intel_start_inline(struct intel_context *intel, uint32_t prim) { - BATCH_LOCALS; uint32_t batch_flags = LOOP_CLIPRECTS; + BATCH_LOCALS; intel->vtbl.emit_state(intel); @@ -201,10 +201,10 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count) /** Dispatches the accumulated primitive to the batchbuffer. */ void intel_flush_prim(struct intel_context *intel) { - BATCH_LOCALS; dri_bo *aper_array[2]; dri_bo *vb_bo; unsigned int offset, count; + BATCH_LOCALS; /* Must be called after an intel_start_prim. */ assert(intel->prim.primitive != ~0); @@ -989,7 +989,7 @@ intelChooseRenderState(GLcontext * ctx) intel->draw_tri = intel_fallback_tri; if (flags & DD_TRI_SMOOTH) { - if (intel->strict_conformance) + if (intel->conformance_mode > 0) intel->draw_tri = intel_fallback_tri; } @@ -1001,7 +1001,7 @@ intelChooseRenderState(GLcontext * ctx) } if (flags & DD_POINT_SMOOTH) { - if (intel->strict_conformance) + if (intel->conformance_mode > 0) intel->draw_point = intel_fallback_point; } diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 82370162f59..c724218cf56 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -88,7 +88,7 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) memset(key, 0, sizeof(*key)); - key->stencil = ctx->Stencil.Enabled; + key->stencil = ctx->Stencil._Enabled; key->stencil_two_side = ctx->Stencil._TestTwoSide; if (key->stencil) { diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index c45d48dff8e..d830e49e50a 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -181,34 +181,54 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); is_negative = brw_IF(p, BRW_EXECUTE_1); { - brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); - brw_MOV(p, c->reg.t1, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /* + * Both can be negative on GM965/G965 due to RHW workaround + * if so, this object should be rejected. + */ + if (!BRW_IS_G4X(p->brw)) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_neg2); + } + + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); + brw_MOV(p, c->reg.t1, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); } is_negative = brw_ELSE(p, is_negative); { - /* Coming back in. We know that both cannot be negative - * because the line would have been culled in that case. - */ + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case. + */ + + /* If both are positive, do nothing */ + /* Only on GM965/G965 */ + if (!BRW_IS_G4X(p->brw)) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + } - /* If both are positive, do nothing */ - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); - is_neg2 = brw_IF(p, BRW_EXECUTE_1); { - brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); - brw_MOV(p, c->reg.t0, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - brw_ENDIF(p, is_neg2); - } + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + if (!BRW_IS_G4X(p->brw)) { + brw_ENDIF(p, is_neg2); + } + } brw_ENDIF(p, is_negative); } brw_ENDIF(p, plane_active); diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 1dbba37fe7e..7fd37bd05ff 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -455,6 +455,8 @@ static void brw_clip_test( struct brw_clip_compile *c ) struct brw_indirect vt2 = brw_indirect(2, 0); struct brw_compile *p = &c->func; + struct brw_instruction *is_outside; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); @@ -462,53 +464,87 @@ static void brw_clip_test( struct brw_clip_compile *c ) brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + /* clip.xyz < -clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /* test farz, xmax, ymax plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + /* clip.xyz > clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index eaac6224f6e..d96ff293102 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -121,6 +121,9 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /* if conformance mode is set, swrast can handle any size AA point */ ctx->Const.MaxPointSizeAA = 255.0; + /* We want the GLSL compiler to emit code that uses condition codes */ + ctx->Shader.EmitCondCodes = GL_TRUE; + /* ctx->Const.MaxNativeVertexProgramTemps = 32; */ brw_init_state( brw ); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index df90c2027f3..48ed4325bef 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -46,7 +46,7 @@ * * CURBE - constant URB entry. An urb region (entry) used to hold * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. + * preload into the GRF when spawning a thread. * * VUE - vertex URB entry. An urb entry holding a vertex and usually * a vertex header. The header contains control information and @@ -63,7 +63,7 @@ * special and may be overwritten. * * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous + * by sending messages. Message parameters are placed in contiguous * MRF registers. All program output is via these messages. URB * entries are populated by sending a message to the shared URB * function containing the new data, together with a control word, @@ -154,21 +154,22 @@ struct brw_state_flags { GLuint cache; }; + +/** Subclass of Mesa vertex program */ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; }; - +/** Subclass of Mesa fragment program */ struct brw_fragment_program { struct gl_fragment_program program; - GLuint id; + GLuint id; /**< serial no. to identify frag progs, never re-used */ + GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ }; - - /* Data about a particular attempt to compile a program. Note that * there can be many of these, each in a different GL state * corresponding to a different brw_wm_prog_key struct, with different @@ -418,8 +419,8 @@ struct brw_context struct brw_tracked_state **atoms; GLuint nr_atoms; - GLuint nr_draw_regions; - struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; + GLuint nr_color_regions; + struct intel_region *color_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; /** @@ -627,8 +628,6 @@ struct brw_context * brw_vtbl.c */ void brwInitVtbl( struct brw_context *brw ); -void brw_do_flush( struct brw_context *brw, - GLuint flags ); /*====================================================================== * brw_context.c @@ -670,7 +669,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ); */ void brw_upload_urb_fence(struct brw_context *brw); -void brw_upload_constant_buffer_state(struct brw_context *brw); +/* brw_curbe.c + */ +void brw_upload_cs_urb_state(struct brw_context *brw); /*====================================================================== @@ -683,6 +684,32 @@ brw_context( GLcontext *ctx ) return (struct brw_context *)ctx; } +static INLINE struct brw_vertex_program * +brw_vertex_program(struct gl_vertex_program *p) +{ + return (struct brw_vertex_program *) p; +} + +static INLINE const struct brw_vertex_program * +brw_vertex_program_const(const struct gl_vertex_program *p) +{ + return (const struct brw_vertex_program *) p; +} + +static INLINE struct brw_fragment_program * +brw_fragment_program(struct gl_fragment_program *p) +{ + return (struct brw_fragment_program *) p; +} + +static INLINE const struct brw_fragment_program * +brw_fragment_program_const(const struct gl_fragment_program *p) +{ + return (const struct brw_fragment_program *) p; +} + + + #define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) #endif diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 4eaaa5f871b..545dedd34ba 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -53,7 +53,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; + const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; @@ -138,24 +138,24 @@ const struct brw_tracked_state brw_curbe_offsets = { * fixed-function hardware in a double-buffering scheme to avoid a * pipeline stall each time the contents of the curbe is changed. */ -void brw_upload_constant_buffer_state(struct brw_context *brw) +void brw_upload_cs_urb_state(struct brw_context *brw) { - struct brw_constant_buffer_state cbs; - memset(&cbs, 0, sizeof(cbs)); + struct brw_cs_urb_state cs_urb; + memset(&cs_urb, 0, sizeof(cs_urb)); /* It appears that this is the state packet for the CS unit, ie. the * urb entries detailed here are housed in the CS range from the * URB_FENCE command. */ - cbs.header.opcode = CMD_CONST_BUFFER_STATE; - cbs.header.length = sizeof(cbs)/4 - 2; + cs_urb.header.opcode = CMD_CS_URB_STATE; + cs_urb.header.length = sizeof(cs_urb)/4 - 2; /* BRW_NEW_URB_FENCE */ - cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; - cbs.bits0.urb_entry_size = brw->urb.csize - 1; + cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cs_urb.bits0.urb_entry_size = brw->urb.csize - 1; assert(brw->urb.nr_cs_entries); - BRW_CACHED_BATCH_STRUCT(brw, &cbs); + BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); } static GLfloat fixed_plane[6][4] = { @@ -174,10 +174,12 @@ static GLfloat fixed_plane[6][4] = { static void prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLuint sz = brw->curbe.total_size; - GLuint bufsz = sz * 16 * sizeof(GLfloat); + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + const GLuint sz = brw->curbe.total_size; + const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; @@ -189,27 +191,25 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; if (sz == 0) { - if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - return; } - buf = (GLfloat *)malloc(bufsz); - - memset(buf, 0, bufsz); + buf = (GLfloat *) _mesa_calloc(bufsz); + /* fragment shader constants */ if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) - buf[offset + i] = brw->wm.prog_data->param[i][0]; + buf[offset + i] = *brw->wm.prog_data->param[i]; } @@ -244,7 +244,7 @@ static void prepare_constant_buffer(struct brw_context *brw) } } - + /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; GLuint nr = vp->program.Base.Parameters->NumParameters; @@ -252,10 +252,11 @@ static void prepare_constant_buffer(struct brw_context *brw) _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); for (i = 0; i < nr; i++) { - buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; - buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; - buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; - buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; + const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; + buf[offset + i * 4 + 0] = value[0]; + buf[offset + i * 4 + 1] = value[1]; + buf[offset + i * 4 + 2] = value[2]; + buf[offset + i * 4 + 3] = value[3]; } } @@ -274,11 +275,14 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { - free(buf); + /* constants have not changed */ + _mesa_free(buf); } else { + /* constants have changed */ if (brw->curbe.last_buf) - free(brw->curbe.last_buf); + _mesa_free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 39c32255f8b..590b064c7ef 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -734,7 +734,7 @@ #define CMD_URB_FENCE 0x6000 -#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CS_URB_STATE 0x6001 #define CMD_CONST_BUFFER 0x6002 #define CMD_STATE_BASE_ADDRESS 0x6101 diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 99fd587e9fc..5342622a737 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -127,6 +127,7 @@ static void brw_emit_prim(struct brw_context *brw, uint32_t hw_prim) { struct brw_3d_primitive prim_packet; + struct intel_context *intel = &brw->intel; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), @@ -146,10 +147,27 @@ static void brw_emit_prim(struct brw_context *brw, /* Can't wrap here, since we rely on the validated state. */ brw->no_batch_wrap = GL_TRUE; + + /* If we're set to always flush, do it before and after the primitive emit. + * We want to catch both missed flushes that hurt instruction/state cache + * and missed flushes of the render cache as it heads to other parts of + * the besides the draw code. + */ + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } + brw->no_batch_wrap = GL_FALSE; } @@ -194,9 +212,16 @@ static GLboolean check_fallbacks( struct brw_context *brw, GLcontext *ctx = &brw->intel.ctx; GLuint i; - if (!brw->intel.strict_conformance) + /* If we don't require strict OpenGL conformance, never + * use fallbacks. If we're forcing fallbacks, always + * use fallfacks. + */ + if (brw->intel.conformance_mode == 0) return GL_FALSE; + if (brw->intel.conformance_mode == 2) + return GL_TRUE; + if (ctx->Polygon.SmoothFlag) { for (i = 0; i < nr_prims; i++) if (reduced_prim[prim[i].mode] == GL_TRIANGLES) @@ -220,7 +245,7 @@ static GLboolean check_fallbacks( struct brw_context *brw, /* GS doesn't get enough information to know when to reset * the stipple counter?!? */ - if (prim[i].mode == GL_LINE_LOOP) + if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) return GL_TRUE; if (prim[i].mode == GL_POLYGON && @@ -230,13 +255,46 @@ static GLboolean check_fallbacks( struct brw_context *brw, } } - if (ctx->Point.SmoothFlag) { for (i = 0; i < nr_prims; i++) if (prim[i].mode == GL_POINTS) return GL_TRUE; } + + /* BRW hardware doesn't handle GL_CLAMP texturing correctly; + * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP + * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and + * we want strict conformance, force the fallback. + * Right now, we only do this for 2D textures. + */ + { + int u; + for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; + if (texUnit->Enabled) { + if (texUnit->Enabled & TEXTURE_1D_BIT) { + if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_2D_BIT) { + if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_3D_BIT) { + if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { + return GL_TRUE; + } + } + } + } + } + /* Nothing stopping us from the fast path now */ return GL_FALSE; } @@ -261,11 +319,18 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (ctx->NewState) _mesa_update_state( ctx ); + /* We have to validate the textures *before* checking for fallbacks; + * otherwise, the software fallback won't be able to rely on the + * texture state, the firstLevel and lastLevel fields won't be + * set in the intel texture object (they'll both be 0), and the + * software fallback will segfault if it attempts to access any + * texture level other than level 0. + */ + brw_validate_textures( brw ); + if (check_fallbacks(brw, prim, nr_prims)) return GL_FALSE; - brw_validate_textures( brw ); - /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); @@ -346,6 +411,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, retval = GL_TRUE; } + if (intel->always_flush_batch) + intel_batchbuffer_flush(intel->batch); out: UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index b3ae4eef334..c53efba5991 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -129,3 +129,126 @@ const GLuint *brw_get_program( struct brw_compile *p, return (const GLuint *)p->store; } + + +/** + * Subroutine calls require special attention. + * Mesa instructions may be expanded into multiple hardware instructions + * so the prog_instruction::BranchTarget field can't be used as an index + * into the hardware instructions. + * + * The BranchTarget field isn't needed, however. Mesa's GLSL compiler + * emits CAL and BGNSUB instructions with labels that can be used to map + * subroutine calls to actual subroutine code blocks. + * + * The structures and function here implement patching of CAL instructions + * so they jump to the right subroutine code... + */ + + +/** + * For each OPCODE_BGNSUB we create one of these. + */ +struct brw_glsl_label +{ + const char *name; /**< the label string */ + GLuint position; /**< the position of the brw instruction for this label */ + struct brw_glsl_label *next; /**< next in linked list */ +}; + + +/** + * For each OPCODE_CAL we create one of these. + */ +struct brw_glsl_call +{ + GLuint call_inst_pos; /**< location of the CAL instruction */ + const char *sub_name; /**< name of subroutine to call */ + struct brw_glsl_call *next; /**< next in linked list */ +}; + + +/** + * Called for each OPCODE_BGNSUB. + */ +void +brw_save_label(struct brw_compile *c, const char *name, GLuint position) +{ + struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label); + label->name = name; + label->position = position; + label->next = c->first_label; + c->first_label = label; +} + + +/** + * Called for each OPCODE_CAL. + */ +void +brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos) +{ + struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call); + call->call_inst_pos = call_pos; + call->sub_name = name; + call->next = c->first_call; + c->first_call = call; +} + + +/** + * Lookup a label, return label's position/offset. + */ +static GLuint +brw_lookup_label(struct brw_compile *c, const char *name) +{ + const struct brw_glsl_label *label; + for (label = c->first_label; label; label = label->next) { + if (strcmp(name, label->name) == 0) { + return label->position; + } + } + abort(); /* should never happen */ + return ~0; +} + + +/** + * When we're done generating code, this function is called to resolve + * subroutine calls. + */ +void +brw_resolve_cals(struct brw_compile *c) +{ + const struct brw_glsl_call *call; + + for (call = c->first_call; call; call = call->next) { + const GLuint sub_loc = brw_lookup_label(c, call->sub_name); + struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos]; + struct brw_instruction *brw_sub_inst = &c->store[sub_loc]; + GLint offset = brw_sub_inst - brw_call_inst; + + /* patch brw_inst1 to point to brw_inst2 */ + brw_set_src1(brw_call_inst, brw_imm_d(offset * 16)); + } + + /* free linked list of calls */ + { + struct brw_glsl_call *call, *next; + for (call = c->first_call; call; call = next) { + next = call->next; + _mesa_free(call); + } + c->first_call = NULL; + } + + /* free linked list of labels */ + { + struct brw_glsl_label *label, *next; + for (label = c->first_label; label; label = next) { + next = label->next; + _mesa_free(label); + } + c->first_label = NULL; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 9e2b39af9bb..eb99c21711e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -91,8 +91,13 @@ struct brw_indirect { }; +struct brw_glsl_label; +struct brw_glsl_call; + + + #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1200 +#define BRW_EU_MAX_INSN 4000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; @@ -106,9 +111,22 @@ struct brw_compile { GLuint flag_value; GLboolean single_program_flow; struct brw_context *brw; + + struct brw_glsl_label *first_label; /**< linked list of labels */ + struct brw_glsl_call *first_call; /**< linked list of CALs */ }; +void +brw_save_label(struct brw_compile *c, const char *name, GLuint position); + +void +brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos); + +void +brw_resolve_cals(struct brw_compile *c); + + static INLINE int type_sz( GLuint type ) { @@ -152,6 +170,13 @@ static INLINE struct brw_reg brw_reg( GLuint file, GLuint writemask ) { struct brw_reg reg; + if (type == BRW_GENERAL_REGISTER_FILE) + assert(nr < 128); + else if (type == BRW_MESSAGE_REGISTER_FILE) + assert(nr < 9); + else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + reg.type = type; reg.file = file; reg.nr = nr; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 4e099b5945c..6dce1ca48e8 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -55,6 +55,9 @@ static void guess_execution_size( struct brw_instruction *insn, static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest ) { + if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(dest.nr < 128); + insn->bits1.da1.dest_reg_file = dest.file; insn->bits1.da1.dest_reg_type = dest.type; insn->bits1.da1.dest_address_mode = dest.address_mode; @@ -96,10 +99,13 @@ static void brw_set_dest( struct brw_instruction *insn, } static void brw_set_src0( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + insn->bits1.da1.src0_reg_file = reg.file; insn->bits1.da1.src0_reg_type = reg.type; insn->bits2.da1.src0_abs = reg.abs; @@ -169,10 +175,12 @@ static void brw_set_src0( struct brw_instruction *insn, void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + assert(reg.nr < 128); + insn->bits1.da1.src1_reg_file = reg.file; insn->bits1.da1.src1_reg_type = reg.type; insn->bits3.da1.src1_abs = reg.abs; @@ -323,13 +331,13 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, } static void brw_set_sampler_message(struct brw_context *brw, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint sampler, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLboolean eot) + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot) { brw_set_src1(insn, brw_imm_d(0)); @@ -407,7 +415,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, * Convenience routines. */ #define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0) \ { \ @@ -415,7 +423,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ } #define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0, \ struct brw_reg src1) \ @@ -469,9 +477,9 @@ void brw_NOP(struct brw_compile *p) */ struct brw_instruction *brw_JMPI(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); @@ -674,7 +682,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *do_insn) + struct brw_instruction *do_insn) { struct brw_instruction *insn; @@ -931,13 +939,13 @@ void brw_dp_READ_16( struct brw_compile *p, void brw_fb_WRITE(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - GLuint msg_length, - GLuint response_length, - GLboolean eot) + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); @@ -973,8 +981,8 @@ void brw_SAMPLE(struct brw_compile *p, { GLboolean need_stall = 0; - if(writemask == 0) { -/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + if (writemask == 0) { + /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1006,7 +1014,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; -/* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); @@ -1047,8 +1055,7 @@ void brw_SAMPLE(struct brw_compile *p, eot); } - if (need_stall) - { + if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index e63098fdd4a..299357409ce 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -75,8 +75,8 @@ static GLboolean do_check_fallback(struct brw_context *brw) /* _NEW_STENCIL */ - if (ctx->Stencil.Enabled && - !brw->intel.hw_stencil) { + if (ctx->Stencil._Enabled && + (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) { DBG("FALLBACK: stencil\n"); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 9dcdad7b4ef..5c94a49f60a 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -178,7 +178,7 @@ static void upload_psp_urb_cbs(struct brw_context *brw ) { upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); - brw_upload_constant_buffer_state(brw); + brw_upload_cs_urb_state(brw); } const struct brw_tracked_state brw_psp_urb_cbs = { @@ -290,8 +290,21 @@ static void upload_polygon_stipple(struct brw_context *brw) bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; bps.header.length = sizeof(bps)/4-2; - for (i = 0; i < 32; i++) - bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */ + /* Polygon stipple is provided in OpenGL order, i.e. bottom + * row first. If we're rendering to a window (i.e. the + * default frame buffer object, 0), then we need to invert + * it to match our pixel layout. But if we're rendering + * to a FBO (i.e. any named frame buffer object), we *don't* + * need to invert - we already match the layout. + */ + if (ctx->DrawBuffer->Name == 0) { + for (i = 0; i < 32; i++) + bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */ + } + else { + for (i = 0; i < 32; i++) + bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */ + } BRW_CACHED_BATCH_STRUCT(brw, &bps); } @@ -319,8 +332,22 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; - bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; - bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0), + * we have to invert the Y axis in order to match the OpenGL + * pixel coordinate system, and our offset must be matched + * to the window position. If we're drawing to a FBO + * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate + * system works just fine, and there's no window system to + * worry about. + */ + if (brw->intel.ctx.DrawBuffer->Name == 0) { + bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; + bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + } + else { + bpso.bits0.y_offset = 0; + bpso.bits0.x_offset = 0; + } BRW_CACHED_BATCH_STRUCT(brw, &bpso); } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 0c86911044b..d90bd820386 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -38,6 +38,7 @@ #include "brw_context.h" #include "brw_util.h" +#include "brw_wm.h" static void brwBindProgram( GLcontext *ctx, GLenum target, @@ -94,7 +95,6 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { - _mesa_delete_program( ctx, prog ); } @@ -110,30 +110,35 @@ static void brwProgramStringNotify( GLcontext *ctx, GLenum target, struct gl_program *prog ) { + struct brw_context *brw = brw_context(ctx); if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_context *brw = brw_context(ctx); - struct brw_fragment_program *p = (struct brw_fragment_program *)prog; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + struct brw_fragment_program *newFP = brw_fragment_program(fprog); + const struct brw_fragment_program *curFP = + brw_fragment_program_const(brw->fragment_program); + if (fprog->FogOption) { _mesa_append_fog_code(ctx, fprog); fprog->FogOption = GL_NONE; } - if (p == fp) + if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - p->id = brw->program_id++; + newFP->id = brw->program_id++; + newFP->isGLSL = brw_wm_is_glsl(fprog); } else if (target == GL_VERTEX_PROGRAM_ARB) { - struct brw_context *brw = brw_context(ctx); - struct brw_vertex_program *p = (struct brw_vertex_program *)prog; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - if (p == vp) + struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; + struct brw_vertex_program *newVP = brw_vertex_program(vprog); + const struct brw_vertex_program *curVP = + brw_vertex_program_const(brw->vertex_program); + + if (newVP == curVP) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - if (p->program.IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, &p->program); + if (newVP->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &newVP->program); } - p->id = brw->program_id++; + newVP->id = brw->program_id++; /* Also tell tnl about it: */ diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 8c1711538af..c3c85978f4d 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -167,8 +167,14 @@ static void upload_sf_prog(struct brw_context *brw) key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_POLYGON */ - if (key.do_twoside_color) - key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW); + if (key.do_twoside_color) { + /* If we're rendering to a FBO, we have to invert the polygon + * face orientation, just as we invert the viewport in + * sf_unit_create_from_key(). ctx->DrawBuffer->Name will be + * nonzero if we're rendering to such an FBO. + */ + key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0); + } dri_bo_unreference(brw->sf.prog_bo); brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index e96d5354b30..93a9686f718 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -44,6 +44,7 @@ static void upload_sf_vp(struct brw_context *brw) struct brw_sf_viewport sfv; GLfloat y_scale, y_bias; const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + const GLfloat *v = ctx->Viewport._WindowMap.m; memset(&sfv, 0, sizeof(sfv)); @@ -58,8 +59,6 @@ static void upload_sf_vp(struct brw_context *brw) /* _NEW_VIEWPORT */ - const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv.viewport.m00 = v[MAT_SX]; sfv.viewport.m11 = v[MAT_SY] * y_scale; sfv.viewport.m22 = v[MAT_SZ] * depth_scale; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index df839c5b300..81b0a45998f 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -52,7 +52,6 @@ const struct brw_tracked_state brw_cc_vp; const struct brw_tracked_state brw_check_fallback; const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_constant_buffer_state; const struct brw_tracked_state brw_constant_buffer; const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index dc87859f3f5..811940edc05 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw ) { clear_batch_cache(brw); -/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ - brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b28c57c2bcf..5d332d010c2 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -162,6 +162,14 @@ static void brw_debug_prog(const char *name, dri_bo *prog) fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", name, (unsigned int)prog->offset + i * 4 * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + /* Stop at the end of the program. It'd be nice to keep track of the actual + * intended program size instead of guessing like this. + */ + if (data[i * 4 + 0] == 0 && + data[i * 4 + 1] == 0 && + data[i * 4 + 2] == 0 && + data[i * 4 + 3] == 0) + break; } dri_bo_unmap(prog); diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index d97ff27f0a1..89e29812034 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -439,7 +439,7 @@ struct brw_urb_fence } bits1; }; -struct brw_constant_buffer_state /* previously brw_command_streamer */ +struct brw_cs_urb_state { struct header header; diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 9977677fd74..d29eb17f8cf 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -170,8 +170,8 @@ static void calc_wm_input_sizes( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = - (struct brw_vertex_program *)brw->vertex_program; + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 24b7dc30fe1..3807dff9919 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -156,6 +156,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; c->prog_data.total_grf = reg; + + if (INTEL_DEBUG & DEBUG_VS) { + _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + } } @@ -658,7 +664,7 @@ static void emit_nrm( struct brw_vs_compile *c, /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, - GLuint file, + gl_register_file file, GLuint index ) { @@ -954,36 +960,27 @@ static void emit_vertex_write( struct brw_vs_compile *c) } +/** + * Called after code generation to resolve subroutine calls and the + * END instruction. + * \param end_inst points to brw code for END instruction + * \param last_inst points to last instruction emitted before vertex write + */ static void -post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +post_vs_emit( struct brw_vs_compile *c, + struct brw_instruction *end_inst, + struct brw_instruction *last_inst ) { - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, target_insn; - struct prog_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->vp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case OPCODE_CAL: - case OPCODE_BRA: - target_insn = inst1->BranchTarget; - inst2 = &c->vp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - case OPCODE_END: - offset = end_inst - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } - } + GLint offset; + + brw_resolve_cals(&c->func); + + /* patch up the END code to jump past subroutines, etc */ + offset = last_inst - end_inst; + brw_set_src1(end_inst, brw_imm_d(offset * 16)); } + /* Emit the fragment program instructions here. */ void brw_vs_emit(struct brw_vs_compile *c ) @@ -992,7 +989,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct brw_compile *p = &c->func; GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_insn = 0; - struct brw_instruction *end_inst; + GLuint end_offset = 0; + struct brw_instruction *end_inst, *last_inst; struct brw_instruction *if_inst[MAX_IFSN]; struct brw_indirect stack_index = brw_indirect(0, 0); @@ -1035,7 +1033,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* Get argument regs. SWZ is special and does this itself. */ - inst->Data = &p->store[p->nr_insn]; if (inst->Opcode != OPCODE_SWZ) for (i = 0; i < 3; i++) { struct prog_src_register *src = &inst->SrcReg[i]; @@ -1203,7 +1200,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); - inst->Data = &p->store[p->nr_insn]; + brw_save_call(p, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; case OPCODE_RET: @@ -1212,14 +1209,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); + break; case OPCODE_END: + end_offset = p->nr_insn; + /* this instruction will get patched later to jump past subroutine + * code, etc. + */ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; case OPCODE_PRINT: + /* no-op */ + break; case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; case OPCODE_ENDSUB: - /* no-op instructions */ - break; + /* no-op */ + break; default: _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader", inst->Opcode, inst->Opcode < MAX_OPCODE ? @@ -1257,9 +1263,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) release_tmps(c); } - end_inst = &p->store[p->nr_insn]; + end_inst = &p->store[end_offset]; + last_inst = &p->store[p->nr_insn]; + + /* The END instruction will be patched to jump to this code */ emit_vertex_write(c); - post_vs_emit(c, end_inst); - for (insn = 0; insn < nr_insns; insn++) - c->vp->program.Base.Instructions[insn].Data = NULL; + + post_vs_emit(c, end_inst, last_inst); } diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index b501a59ccd5..960bbb311e3 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -67,11 +67,13 @@ static void brw_destroy_context( struct intel_context *intel ) brw_destroy_state(brw); brw_draw_destroy( brw ); + _mesa_free(brw->wm.compile_data); + brw_FrameBufferTexDestroy( brw ); - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); - brw->state.nr_draw_regions = 0; + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + brw->state.nr_color_regions = 0; intel_region_release(&brw->state.depth_region); dri_bo_release(&brw->curbe.curbe_bo); @@ -90,6 +92,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->wm.bind_bo); for (i = 0; i < BRW_WM_MAX_SURF; i++) dri_bo_release(&brw->wm.surf_bo[i]); + dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); dri_bo_release(&brw->cc.prog_bo); @@ -102,25 +105,25 @@ static void brw_destroy_context( struct intel_context *intel ) * called from intelDrawBuffer() */ static void brw_set_draw_region( struct intel_context *intel, - struct intel_region *draw_regions[], + struct intel_region *color_regions[], struct intel_region *depth_region, - GLuint num_regions) + GLuint num_color_regions) { struct brw_context *brw = brw_context(&intel->ctx); - int i; + GLuint i; /* release old color/depth regions */ if (brw->state.depth_region != depth_region) brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); intel_region_release(&brw->state.depth_region); /* reference new color/depth regions */ - for (i = 0; i < num_regions; i++) - intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]); + for (i = 0; i < num_color_regions; i++) + intel_region_reference(&brw->state.color_regions[i], color_regions[i]); intel_region_reference(&brw->state.depth_region, depth_region); - brw->state.nr_draw_regions = num_regions; + brw->state.nr_color_regions = num_color_regions; } @@ -181,23 +184,6 @@ static void brw_note_unlock( struct intel_context *intel ) } -void brw_do_flush( struct brw_context *brw, GLuint flags ) -{ - struct brw_mi_flush flush; - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - flush.flags = flags; - BRW_BATCH_STRUCT(brw, &flush); -} - - -static void brw_emit_flush( struct intel_context *intel, GLuint unused ) -{ - brw_do_flush(brw_context(&intel->ctx), - BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); -} - - /* called from intelWaitForIdle() and intelFlush() * * For now, just flush everything. Could be smarter later. @@ -234,6 +220,5 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.flush_cmd = brw_flush_cmd; - brw->intel.vtbl.emit_flush = brw_emit_flush; brw->intel.vtbl.debug_batch = brw_debug_batch; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index ea708a06815..1645ca0b70e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -80,6 +80,53 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) } +/** + * Do GPU code generation for non-GLSL shader. non-GLSL shaders have + * no flow control instructions so we can more readily do SSA-style + * optimizations. + */ +static void +brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + */ + c->grf_limit = BRW_WM_MAX_GRF / 2; + + brw_wm_pass2(c); + + c->prog_data.total_grf = c->max_wm_grf; + if (c->last_scratch) { + c->prog_data.total_scratch = c->last_scratch + 0x40; + } + else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); +} + + +/** + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. + */ static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) @@ -90,52 +137,32 @@ static void do_wm_prog( struct brw_context *brw, c = brw->wm.compile_data; if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; + brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); + c = brw->wm.compile_data; } else { - memset(c, 0, sizeof(*brw->wm.compile_data)); + memset(c, 0, sizeof(*brw->wm.compile_data)); } memcpy(&c->key, key, sizeof(*key)); c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - brw_init_compile(brw, &c->func); - if (brw_wm_is_glsl(&c->fp->program)) { - brw_wm_glsl_emit(brw, c); - } else { - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; - } else { - c->prog_data.total_scratch = 0; - } - - /* Emit GEN4 code. - */ - brw_wm_emit(c); + brw_init_compile(brw, &c->func); + + /* temporary sanity check assertion */ + ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. + */ + if (fp->isGLSL) { + brw_wm_glsl_emit(brw, c); } + else { + brw_wm_non_glsl_emit(brw, c); + } + if (INTEL_DEBUG & DEBUG_WM) fprintf(stderr, "\n"); @@ -159,7 +186,7 @@ static void brw_wm_populate_key( struct brw_context *brw, { GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_fragment_program *fp = + const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; GLuint lookup = 0; GLuint line_aa; @@ -174,7 +201,7 @@ static void brw_wm_populate_key( struct brw_context *brw, ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) + if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ @@ -186,7 +213,7 @@ static void brw_wm_populate_key( struct brw_context *brw, lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; /* _NEW_STENCIL */ - if (ctx->Stencil.Enabled) { + if (ctx->Stencil._Enabled) { lookup |= IZ_STENCIL_TEST_ENABLE_BIT; if (ctx->Stencil.WriteMask[0] || @@ -278,10 +305,8 @@ static void brw_wm_populate_key( struct brw_context *brw, key->drawable_height = brw->intel.driDrawable->h; } - /* Extra info: - */ + /* The unique fragment program ID */ key->program_string_id = fp->id; - } @@ -305,8 +330,6 @@ static void brw_prepare_wm_prog(struct brw_context *brw) } -/* See brw_wm.c: - */ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 0f46a25b1a1..7f0e5702f2e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -143,13 +143,12 @@ struct brw_wm_instruction { GLuint writemask:4; GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ + GLuint tex_shadow:1; /* do shadow comparison? */ GLuint eot:1; /* End of thread indicator for FB_WRITE*/ GLuint target:10; /* target binding table index for FB_WRITE*/ }; -#define PROGRAM_INTERNAL_PARAM - #define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) @@ -240,13 +239,15 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + /** Mapping from Mesa registers to hardware registers */ struct { GLboolean inited; struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; + GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; @@ -281,4 +282,6 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); + + #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index b5050a3e40b..f2dca9caa6c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -671,7 +671,6 @@ static void emit_tex( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; GLuint msgLength, responseLength; - GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0; GLuint i, nr; GLuint emit; @@ -693,7 +692,7 @@ static void emit_tex( struct brw_wm_compile *c, break; } - if (shadow) { + if (inst->tex_shadow) { nr = 4; emit |= WRITEMASK_W; } @@ -718,7 +717,7 @@ static void emit_tex( struct brw_wm_compile *c, inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ inst->tex_unit, /* sampler */ inst->writemask, - (shadow ? + (inst->tex_shadow ? BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE : BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE), responseLength, @@ -886,6 +885,9 @@ static void emit_aa( struct brw_wm_compile *c, /* Post-fragment-program processing. Send the results to the * framebuffer. + * \param arg0 the fragment color + * \param arg1 the pass-through depth value + * \param arg2 the shader-computed depth value */ static void emit_fb_write( struct brw_wm_compile *c, struct brw_reg *arg0, diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index ea3f3fc678f..533be3858e9 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -129,7 +129,7 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx) reg.Index = idx; reg.WriteMask = WRITEMASK_XYZW; reg.RelAddr = 0; - reg.CondMask = 0; + reg.CondMask = COND_TR; reg.CondSwizzle = 0; reg.CondSrc = 0; reg.pad = 0; @@ -183,16 +183,16 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c, { struct prog_instruction *inst = get_fp_inst(c); *inst = *inst0; - inst->Data = (void *)inst0; return inst; } -static struct prog_instruction * emit_op(struct brw_wm_compile *c, +static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, GLuint op, struct prog_dst_register dest, GLuint saturate, GLuint tex_src_unit, GLuint tex_src_target, + GLuint tex_shadow, struct prog_src_register src0, struct prog_src_register src1, struct prog_src_register src2 ) @@ -206,6 +206,7 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, inst->SaturateMode = saturate; inst->TexSrcUnit = tex_src_unit; inst->TexSrcTarget = tex_src_target; + inst->TexShadow = tex_shadow; inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; @@ -213,6 +214,20 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, } +static struct prog_instruction * emit_op(struct brw_wm_compile *c, + GLuint op, + struct prog_dst_register dest, + GLuint saturate, + struct prog_src_register src0, + struct prog_src_register src1, + struct prog_src_register src2 ) +{ + return emit_tex_op(c, op, dest, saturate, + 0, 0, 0, /* tex unit, target, shadow */ + src0, src1, src2); +} + + /*********************************************************************** @@ -234,7 +249,7 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) emit_op(c, WM_PIXELXY, dst_mask(pixel_xy, WRITEMASK_XY), - 0, 0, 0, + 0, payload_r0_depth, src_undef(), src_undef()); @@ -257,7 +272,7 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) emit_op(c, WM_DELTAXY, dst_mask(delta_xy, WRITEMASK_XY), - 0, 0, 0, + 0, pixel_xy, payload_r0_depth, src_undef()); @@ -274,14 +289,13 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) struct prog_dst_register pixel_w = get_temp(c); struct prog_src_register deltas = get_delta_xy(c); struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); - - + /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ emit_op(c, WM_PIXELW, dst_mask(pixel_w, WRITEMASK_W), - 0, 0, 0, + 0, interp_wpos, deltas, src_undef()); @@ -316,7 +330,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_WPOSXY, dst_mask(dst, WRITEMASK_XY), - 0, 0, 0, + 0, get_pixel_xy(c), src_undef(), src_undef()); @@ -328,7 +342,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_LINTERP, dst, - 0, 0, 0, + 0, interp, deltas, arg2); @@ -339,7 +353,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_CINTERP, dst, - 0, 0, 0, + 0, interp, src_undef(), src_undef()); @@ -348,7 +362,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_LINTERP, dst, - 0, 0, 0, + 0, interp, deltas, src_undef()); @@ -358,7 +372,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_PINTERP, dst, - 0, 0, 0, + 0, interp, deltas, get_pixel_w(c)); @@ -378,7 +392,7 @@ static void emit_ddx( struct brw_wm_compile *c, emit_op(c, OPCODE_DDX, inst->DstReg, - 0, 0, 0, + 0, interp, get_pixel_w(c), src_undef()); @@ -394,7 +408,7 @@ static void emit_ddy( struct brw_wm_compile *c, emit_op(c, OPCODE_DDY, inst->DstReg, - 0, 0, 0, + 0, interp, get_pixel_w(c), src_undef()); @@ -489,13 +503,12 @@ static void precalc_dst( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, dst_mask(dst, WRITEMASK_Y), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src0, src1, src_undef()); } - if (dst.WriteMask & WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); @@ -505,7 +518,7 @@ static void precalc_dst( struct brw_wm_compile *c, swz = emit_op(c, OPCODE_SWZ, dst_mask(dst, WRITEMASK_XZ), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), src_undef()); @@ -518,7 +531,7 @@ static void precalc_dst( struct brw_wm_compile *c, emit_op(c, OPCODE_MOV, dst_mask(dst, WRITEMASK_W), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src1, src_undef(), src_undef()); @@ -540,7 +553,7 @@ static void precalc_lit( struct brw_wm_compile *c, swz = emit_op(c, OPCODE_SWZ, dst_mask(dst, WRITEMASK_XW), - 0, 0, 0, + 0, src_swizzle1(src0, SWIZZLE_ONE), src_undef(), src_undef()); @@ -548,12 +561,11 @@ static void precalc_lit( struct brw_wm_compile *c, swz->SrcReg[0].NegateBase = 0; } - if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, OPCODE_LIT, dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src0, src_undef(), src_undef()); @@ -589,7 +601,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmpcoord = src0 (i.e.: coord = src0) */ out = emit_op(c, OPCODE_MOV, tmpcoord, - 0, 0, 0, + 0, src0, src_undef(), src_undef()); @@ -599,7 +611,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp0 = MAX(coord.X, coord.Y) */ emit_op(c, OPCODE_MAX, tmp0, - 0, 0, 0, + 0, src_swizzle1(coord, X), src_swizzle1(coord, Y), src_undef()); @@ -607,7 +619,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp1 = MAX(tmp0, coord.Z) */ emit_op(c, OPCODE_MAX, tmp1, - 0, 0, 0, + 0, tmp0src, src_swizzle1(coord, Z), src_undef()); @@ -615,7 +627,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp0 = 1 / tmp1 */ emit_op(c, OPCODE_RCP, tmp0, - 0, 0, 0, + 0, tmp1src, src_undef(), src_undef()); @@ -623,7 +635,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmpCoord = src0 * tmp0 */ emit_op(c, OPCODE_MUL, tmpcoord, - 0, 0, 0, + 0, src0, tmp0src, src_undef()); @@ -646,7 +658,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, tmpcoord, - 0, 0, 0, + 0, inst->SrcReg[0], scale, src_undef()); @@ -686,22 +698,23 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp = TEX ... */ - emit_op(c, - OPCODE_TEX, - tmp, - inst->SaturateMode, - unit, - inst->TexSrcTarget, - coord, - src_undef(), - src_undef()); + emit_tex_op(c, + OPCODE_TEX, + tmp, + inst->SaturateMode, + unit, + inst->TexSrcTarget, + inst->TexShadow, + coord, + src_undef(), + src_undef()); /* tmp.xyz = ADD TMP, C0 */ emit_op(c, OPCODE_ADD, dst_mask(tmp, WRITEMASK_XYZ), - 0, 0, 0, + 0, tmpsrc, C0, src_undef()); @@ -712,7 +725,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), - 0, 0, 0, + 0, tmpsrc, src_swizzle1(C0, W), src_undef()); @@ -727,7 +740,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), - 0, 0, 0, + 0, swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), C1, src_swizzle1(tmpsrc, Y)); @@ -737,7 +750,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_Y), - 0, 0, 0, + 0, src_swizzle1(tmpsrc, Z), src_swizzle1(C1, W), src_swizzle1(src_reg_from_dst(dst), Y)); @@ -746,15 +759,16 @@ static void precalc_tex( struct brw_wm_compile *c, } else { /* ordinary RGBA tex instruction */ - emit_op(c, - OPCODE_TEX, - inst->DstReg, - inst->SaturateMode, - unit, - inst->TexSrcTarget, - coord, - src_undef(), - src_undef()); + emit_tex_op(c, + OPCODE_TEX, + inst->DstReg, + inst->SaturateMode, + unit, + inst->TexSrcTarget, + inst->TexShadow, + coord, + src_undef(), + src_undef()); } /* For GL_EXT_texture_swizzle: */ @@ -764,7 +778,6 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_SWZ, inst->DstReg, SATURATE_OFF, /* saturate already done above */ - 0, 0, /* tex unit, target N/A */ src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), src_undef(), src_undef()); @@ -813,7 +826,7 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, OPCODE_RCP, dst_mask(tmp, WRITEMASK_W), - 0, 0, 0, + 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), src_undef(), src_undef()); @@ -823,7 +836,7 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_XYZ), - 0, 0, 0, + 0, src0, src_swizzle1(src_reg_from_dst(tmp), W), src_undef()); @@ -849,42 +862,41 @@ static void precalc_txp( struct brw_wm_compile *c, static void emit_fb_write( struct brw_wm_compile *c ) { struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); + struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); struct prog_src_register outcolor; GLuint i; struct prog_instruction *inst, *last_inst; struct brw_context *brw = c->func.brw; - /* inst->Sampler is not used by backend, - use it for fb write target and eot */ - - if (brw->state.nr_draw_regions > 1) { - for (i = 0 ; i < brw->state.nr_draw_regions; i++) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0, - outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - if (c->fp_fragcolor_emitted) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, 0, 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - } - } - last_inst->Sampler |= 1; //eot + /* The inst->Aux field is used for FB write target and the EOT marker */ + + if (brw->state.nr_color_regions > 1) { + for (i = 0 ; i < brw->state.nr_color_regions; i++) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); + last_inst = inst = emit_op(c, + WM_FB_WRITE, dst_mask(dst_undef(),0), 0, + outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + if (c->fp_fragcolor_emitted) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + } + } + last_inst->Aux |= 1; //eot } else { /* if gl_FragData[0] is written, use it, else use gl_FragColor */ if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, 0, 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = 1|(0<<1); + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = 1|(0<<1); } } @@ -915,9 +927,9 @@ static void validate_dst_regs( struct brw_wm_compile *c, const struct prog_instruction *inst ) { if (inst->DstReg.File == PROGRAM_OUTPUT) { - GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLR) - c->fp_fragcolor_emitted = 1; + GLuint idx = inst->DstReg.Index; + if (idx == FRAG_RESULT_COLOR) + c->fp_fragcolor_emitted = 1; } } @@ -937,11 +949,15 @@ static void print_insns( const struct prog_instruction *insn, 3); } else - _mesa_printf("UNKNOWN\n"); - + _mesa_printf("965 Opcode %d\n", insn->Opcode); } } + +/** + * Initial pass for fragment program code generation. + * This function is used by both the GLSL and non-GLSL paths. + */ void brw_wm_pass_fp( struct brw_wm_compile *c ) { struct brw_fragment_program *fp = c->fp; @@ -958,15 +974,19 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) c->pixel_w = src_undef(); c->nr_fp_insns = 0; - /* Emit preamble instructions: + /* Emit preamble instructions. This is where special instructions such as + * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from varying vars. */ - - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; validate_src_regs(c, inst); validate_dst_regs(c, inst); } + + /* Loop over all instructions doing assorted simplifications and + * transformations. + */ for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; struct prog_instruction *out; @@ -975,7 +995,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) * necessary: */ - switch (inst->Opcode) { case OPCODE_SWZ: out = emit_insn(c, inst); @@ -1055,9 +1074,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) } if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + _mesa_printf("pass_fp:\n"); + print_insns( c->prog_instructions, c->nr_fp_insns ); + _mesa_printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 8fd776ac393..4cf092226cf 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -8,12 +8,17 @@ enum _subroutine { SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 }; -/* Only guess, need a flag in gl_fragment_program later */ + +/** + * Determine if the given fragment program uses GLSL features such + * as flow conditionals, loops, subroutines. + * Some GLSL shaders may use these features, others might not. + */ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; for (i = 0; i < fp->Base.NumInstructions; i++) { - struct prog_instruction *inst = &fp->Base.Instructions[i]; + const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: case OPCODE_TRUNC: @@ -36,6 +41,10 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) return GL_FALSE; } + +/** + * Record the mapping of a Mesa register to a hardware register. + */ static void set_reg(struct brw_wm_compile *c, int file, int index, int component, struct brw_reg reg) { @@ -43,6 +52,10 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, c->wm_regs[file][index][component].inited = GL_TRUE; } +/** + * Examine instruction's write mask to find index of first component + * enabled for writing. + */ static int get_scalar_dst_index(struct prog_instruction *inst) { int i; @@ -62,6 +75,10 @@ static struct brw_reg alloc_tmp(struct brw_wm_compile *c) return reg; } +/** + * Save current temp register info. + * There must be a matching call to release_tmps(). + */ static int mark_tmps(struct brw_wm_compile *c) { return c->tmp_index; @@ -77,8 +94,22 @@ static void release_tmps(struct brw_wm_compile *c, int mark) c->tmp_index = mark; } +/** + * Convert Mesa src register to brw register. + * + * Since we're running in SOA mode each Mesa register corresponds to four + * hardware registers. We allocate the hardware registers as needed here. + * + * \param file register file, one of PROGRAM_x + * \param index register number + * \param component src component (X=0, Y=1, Z=2, W=3) + * \param nr not used?!? + * \param neg negate value? + * \param abs take absolute value? + */ static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs) +get_reg(struct brw_wm_compile *c, int file, int index, int component, + int nr, GLuint neg, GLuint abs) { struct brw_reg reg; switch (file) { @@ -89,21 +120,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL break; case PROGRAM_UNDEFINED: return brw_null_reg(); - default: + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + case PROGRAM_PAYLOAD: break; + default: + _mesa_problem(NULL, "Unexpected file in get_reg()"); + return brw_null_reg(); } - if(c->wm_regs[file][index][component].inited) + /* see if we've already allocated a HW register for this Mesa register */ + if (c->wm_regs[file][index][component].inited) { + /* yes, re-use */ reg = c->wm_regs[file][index][component].reg; - else + } + else { + /* no, allocate new register */ reg = brw_vec8_grf(c->reg_index, 0); + } - if(!c->wm_regs[file][index][component].inited) { + /* if this is a new register allocation, record it in the table */ + if (!c->wm_regs[file][index][component].inited) { set_reg(c, file, index, component, reg); c->reg_index++; } - if (neg & (1<< component)) { + if (c->reg_index >= BRW_WM_MAX_GRF - 12) { + /* ran out of temporary registers! */ +#if 1 + /* This is a big hack for now. + * Return bad register index, just don't hang the GPU. + */ + _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); + c->reg_index = BRW_WM_MAX_GRF - 13; +#else + return brw_null_reg(); +#endif + } + + if (neg & (1 << component)) { reg = negate(reg); } if (abs) @@ -111,6 +167,12 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL return reg; } + +/** + * Preallocate registers. This sets up the Mesa to hardware register + * mapping for certain registers, such as constants (uniforms/state vars) + * and shader inputs. + */ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; @@ -119,29 +181,42 @@ static void prealloc_reg(struct brw_wm_compile *c) GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; for (i = 0; i < 4; i++) { - reg = (i < c->key.nr_depth_regs) - ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); + if (i < c->key.nr_depth_regs) + reg = brw_vec8_grf(i * 2, 0); + else + reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2*c->key.nr_depth_regs; + c->reg_index += 2 * c->key.nr_depth_regs; + + /* constants */ { - int nr_params = c->fp->program.Base.Parameters->NumParameters; - struct gl_program_parameter_list *plist = + const int nr_params = c->fp->program.Base.Parameters->NumParameters; + const struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; int index = 0; - c->prog_data.nr_params = 4*nr_params; + + /* number of float constants */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ for (i = 0; i < nr_params; i++) { - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index/8, - index%8); - c->prog_data.param[index] = - &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); } } - c->nr_creg = 2*((4*nr_params+15)/16); + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); c->reg_index += c->nr_creg; } + + /* fragment shader inputs */ for (i = 0; i < FRAG_ATTRIB_MAX; i++) { if (inputs & (1<<i)) { nr_interp_regs++; @@ -149,9 +224,9 @@ static void prealloc_reg(struct brw_wm_compile *c) for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, i, j, reg); c->reg_index += 2; - } } + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg; @@ -161,6 +236,10 @@ static void prealloc_reg(struct brw_wm_compile *c) c->reg_index += 2; } + +/** + * Convert Mesa dst register to brw register. + */ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, struct prog_instruction *inst, int component, int nr) { @@ -168,6 +247,10 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, 0, 0); } + +/** + * Convert Mesa src register to brw register. + */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, struct prog_src_register *src, int index, int nr) { @@ -176,13 +259,15 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, src->NegateBase, src->Abs); } -/* Subroutines are minimal support for resusable instruction sequences. - They are implemented as simply as possible to minimise overhead: there - is no explicit support for communication between the caller and callee - other than saving the return address in a temporary register, nor is - there any automatic local storage. This implies that great care is - required before attempting reentrancy or any kind of nested - subroutine invocations. */ +/** + * Subroutines are minimal support for resusable instruction sequences. + * They are implemented as simply as possible to minimise overhead: there + * is no explicit support for communication between the caller and callee + * other than saving the return address in a temporary register, nor is + * there any automatic local storage. This implies that great care is + * required before attempting reentrancy or any kind of nested + * subroutine invocations. + */ static void invoke_subroutine( struct brw_wm_compile *c, enum _subroutine subroutine, void (*emit)( struct brw_wm_compile * ) ) @@ -319,11 +404,10 @@ static void emit_pixel_xy(struct brw_wm_compile *c, stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } - } static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; @@ -351,10 +435,8 @@ static void emit_delta_xy(struct brw_wm_compile *c, negate(suboffset(r1,1))); } - } - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -397,33 +479,59 @@ static void emit_fb_write(struct brw_wm_compile *c, */ if (c->key.aa_dest_stencil_reg) nr += 1; - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); + + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) - { - if (c->key.computes_depth) { - src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } else { - src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } - - nr += 2; + if (c->key.source_depth_to_render_target) { + if (c->key.computes_depth) { + src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + else { + src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } + + if (c->key.dest_depth_reg) { + GLuint comp = c->key.dest_depth_reg / 2; + GLuint off = c->key.dest_depth_reg % 2; + + assert(comp == 1); + assert(off == 0); +#if 0 + /* XXX do we need this code? comp always 1, off always 0, it seems */ + if (off != 0) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_pop_insn_state(p); + } + else +#endif + { + struct brw_reg src = get_src_reg(c, &inst->SrcReg[1], 1, 1); + brw_MOV(p, brw_message_reg(nr), src); + } + nr += 2; } - target = inst->Sampler >> 1; - eot = inst->Sampler & 1; + + target = inst->Aux >> 1; + eot = inst->Aux & 1; fire_fb_write(c, 0, nr, target, eot); } @@ -465,12 +573,12 @@ static void emit_linterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0; + GLuint nr, i; src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - GLuint nr = src0.nr; - int i; + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -494,10 +602,10 @@ static void emit_cinterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, src0; + GLuint nr, i; src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - GLuint nr = src0.nr; - int i; + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -521,13 +629,13 @@ static void emit_pinterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0, w; + GLuint nr, i; src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); w = get_src_reg(c, &inst->SrcReg[2], 3, 1); - GLuint nr = src0.nr; - int i; + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -627,23 +735,46 @@ static void emit_dph(struct brw_wm_compile *c, brw_set_saturate(p, 0); } +/** + * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. + * Note that the result of the function is smeared across the dest + * register's X, Y, Z and W channels (subject to writemasking of course). + */ static void emit_math1(struct brw_wm_compile *c, struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; - struct brw_reg src0, dst; + struct brw_reg src0, dst, tmp; + const int mark = mark_tmps( c ); + int i; + + tmp = alloc_tmp(c); + /* Get first component of source register */ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + + /* tmp = func(src0) */ brw_MOV(p, brw_message_reg(2), src0); brw_math(p, - dst, - func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); + tmp, + func, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/ + + /* replicate tmp value across enabled dest channels */ + for (i = 0; i < 4; i++) { + if (inst->DstReg.WriteMask & (1 << i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, tmp); + } + } + + release_tmps(c, mark); } static void emit_rcp(struct brw_wm_compile *c, @@ -1045,23 +1176,23 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static __inline struct brw_reg high_words( struct brw_reg reg ) +static INLINE struct brw_reg high_words( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), 0, 8, 2 ); } -static __inline struct brw_reg low_words( struct brw_reg reg ) +static INLINE struct brw_reg low_words( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); } -static __inline struct brw_reg even_bytes( struct brw_reg reg ) +static INLINE struct brw_reg even_bytes( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); } -static __inline struct brw_reg odd_bytes( struct brw_reg reg ) +static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), 0, 16, 2 ); @@ -1366,9 +1497,11 @@ static void emit_noise2( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* The three-dimensional case is much like the one- and two- versions above, - but since the number of corners is rapidly growing we now pack 16 16-bit - hashes into each register to extract more parallelism from the EUs. */ +/** + * The three-dimensional case is much like the one- and two- versions above, + * but since the number of corners is rapidly growing we now pack 16 16-bit + * hashes into each register to extract more parallelism from the EUs. + */ static void noise3_sub( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; @@ -1670,13 +1803,15 @@ static void emit_noise3( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* For the four-dimensional case, the little micro-optimisation benefits - we obtain by unrolling all the loops aren't worth the massive bloat it - now causes. Instead, we loop twice around performing a similar operation - to noise3, once for the w=0 cube and once for the w=1, with a bit more - code to glue it all together. */ -static void noise4_sub( struct brw_wm_compile *c ) { - +/** + * For the four-dimensional case, the little micro-optimisation benefits + * we obtain by unrolling all the loops aren't worth the massive bloat it + * now causes. Instead, we loop twice around performing a similar operation + * to noise3, once for the w=0 cube and once for the w=1, with a bit more + * code to glue it all together. + */ +static void noise4_sub( struct brw_wm_compile *c ) +{ struct brw_compile *p = &c->func; struct brw_reg param[ 4 ], x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ @@ -2244,28 +2379,12 @@ static void emit_tex(struct brw_wm_compile *c, brw_MOV(p, dst[3], brw_imm_f(1.0)); } +/** + * Resolve subroutine calls after code emit is done. + */ static void post_wm_emit( struct brw_wm_compile *c ) { - GLuint nr_insns = c->fp->program.Base.NumInstructions; - GLuint insn, target_insn; - struct prog_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->fp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case OPCODE_CAL: - target_insn = inst1->BranchTarget; - inst2 = &c->fp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } - } + brw_resolve_cals(&c->func); } static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) @@ -2285,10 +2404,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { struct prog_instruction *inst = &c->prog_instructions[i]; - struct prog_instruction *orig_inst; - - if ((orig_inst = inst->Data) != 0) - orig_inst->Data = current_insn(p); if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); @@ -2446,7 +2561,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_ENDIF(p, if_inst[--if_insn]); break; case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; case OPCODE_ENDSUB: + /* no-op */ break; case OPCODE_CAL: brw_push_insn_state(p); @@ -2456,8 +2574,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); - orig_inst = inst->Data; - orig_inst->Data = &p->store[p->nr_insn]; + brw_save_call(&c->func, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_pop_insn_state(p); break; @@ -2510,14 +2627,34 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - for (i = 0; i < c->fp->program.Base.NumInstructions; i++) - c->fp->program.Base.Instructions[i].Data = NULL; + + if (c->reg_index >= BRW_WM_MAX_GRF) { + _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); + /* XXX we need to do some proper error recovery here */ + } } + +/** + * Do GPU code generation for shaders that use GLSL features such as + * flow control. Other shaders will be compiled with the + */ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("brw_wm_glsl_emit:\n"); + } + + /* initial instruction translation/simplification */ brw_wm_pass_fp(c); + + /* actual code generation */ brw_wm_emit_glsl(brw, c); + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "brw_wm_glsl_emit done"); + } + c->prog_data.total_grf = c->reg_index; c->prog_data.total_scratch = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 590cd946ec3..2debd0678a5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -334,8 +334,9 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, } -static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, - const struct prog_instruction *inst ) +static void +translate_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_wm_instruction *out = get_instruction(c); GLuint writemask = inst->DstReg.WriteMask; @@ -348,8 +349,9 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, out->saturate = (inst->SaturateMode != SATURATE_OFF); out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; - out->eot = inst->Sampler & 1; - out->target = inst->Sampler>>1; + out->tex_shadow = inst->TexShadow; + out->eot = inst->Aux & 1; + out->target = inst->Aux >> 1; /* Args: */ @@ -365,8 +367,6 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, pass0_set_dst_scalar(c, out, inst, writemask); else pass0_set_dst(c, out, inst, writemask); - - return out; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 6eaed8a665b..cf031899dd2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -210,9 +210,10 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_TEX: + case OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); - if (c->key.shadowtex_mask & (1<<inst->tex_unit)) + if (inst->tex_shadow) read0 |= WRITEMASK_Z; break; @@ -267,7 +268,6 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_DST: - case OPCODE_TXP: default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index b6dac0d6983..68a9296a713 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -95,6 +95,7 @@ struct wm_sampler_key { int sampler_count; struct wm_sampler_entry { + GLenum tex_target; GLenum wrap_r, wrap_s, wrap_t; float maxlod, minlod; float lod_bias; @@ -168,19 +169,20 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, } } - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); - sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); - - /* Fulsim complains if I don't do this. Hardware doesn't mind: - */ -#if 0 - if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { + if (key->tex_target == GL_TEXTURE_CUBE_MAP && + (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { + /* If we're using anything but nearest sampling for a cube map, we + * need to set this wrap mode to avoid GPU lock-ups. + */ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; } -#endif + else { + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + } /* Set shadow function: */ @@ -234,6 +236,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw, struct gl_texture_image *firstImage = texObj->Image[0][intelObj->firstLevel]; + entry->tex_target = texObj->Target; + entry->wrap_r = texObj->WrapR; entry->wrap_s = texObj->WrapS; entry->wrap_t = texObj->WrapT; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 3c3b3473d63..63fc8a004fd 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -62,6 +62,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { GLcontext *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -103,11 +104,14 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; + (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = brw_wm_is_glsl(fp); + key->is_glsl = bfp->isGLSL; + + /* temporary sanity check assertion */ + ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); /* XXX: This needs a flag to indicate when it changes. */ key->stats_wm = intel->stats_wm; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index d70f9c646cd..9b320480b6d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -139,7 +139,18 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; case MESA_FORMAT_S8_Z24: - return BRW_SURFACEFORMAT_I24X8_UNORM; + /* XXX: these different surface formats don't seem to + * make any difference for shadow sampler/compares. + */ + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I24X8_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A24X8_UNORM; + else + return BRW_SURFACEFORMAT_L24X8_UNORM; + + case MESA_FORMAT_DUDV8: + return BRW_SURFACEFORMAT_R8G8_SNORM; default: assert(0); @@ -381,8 +392,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, * a more restrictive relocation to emit. */ dri_bo_emit_reloc(brw->wm.surf_bo[unit], - I915_GEM_DOMAIN_RENDER | - I915_GEM_DOMAIN_SAMPLER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0, offsetof(struct brw_surface_state, ss1), @@ -447,13 +457,13 @@ static void prepare_wm_surfaces(struct brw_context *brw ) GLuint i; int old_nr_surfaces; - if (brw->state.nr_draw_regions > 1) { - for (i = 0; i < brw->state.nr_draw_regions; i++) { - brw_update_region_surface(brw, brw->state.draw_regions[i], i, + if (brw->state.nr_color_regions > 1) { + for (i = 0; i < brw->state.nr_color_regions; i++) { + brw_update_region_surface(brw, brw->state.color_regions[i], i, GL_FALSE); } - }else { - brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); + } else { + brw_update_region_surface(brw, brw->state.color_regions[0], 0, GL_TRUE); } old_nr_surfaces = brw->wm.nr_surfaces; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 9d9937289a8..29dc05c518e 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -207,7 +207,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, used); /* Emit a flush if the bufmgr doesn't do it for us. */ - if (!intel->ttm) { + if (intel->always_flush_cache || !intel->ttm) { *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); batch->ptr += 4; used = batch->ptr - batch->map; diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 208f90c0abb..4e033082b4d 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -332,6 +332,8 @@ intelEmitCopyBlit(struct intel_context *intel, switch (cpp) { case 1: + CMD = XY_SRC_COPY_BLT_CMD; + break; case 2: case 3: BR13 |= (1 << 24); @@ -562,7 +564,6 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } } } - intel_batchbuffer_emit_mi_flush(intel->batch); } UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 60d7bb3770c..b7c7eeb368f 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -35,9 +35,6 @@ #include "intel_batchbuffer.h" #include "intel_regions.h" -static GLboolean intel_bufferobj_unmap(GLcontext * ctx, - GLenum target, - struct gl_buffer_object *obj); /** Allocates a new dri_bo to store the data for the buffer object. */ static void @@ -103,12 +100,7 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - - /* Buffer objects are automatically unmapped when deleting according - * to the spec. - */ - if (obj->Pointer) - intel_bufferobj_unmap(ctx, 0, obj); + assert(!obj->Pointer); /* Mesa should have unmapped it */ if (intel_obj->region) { intel_bufferobj_release_region(intel, intel_obj); @@ -141,11 +133,7 @@ intel_bufferobj_data(GLcontext * ctx, intel_obj->Base.Size = size; intel_obj->Base.Usage = usage; - /* Buffer objects are automatically unmapped when creating new data buffers - * according to the spec. - */ - if (obj->Pointer) - intel_bufferobj_unmap(ctx, 0, obj); + assert(!obj->Pointer); /* Mesa should have unmapped it */ if (intel_obj->region) intel_bufferobj_release_region(intel, intel_obj); diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 7f2144abd4b..0929a2c223c 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -263,7 +263,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) } } else { - /* XXX FBO: instead of FALSE, pass ctx->Stencil.Enabled ??? */ + /* XXX FBO: instead of FALSE, pass ctx->Stencil._Enabled ??? */ FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); } @@ -274,9 +274,14 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) ctx->Driver.Enable(ctx, GL_DEPTH_TEST, (ctx->Depth.Test && fb->Visual.depthBits > 0)); ctx->Driver.Enable(ctx, GL_STENCIL_TEST, - (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0)); + (ctx->Stencil._Enabled && fb->Visual.stencilBits > 0)); } else { + /* Mesa's Stencil._Enabled field is updated when + * _NEW_BUFFERS | _NEW_STENCIL, but i965 code assumes that the value + * only changes with _NEW_STENCIL (which seems sensible). So flag it + * here since this is the _NEW_BUFFERS path. + */ ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL); } diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h index d1b4941601e..4593d90df3d 100644 --- a/src/mesa/drivers/dri/intel/intel_chipset.h +++ b/src/mesa/drivers/dri/intel/intel_chipset.h @@ -46,6 +46,13 @@ #define PCI_CHIP_G33_G 0x29C2 #define PCI_CHIP_Q33_G 0x29D2 +#define PCI_CHIP_IGD_GM 0xA011 +#define PCI_CHIP_IGD_G 0xA001 + +#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM) +#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G) +#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid)) + #define PCI_CHIP_I965_G 0x29A2 #define PCI_CHIP_I965_Q 0x2992 #define PCI_CHIP_I965_G_1 0x2982 @@ -66,7 +73,7 @@ devid == PCI_CHIP_I945_GME || \ devid == PCI_CHIP_I965_GM || \ devid == PCI_CHIP_I965_GME || \ - devid == PCI_CHIP_GM45_GM) + devid == PCI_CHIP_GM45_GM || IS_IGD(devid)) #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ @@ -84,7 +91,7 @@ devid == PCI_CHIP_I945_GME || \ devid == PCI_CHIP_G33_G || \ devid == PCI_CHIP_Q33_G || \ - devid == PCI_CHIP_Q35_G) + devid == PCI_CHIP_Q35_G || IS_IGD(devid)) #define IS_965(devid) (devid == PCI_CHIP_I965_G || \ devid == PCI_CHIP_I965_Q || \ diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index b2291363164..28281b38615 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -54,6 +54,17 @@ #define FILE_DEBUG_FLAG DEBUG_BLIT +#define TRI_CLEAR_COLOR_BITS (BUFFER_BIT_BACK_LEFT | \ + BUFFER_BIT_FRONT_LEFT | \ + BUFFER_BIT_COLOR0 | \ + BUFFER_BIT_COLOR1 | \ + BUFFER_BIT_COLOR2 | \ + BUFFER_BIT_COLOR3 | \ + BUFFER_BIT_COLOR4 | \ + BUFFER_BIT_COLOR5 | \ + BUFFER_BIT_COLOR6 | \ + BUFFER_BIT_COLOR7) + /** * Perform glClear where mask contains only color, depth, and/or stencil. * @@ -72,11 +83,11 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) struct gl_framebuffer *fb = ctx->DrawBuffer; int i; GLboolean saved_fp_enable = GL_FALSE, saved_vp_enable = GL_FALSE; - GLboolean saved_shader_program = 0; + GLuint saved_shader_program = 0; unsigned int saved_active_texture; - assert((mask & ~(BUFFER_BIT_BACK_LEFT | BUFFER_BIT_FRONT_LEFT | - BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) == 0); + assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH | + BUFFER_BIT_STENCIL)) == 0); _mesa_PushAttrib(GL_COLOR_BUFFER_BIT | GL_CURRENT_BIT | @@ -170,11 +181,11 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) while (mask != 0) { GLuint this_mask = 0; + GLuint color_bit; - if (mask & BUFFER_BIT_BACK_LEFT) - this_mask = BUFFER_BIT_BACK_LEFT; - else if (mask & BUFFER_BIT_FRONT_LEFT) - this_mask = BUFFER_BIT_FRONT_LEFT; + color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS); + if (color_bit != 0) + this_mask |= (1 << (color_bit - 1)); /* Clear depth/stencil in the same pass as color. */ this_mask |= (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)); @@ -186,6 +197,9 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) _mesa_DrawBuffer(GL_FRONT_LEFT); else if (this_mask & BUFFER_BIT_BACK_LEFT) _mesa_DrawBuffer(GL_BACK_LEFT); + else if (color_bit != 0) + _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0 + + (color_bit - BUFFER_COLOR0 - 1)); else _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); @@ -233,13 +247,10 @@ static const char *buffer_names[] = { [BUFFER_BACK_LEFT] = "back", [BUFFER_FRONT_RIGHT] = "front right", [BUFFER_BACK_RIGHT] = "back right", - [BUFFER_AUX0] = "aux0", - [BUFFER_AUX1] = "aux1", - [BUFFER_AUX2] = "aux2", - [BUFFER_AUX3] = "aux3", [BUFFER_DEPTH] = "depth", [BUFFER_STENCIL] = "stencil", [BUFFER_ACCUM] = "accum", + [BUFFER_AUX0] = "aux0", [BUFFER_COLOR0] = "color0", [BUFFER_COLOR1] = "color1", [BUFFER_COLOR2] = "color2", @@ -313,8 +324,11 @@ intelClear(GLcontext *ctx, GLbitfield mask) * buffer with it. */ if (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) { - tri_mask |= blit_mask & BUFFER_BIT_BACK_LEFT; - blit_mask &= ~BUFFER_BIT_BACK_LEFT; + int color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS); + if (color_bit != 0) { + tri_mask |= blit_mask & (1 << (color_bit - 1)); + blit_mask &= ~(1 << (color_bit - 1)); + } } /* SW fallback clearing */ diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index d7ccfa06058..888bb3f18f6 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -123,6 +123,10 @@ intelGetString(GLcontext * ctx, GLenum name) case PCI_CHIP_Q33_G: chipset = "Intel(R) Q33"; break; + case PCI_CHIP_IGD_GM: + case PCI_CHIP_IGD_G: + chipset = "Intel(R) IGD"; + break; case PCI_CHIP_I965_Q: chipset = "Intel(R) 965Q"; break; @@ -502,10 +506,16 @@ intelInitContext(struct intel_context *intel, * start. */ if (getenv("INTEL_STRICT_CONFORMANCE")) { - intel->strict_conformance = 1; + unsigned int value = atoi(getenv("INTEL_STRICT_CONFORMANCE")); + if (value > 0) { + intel->conformance_mode = value; + } + else { + intel->conformance_mode = 1; + } } - if (intel->strict_conformance) { + if (intel->conformance_mode > 0) { ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; ctx->Const.MaxLineWidth = 1.0; @@ -573,8 +583,6 @@ intelInitContext(struct intel_context *intel, intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); - _math_matrix_ctr(&intel->ViewportMatrix); - if (IS_965(intelScreen->deviceID) && !intel->intelScreen->irq_active) { _mesa_printf("IRQs not active. Exiting\n"); exit(1); @@ -610,6 +618,16 @@ intelInitContext(struct intel_context *intel, intel->no_rast = 1; } + if (driQueryOptionb(&intel->optionCache, "always_flush_batch")) { + fprintf(stderr, "flushing batchbuffer before/after each draw call\n"); + intel->always_flush_batch = 1; + } + + if (driQueryOptionb(&intel->optionCache, "always_flush_cache")) { + fprintf(stderr, "flushing GPU caches before/after each draw call\n"); + intel->always_flush_cache = 1; + } + /* Disable all hardware rendering (skip emitting batches and fences/waits * to the kernel) */ diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 18dc43c4a42..e520ecf220d 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -100,7 +100,6 @@ struct intel_context GLuint num_regions); GLuint (*flush_cmd) (void); - void (*emit_flush) (struct intel_context *intel, GLuint unused); void (*reduced_primitive_state) (struct intel_context * intel, GLenum rprim); @@ -229,7 +228,14 @@ struct intel_context GLboolean hw_stipple; GLboolean depth_buffer_is_float; GLboolean no_rast; - GLboolean strict_conformance; + GLboolean always_flush_batch; + GLboolean always_flush_cache; + + /* 0 - nonconformant, best performance; + * 1 - fallback to sw for known conformance bugs + * 2 - always fallback to sw + */ + GLuint conformance_mode; /* State for intelvb.c and inteltris.c. */ diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index 136221c37fc..f2e2e619358 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -1595,7 +1595,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) "3DPRIMITIVE: %s %s\n", get_965_prim_type(data[0]), (data[0] & (1 << 15)) ? "random" : "sequential"); - instr_out(data, hw_offset, 1, "primitive count\n"); + instr_out(data, hw_offset, 1, "vertex count\n"); instr_out(data, hw_offset, 2, "start vertex\n"); instr_out(data, hw_offset, 3, "instance count\n"); instr_out(data, hw_offset, 4, "start instance\n"); diff --git a/src/mesa/drivers/dri/intel/intel_depthtmp.h b/src/mesa/drivers/dri/intel/intel_depthtmp.h new file mode 100644 index 00000000000..16d77084530 --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_depthtmp.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** + * Wrapper around the depthtmp.h macrofest to generate spans code for + * all the tiling styles. + */ + +#define VALUE_TYPE INTEL_VALUE_TYPE +#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(NO_TILE(_x, _y), d) +#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(NO_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel##x) +#include "depthtmp.h" + +#define VALUE_TYPE INTEL_VALUE_TYPE +#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(X_TILE(_x, _y), d) +#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(X_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel_XTile_##x) +#include "depthtmp.h" + +#define VALUE_TYPE INTEL_VALUE_TYPE +#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(Y_TILE(_x, _y), d) +#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(Y_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel_YTile_##x) +#include "depthtmp.h" + +#undef INTEL_VALUE_TYPE +#undef INTEL_WRITE_DEPTH +#undef INTEL_READ_DEPTH +#undef INTEL_TAG diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 28223ca1415..8dd0b2461bd 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -49,6 +49,7 @@ #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil +#define need_GL_ATI_envmap_bumpmap #define need_GL_NV_point_sprite #define need_GL_NV_vertex_program #define need_GL_VERSION_2_0 @@ -129,11 +130,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_point_sprite", NULL }, { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, -#if 0 - /* Support for GLSL 1.20 is currently broken in core Mesa. - */ { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, -#endif { "GL_ARB_shadow", NULL }, { "GL_ARB_texture_non_power_of_two", NULL }, { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, @@ -142,6 +139,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_EXT_texture_sRGB", NULL }, { "GL_EXT_texture_swizzle", NULL }, { "GL_EXT_vertex_array_bgra", NULL }, + { "GL_ATI_envmap_bumpmap", GL_ATI_envmap_bumpmap_functions }, { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions }, { "GL_ATI_texture_env_combine3", NULL }, { "GL_NV_texture_env_combine4", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 54d6044ad3b..a401f730ba2 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -210,7 +210,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch); - irb->region = intel_region_alloc(intel, cpp, width, height, pitch); + irb->region = intel_region_alloc(intel, cpp, width, height, pitch, + GL_TRUE); if (!irb->region) return GL_FALSE; /* out of memory? */ @@ -322,6 +323,15 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.BlueBits = 5; irb->Base.DataType = GL_UNSIGNED_BYTE; break; + case GL_RGB8: + irb->Base._ActualFormat = GL_RGB8; + irb->Base._BaseFormat = GL_RGB; + irb->Base.RedBits = 8; + irb->Base.GreenBits = 8; + irb->Base.BlueBits = 8; + irb->Base.AlphaBits = 0; + irb->Base.DataType = GL_UNSIGNED_BYTE; + break; case GL_RGBA8: irb->Base._ActualFormat = GL_RGBA8; irb->Base._BaseFormat = GL_RGBA; @@ -602,19 +612,16 @@ static void intel_finish_render_texture(GLcontext * ctx, struct gl_renderbuffer_attachment *att) { - struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); - - DBG("End render texture (tid %x) tex %u\n", _glthread_GetID(), att->Texture->Name); - - if (irb) { - /* just release the region */ - intel_region_release(&irb->region); - } - else if (att->Renderbuffer) { - /* software fallback */ - _mesa_finish_render_texture(ctx, att); - /* XXX FBO: Need to unmap the buffer (or in intelSpanRenderStart???) */ - } + /* no-op + * Previously we released the renderbuffer's intel_region but + * that's not necessary and actually caused problems when trying + * to do a glRead/CopyPixels from the renderbuffer later. + * The region will be released later if the texture is replaced + * or the renderbuffer deleted. + * + * The intention of this driver hook is more of a "done rendering + * to texture, please re-twiddle/etc if necessary". + */ } @@ -624,6 +631,7 @@ intel_finish_render_texture(GLcontext * ctx, static void intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) { + struct intel_context *intel = intel_context(ctx); const struct intel_renderbuffer *depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); const struct intel_renderbuffer *stencilRb = @@ -635,6 +643,35 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) */ fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } + + /* check that texture color buffers are a format we can render into */ + { + const struct gl_texture_format *supportedFormat; + GLuint i; + + /* The texture format we can render into seems to depend on the + * screen depth. There currently seems to be a problem when + * rendering into a rgb565 texture when the screen is abgr8888. + */ + + if (intel->ctx.Visual.rgbBits >= 24) + supportedFormat = &_mesa_texformat_argb8888; + else + supportedFormat = &_mesa_texformat_rgb565; + + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { + const struct gl_texture_object *texObj = + fb->Attachment[BUFFER_COLOR0 + i].Texture; + if (texObj) { + const struct gl_texture_image *texImg = + texObj->Image[0][texObj->BaseLevel]; + if (texImg && texImg->TexFormat != supportedFormat) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + break; + } + } + } + } } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index bf1c3f03f0e..6e1e034e53d 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -103,7 +103,8 @@ intel_miptree_create(struct intel_context *intel, GLuint last_level, GLuint width0, GLuint height0, - GLuint depth0, GLuint cpp, GLuint compress_byte) + GLuint depth0, GLuint cpp, GLuint compress_byte, + GLboolean expect_accelerated_upload) { struct intel_mipmap_tree *mt; @@ -120,7 +121,8 @@ intel_miptree_create(struct intel_context *intel, mt->cpp, mt->pitch, mt->total_height, - mt->pitch); + mt->pitch, + expect_accelerated_upload); if (!mt->region) { free(mt); diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index c9537dbb9a4..4060b9df78f 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -133,7 +133,8 @@ struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, GLuint height0, GLuint depth0, GLuint cpp, - GLuint compress_byte); + GLuint compress_byte, + GLboolean expect_accelerated_upload); struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index 5e32288844c..7041ff389ac 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -30,6 +30,7 @@ #include "main/context.h" #include "main/enable.h" #include "main/matrix.h" +#include "main/viewport.h" #include "swrast/swrast.h" #include "shader/arbprogram.h" #include "shader/program.h" @@ -112,7 +113,7 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one) return GL_FALSE; } - if (ctx->Stencil.Enabled) { + if (ctx->Stencil._Enabled) { DBG("fallback due to image stencil\n"); return GL_FALSE; } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index 7c7aa6097c8..d50dd68092d 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -87,7 +87,7 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx) ctx->Color.AlphaEnabled || ctx->Depth.Test || ctx->Fog.Enabled || - ctx->Stencil.Enabled || + ctx->Stencil._Enabled || !ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] || !ctx->Color.ColorMask[2] || diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 7be7ea82b35..e8d5ac8569d 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -233,7 +233,7 @@ intel_stencil_drawpixels(GLcontext * ctx, } /* We don't support stencil testing/ops here */ - if (ctx->Stencil.Enabled) + if (ctx->Stencil._Enabled) return GL_FALSE; /* We use FBOs for our wrapping of the depthbuffer into a color diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index ec85c4131a1..0aa5b8c02c9 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -109,12 +109,18 @@ intel_region_alloc_internal(struct intel_context *intel, struct intel_region * intel_region_alloc(struct intel_context *intel, - GLuint cpp, GLuint width, GLuint height, GLuint pitch) + GLuint cpp, GLuint width, GLuint height, GLuint pitch, + GLboolean expect_accelerated_upload) { dri_bo *buffer; - buffer = dri_bo_alloc(intel->bufmgr, "region", - pitch * cpp * height, 64); + if (expect_accelerated_upload) { + buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", + pitch * cpp * height, 64); + } else { + buffer = drm_intel_bo_alloc(intel->bufmgr, "region", + pitch * cpp * height, 64); + } return intel_region_alloc_internal(intel, cpp, width, height, pitch, buffer); } @@ -460,7 +466,8 @@ intel_recreate_static(struct intel_context *intel, else region->cpp = intel->ctx.Visual.rgbBits / 8; region->pitch = intelScreen->pitch; - region->height = intelScreen->height; /* needed? */ + region->width = intelScreen->width; + region->height = intelScreen->height; if (region->buffer != NULL) { dri_bo_unreference(region->buffer); diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index 4b120ba4cee..45e2bf4e77a 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -74,7 +74,8 @@ struct intel_region */ struct intel_region *intel_region_alloc(struct intel_context *intel, GLuint cpp, GLuint width, - GLuint height, GLuint pitch); + GLuint height, GLuint pitch, + GLboolean expect_accelerated_upload); struct intel_region * intel_region_alloc_for_handle(struct intel_context *intel, diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index a52271158c6..e8c074712cd 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -71,10 +71,12 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG DRI_CONF_NO_RAST(false) + DRI_CONF_ALWAYS_FLUSH_BATCH(false) + DRI_CONF_ALWAYS_FLUSH_CACHE(false) DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 6; +const GLuint __driNConfigOptions = 8; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; @@ -302,6 +304,7 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv) dri_bufmgr_destroy(intelScreen->bufmgr); intelUnmapScreenRegions(intelScreen); + driDestroyOptionCache(&intelScreen->optionCache); FREE(intelScreen); sPriv->private = NULL; @@ -322,7 +325,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else { GLboolean swStencil = (mesaVis->stencilBits > 0 && mesaVis->depthBits != 24); - GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8); + GLenum rgbFormat; struct intel_framebuffer *intel_fb = CALLOC_STRUCT(intel_framebuffer); @@ -331,6 +334,13 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); + if (mesaVis->redBits == 5) + rgbFormat = GL_RGB5; + else if (mesaVis->alphaBits == 0) + rgbFormat = GL_RGB8; + else + rgbFormat = GL_RGBA8; + /* setup the hardware-based renderbuffers */ intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, @@ -384,7 +394,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, static void intelDestroyBuffer(__DRIdrawablePrivate * driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } @@ -757,13 +767,34 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) fb_format[2] = GL_BGRA; fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV; + depth_bits[0] = 0; + stencil_bits[0] = 0; + for (color = 0; color < ARRAY_SIZE(fb_format); color++) { __DRIconfig **new_configs; + int depth_factor; + /* With DRI2 right now, GetBuffers always returns a depth/stencil buffer + * with the same cpp as the drawable. So we can't support depth cpp != + * color cpp currently. + */ + if (fb_type[color] == GL_UNSIGNED_SHORT_5_6_5) { + depth_bits[1] = 16; + stencil_bits[1] = 0; + + depth_factor = 2; + } else { + depth_bits[1] = 24; + stencil_bits[1] = 0; + depth_bits[2] = 24; + stencil_bits[2] = 8; + + depth_factor = 3; + } new_configs = driCreateConfigs(fb_format[color], fb_type[color], depth_bits, stencil_bits, - ARRAY_SIZE(depth_bits), + depth_factor, back_buffer_modes, ARRAY_SIZE(back_buffer_modes), msaa_samples_array, diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index bdd2fd9e857..c3a873f1abd 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -131,6 +131,18 @@ pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val) dri_bo_subdata(irb->region->buffer, offset, 1, &val); } +static uint32_t +z24s8_to_s8z24(uint32_t val) +{ + return (val << 24) | (val >> 8); +} + +static uint32_t +s8z24_to_z24s8(uint32_t val) +{ + return (val >> 24) | (val << 8); +} + static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, int x, int y) { @@ -293,101 +305,29 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define X_TILE(_X, _Y) x_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) #define Y_TILE(_X, _Y) y_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) -/* 16 bit, RGB565 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel##x##_RGB565 -#define TAG2(x,y) intel##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, NO_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, NO_TILE(X, Y), V) -#include "spantmp2.h" - -/* 32 bit, ARGB8888 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel##x##_ARGB8888 -#define TAG2(x,y) intel##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, NO_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, NO_TILE(X, Y), V) -#include "spantmp2.h" - -/* 32 bit, xRGB8888 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel##x##_xRGB8888 -#define TAG2(x,y) intel##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, NO_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, NO_TILE(X, Y), V) -#include "spantmp2.h" - -/* 16 bit RGB565 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel_XTile_##x##_RGB565 -#define TAG2(x,y) intel_XTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, X_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, X_TILE(X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel_YTile_##x##_RGB565 -#define TAG2(x,y) intel_YTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, Y_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, Y_TILE(X, Y), V) -#include "spantmp2.h" - -/* 32 bit ARGB888 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_XTile_##x##_ARGB8888 -#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, X_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, X_TILE(X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_YTile_##x##_ARGB8888 -#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, Y_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, Y_TILE(X, Y), V) -#include "spantmp2.h" - -/* 32 bit xRGB888 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_XTile_##x##_xRGB8888 -#define TAG2(x,y) intel_XTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, X_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, X_TILE(X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_YTile_##x##_xRGB8888 -#define TAG2(x,y) intel_YTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, Y_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, Y_TILE(X, Y), V) -#include "spantmp2.h" +/* r5g6b5 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_RGB +#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 +#define INTEL_READ_VALUE(offset) pread_16(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) +#define INTEL_TAG(x) x##_RGB565 +#include "intel_spantmp.h" + +/* a8r8g8b8 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV +#define INTEL_READ_VALUE(offset) pread_32(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_32(irb, offset, v) +#define INTEL_TAG(x) x##_ARGB8888 +#include "intel_spantmp.h" + +/* x8r8g8b8 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV +#define INTEL_READ_VALUE(offset) pread_xrgb8888(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_xrgb8888(irb, offset, v) +#define INTEL_TAG(x) x##_xRGB8888 +#include "intel_spantmp.h" #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ @@ -402,98 +342,26 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS -/** - ** 16-bit depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, NO_TILE(_x, _y), d) -#define READ_DEPTH(d, _x, _y) d = pread_16(irb, NO_TILE(_x, _y)) -#define TAG(x) intel##x##_z16 -#include "depthtmp.h" +/* z16 depthbuffer functions. */ +#define INTEL_VALUE_TYPE GLushort +#define INTEL_WRITE_DEPTH(offset, d) pwrite_16(irb, offset, d) +#define INTEL_READ_DEPTH(offset) pread_16(irb, offset) +#define INTEL_TAG(name) name##_z16 +#include "intel_depthtmp.h" +/* z24 depthbuffer functions. */ +#define INTEL_VALUE_TYPE GLuint +#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, d) +#define INTEL_READ_DEPTH(offset) pread_32(irb, offset) +#define INTEL_TAG(name) name##_z24 +#include "intel_depthtmp.h" -/** - ** 16-bit x tile depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, X_TILE(_x, _y), d) -#define READ_DEPTH(d, _x, _y) d = pread_16(irb, X_TILE(_x, _y)) -#define TAG(x) intel_XTile_##x##_z16 -#include "depthtmp.h" - -/** - ** 16-bit y tile depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, Y_TILE(_x, _y), d) -#define READ_DEPTH(d, _x, _y) d = pread_16(irb, Y_TILE(_x, _y)) -#define TAG(x) intel_YTile_##x##_z16 -#include "depthtmp.h" - - -/** - ** 24/8-bit interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint - -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, NO_TILE(_x, _y), ((d) >> 8) | ((d) << 24)) - -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, NO_TILE(_x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ -} - -#define TAG(x) intel##x##_z24_s8 -#include "depthtmp.h" - - -/** - ** 24/8-bit x-tile interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint - -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, X_TILE(_x, _y), ((d) >> 8) | ((d) << 24)) - -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, X_TILE(_x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ -} - -#define TAG(x) intel_XTile_##x##_z24_s8 -#include "depthtmp.h" - -/** - ** 24/8-bit y-tile interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint - -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, Y_TILE(_x, _y), ((d) >> 8) | ((d) << 24)) - -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, Y_TILE(_x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ -} - -#define TAG(x) intel_YTile_##x##_z24_s8 -#include "depthtmp.h" +/* z24s8 depthbuffer functions. */ +#define INTEL_VALUE_TYPE GLuint +#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, z24s8_to_s8z24(d)) +#define INTEL_READ_DEPTH(offset) s8z24_to_z24s8(pread_32(irb, offset)) +#define INTEL_TAG(name) name##_z24_s8 +#include "intel_depthtmp.h" /** @@ -752,8 +620,21 @@ intel_set_span_functions(struct intel_context *intel, break; } } - else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */ - rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { + else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitDepthPointers_z24(rb); + break; + case I915_TILING_X: + intel_XTile_InitDepthPointers_z24(rb); + break; + case I915_TILING_Y: + intel_YTile_InitDepthPointers_z24(rb); + break; + } + } + else if (rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { switch (tiling) { case I915_TILING_NONE: default: diff --git a/src/mesa/drivers/dri/intel/intel_spantmp.h b/src/mesa/drivers/dri/intel/intel_spantmp.h new file mode 100644 index 00000000000..ead0b1c1683 --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_spantmp.h @@ -0,0 +1,61 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** + * Wrapper around the spantmp.h macrofest to generate spans code for + * all the tiling styles. + */ + +#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT +#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE +#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(NO_TILE(_x, _y), v) +#define GET_VALUE(_x, _y) INTEL_READ_VALUE(NO_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel##x) +#define TAG2(x, y) INTEL_TAG(intel##x)##y +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT +#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE +#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(X_TILE(_x, _y), v) +#define GET_VALUE(_x, _y) INTEL_READ_VALUE(X_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel_XTile_##x) +#define TAG2(x, y) INTEL_TAG(intel_XTile_##x)##y +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT +#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE +#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(X_TILE(_x, _y), v) +#define GET_VALUE(_x, _y) INTEL_READ_VALUE(X_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel_YTile_##x) +#define TAG2(x, y) INTEL_TAG(intel_YTile_##x)##y +#include "spantmp2.h" + +#undef INTEL_PIXEL_FMT +#undef INTEL_PIXEL_TYPE +#undef INTEL_WRITE_VALUE +#undef INTEL_READ_VALUE +#undef INTEL_TAG diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 2715a540d02..8732354e7a5 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -3,11 +3,15 @@ #include "main/texformat.h" #include "main/enums.h" -/* It works out that this function is fine for all the supported + +/** + * Choose hardware texture format given the user's glTexImage parameters. + * + * It works out that this function is fine for all the supported * hardware. However, there is still a need to map the formats onto * hardware descriptors. - */ -/* Note that the i915 can actually support many more formats than + * + * Note that the i915 can actually support many more formats than * these if we take the step of simply swizzling the colors * immediately after sampling... */ @@ -18,6 +22,11 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, struct intel_context *intel = intel_context(ctx); const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24); +#if 0 + printf("%s intFmt=0x%x format=0x%x type=0x%x\n", + __FUNCTION__, internalFormat, format, type); +#endif + switch (internalFormat) { case 4: case GL_RGBA: @@ -165,6 +174,10 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: return &_mesa_texformat_srgb_dxt1; + + case GL_DUDV_ATI: + case GL_DU8DV8_ATI: + return &_mesa_texformat_dudv8; #endif default: diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 866022d0ce2..943636c37b2 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -62,7 +62,8 @@ logbase2(int n) static void guess_and_alloc_mipmap_tree(struct intel_context *intel, struct intel_texture_object *intelObj, - struct intel_texture_image *intelImage) + struct intel_texture_image *intelImage, + GLboolean expect_accelerated_upload) { GLuint firstLevel; GLuint lastLevel; @@ -136,7 +137,8 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, height, depth, intelImage->base.TexFormat->TexelBytes, - comp_byte); + comp_byte, + expect_accelerated_upload); DBG("%s - success\n", __FUNCTION__); } @@ -385,7 +387,7 @@ intelTexImage(GLcontext * ctx, } if (!intelObj->mt) { - guess_and_alloc_mipmap_tree(intel, intelObj, intelImage); + guess_and_alloc_mipmap_tree(intel, intelObj, intelImage, pixels == NULL); if (!intelObj->mt) { DBG("guess_and_alloc_mipmap_tree: failed\n"); } @@ -415,7 +417,7 @@ intelTexImage(GLcontext * ctx, level, level, width, height, depth, intelImage->base.TexFormat->TexelBytes, - comp_byte); + comp_byte, pixels == NULL); } @@ -753,16 +755,21 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) _mesa_lock_texture(&intel->ctx, texObj); + texImage = _mesa_get_tex_image(&intel->ctx, texObj, target, level); + intelImage = intel_texture_image(texImage); + + if (intelImage->mt) { + intel_miptree_release(intel, &intelImage->mt); + assert(!texImage->Data); + } if (intelObj->mt) intel_miptree_release(intel, &intelObj->mt); intelObj->mt = mt; - texImage = _mesa_get_tex_image(&intel->ctx, texObj, target, level); _mesa_init_teximage_fields(&intel->ctx, target, texImage, rb->region->width, rb->region->height, 1, 0, internalFormat); - intelImage = intel_texture_image(texImage); intelImage->face = target_to_face(target); intelImage->level = level; texImage->TexFormat = intelChooseTextureFormat(&intel->ctx, internalFormat, diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 820683d42eb..05a375e1f3b 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -206,7 +206,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) firstImage->base.Height, firstImage->base.Depth, cpp, - comp_byte); + comp_byte, + GL_TRUE); } /* Pull in any images not in the object's tree: diff --git a/src/mesa/drivers/dri/mach64/mach64_screen.c b/src/mesa/drivers/dri/mach64/mach64_screen.c index 43e59594dd5..6440027ca4a 100644 --- a/src/mesa/drivers/dri/mach64/mach64_screen.c +++ b/src/mesa/drivers/dri/mach64/mach64_screen.c @@ -374,7 +374,7 @@ mach64CreateBuffer( __DRIscreenPrivate *driScrnPriv, static void mach64DestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } diff --git a/src/mesa/drivers/dri/mga/mga_texstate.c b/src/mesa/drivers/dri/mga/mga_texstate.c index d4c5b6fd97b..ad765d1dd76 100644 --- a/src/mesa/drivers/dri/mga/mga_texstate.c +++ b/src/mesa/drivers/dri/mga/mga_texstate.c @@ -206,8 +206,8 @@ static void mgaUpdateTextureEnvG200( GLcontext *ctx, GLuint unit ) mgaTextureObjectPtr t = (mgaTextureObjectPtr) tObj->DriverData; GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat; - if (tObj != ctx->Texture.Unit[0].Current2D && - tObj != ctx->Texture.Unit[0].CurrentRect) + if (tObj != ctx->Texture.Unit[0].CurrentTex[TEXTURE_2D_INDEX] && + tObj != ctx->Texture.Unit[0].CurrentTex[TEXTURE_RECT_INDEX]) return; @@ -635,8 +635,8 @@ static void mgaUpdateTextureEnvG400( GLcontext *ctx, GLuint unit ) mgaTextureObjectPtr t = (mgaTextureObjectPtr) tObj->DriverData; GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat; - if (tObj != ctx->Texture.Unit[source].Current2D && - tObj != ctx->Texture.Unit[source].CurrentRect) + if (tObj != ctx->Texture.Unit[source].CurrentTex[TEXTURE_2D_INDEX] && + tObj != ctx->Texture.Unit[source].CurrentTex[TEXTURE_RECT_INDEX]) return; switch (ctx->Texture.Unit[source].EnvMode) { diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.c b/src/mesa/drivers/dri/mga/mga_xmesa.c index 926534d6dff..0dc76fea50a 100644 --- a/src/mesa/drivers/dri/mga/mga_xmesa.c +++ b/src/mesa/drivers/dri/mga/mga_xmesa.c @@ -816,7 +816,7 @@ mgaCreateBuffer( __DRIscreenPrivate *driScrnPriv, static void mgaDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } static void diff --git a/src/mesa/drivers/dri/mga/mgapixel.c b/src/mesa/drivers/dri/mga/mgapixel.c index 9f90047ba59..977dfa0b760 100644 --- a/src/mesa/drivers/dri/mga/mgapixel.c +++ b/src/mesa/drivers/dri/mga/mgapixel.c @@ -133,7 +133,7 @@ check_color_per_fragment_ops( const GLcontext *ctx ) ctx->Depth.Test || ctx->Fog.Enabled || ctx->Scissor.Enabled || - ctx->Stencil.Enabled || + ctx->Stencil._Enabled || !ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] || !ctx->Color.ColorMask[2] || diff --git a/src/mesa/drivers/dri/r128/r128_screen.c b/src/mesa/drivers/dri/r128/r128_screen.c index 7cda4ca5d38..f5bcc2f2906 100644 --- a/src/mesa/drivers/dri/r128/r128_screen.c +++ b/src/mesa/drivers/dri/r128/r128_screen.c @@ -353,7 +353,7 @@ r128CreateBuffer( __DRIscreenPrivate *driScrnPriv, static void r128DestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c index a6c655866ac..354daef07f6 100644 --- a/src/mesa/drivers/dri/r200/r200_pixel.c +++ b/src/mesa/drivers/dri/r200/r200_pixel.c @@ -87,7 +87,7 @@ check_color_per_fragment_ops( const GLcontext *ctx ) ctx->Depth.Test || ctx->Fog.Enabled || ctx->Scissor.Enabled || - ctx->Stencil.Enabled || + ctx->Stencil._Enabled || !ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] || !ctx->Color.ColorMask[2] || diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 058e0ee5bec..e9cee1a637d 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -1234,26 +1234,26 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit); if (texUnit->TexGenEnabled & S_BIT) { - mode = texUnit->GenModeS; + mode = texUnit->GenS.Mode; } else { tgcm |= R200_TEXGEN_COMP_S << (unit * 4); } if (texUnit->TexGenEnabled & T_BIT) { - if (texUnit->GenModeT != mode) + if (texUnit->GenT.Mode != mode) mixed_fallback = GL_TRUE; } else { tgcm |= R200_TEXGEN_COMP_T << (unit * 4); } if (texUnit->TexGenEnabled & R_BIT) { - if (texUnit->GenModeR != mode) + if (texUnit->GenR.Mode != mode) mixed_fallback = GL_TRUE; } else { tgcm |= R200_TEXGEN_COMP_R << (unit * 4); } if (texUnit->TexGenEnabled & Q_BIT) { - if (texUnit->GenModeQ != mode) + if (texUnit->GenQ.Mode != mode) mixed_fallback = GL_TRUE; } else { tgcm |= R200_TEXGEN_COMP_Q << (unit * 4); @@ -1262,8 +1262,8 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) if (mixed_fallback) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "fallback mixed texgen, 0x%x (0x%x 0x%x 0x%x 0x%x)\n", - texUnit->TexGenEnabled, texUnit->GenModeS, texUnit->GenModeT, - texUnit->GenModeR, texUnit->GenModeQ); + texUnit->TexGenEnabled, texUnit->GenS.Mode, texUnit->GenT.Mode, + texUnit->GenR.Mode, texUnit->GenQ.Mode); return GL_FALSE; } @@ -1281,8 +1281,10 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) switch (mode) { case GL_OBJECT_LINEAR: { GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled, - texUnit->ObjectPlaneS, texUnit->ObjectPlaneT, - texUnit->ObjectPlaneR, texUnit->ObjectPlaneQ ); + texUnit->GenS.ObjectPlane, + texUnit->GenT.ObjectPlane, + texUnit->GenR.ObjectPlane, + texUnit->GenQ.ObjectPlane ); if (needtgenable & (S_BIT | T_BIT)) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "fallback mixed texgen / obj plane, 0x%x\n", @@ -1298,17 +1300,19 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) tgi |= R200_TEXGEN_INPUT_OBJ << inputshift; set_texgen_matrix( rmesa, unit, - (texUnit->TexGenEnabled & S_BIT) ? texUnit->ObjectPlaneS : I, - (texUnit->TexGenEnabled & T_BIT) ? texUnit->ObjectPlaneT : I + 4, - (texUnit->TexGenEnabled & R_BIT) ? texUnit->ObjectPlaneR : I + 8, - (texUnit->TexGenEnabled & Q_BIT) ? texUnit->ObjectPlaneQ : I + 12); + (texUnit->TexGenEnabled & S_BIT) ? texUnit->GenS.ObjectPlane : I, + (texUnit->TexGenEnabled & T_BIT) ? texUnit->GenT.ObjectPlane : I + 4, + (texUnit->TexGenEnabled & R_BIT) ? texUnit->GenR.ObjectPlane : I + 8, + (texUnit->TexGenEnabled & Q_BIT) ? texUnit->GenQ.ObjectPlane : I + 12); } break; case GL_EYE_LINEAR: { GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled, - texUnit->EyePlaneS, texUnit->EyePlaneT, - texUnit->EyePlaneR, texUnit->EyePlaneQ ); + texUnit->GenS.EyePlane, + texUnit->GenT.EyePlane, + texUnit->GenR.EyePlane, + texUnit->GenQ.EyePlane ); if (needtgenable & (S_BIT | T_BIT)) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "fallback mixed texgen / eye plane, 0x%x\n", @@ -1323,10 +1327,10 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) } tgi |= R200_TEXGEN_INPUT_EYE << inputshift; set_texgen_matrix( rmesa, unit, - (texUnit->TexGenEnabled & S_BIT) ? texUnit->EyePlaneS : I, - (texUnit->TexGenEnabled & T_BIT) ? texUnit->EyePlaneT : I + 4, - (texUnit->TexGenEnabled & R_BIT) ? texUnit->EyePlaneR : I + 8, - (texUnit->TexGenEnabled & Q_BIT) ? texUnit->EyePlaneQ : I + 12); + (texUnit->TexGenEnabled & S_BIT) ? texUnit->GenS.EyePlane : I, + (texUnit->TexGenEnabled & T_BIT) ? texUnit->GenT.EyePlane : I + 4, + (texUnit->TexGenEnabled & R_BIT) ? texUnit->GenR.EyePlane : I + 8, + (texUnit->TexGenEnabled & Q_BIT) ? texUnit->GenQ.EyePlane : I + 12); } break; @@ -1362,7 +1366,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) */ if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "fallback unsupported texgen, %d\n", - texUnit->GenModeS); + texUnit->GenS.Mode); return GL_FALSE; } diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 888f91db73d..aadd1443ad9 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -202,7 +202,7 @@ static unsigned long t_dst(struct prog_dst_register *dst) } } -static unsigned long t_src_class(enum register_file file) +static unsigned long t_src_class(gl_register_file file) { switch(file){ diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 488fd44740c..5bae37e1b1e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -82,7 +82,7 @@ int hw_tcl_on = 1; #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate #define need_GL_EXT_blend_minmax -//#define need_GL_EXT_fog_coord +#define need_GL_EXT_fog_coord #define need_GL_EXT_gpu_program_parameters #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side @@ -110,7 +110,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, {"GL_EXT_blend_subtract", NULL}, -// {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, {"GL_EXT_shadow_funcs", NULL}, diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 1512e906e89..4bf0e7a1898 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -177,14 +177,23 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) { - GLuint i, ret = 0; + GLuint i, ret = 0, first_free_texcoord = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) { ret |= (4 << (3 * i)); + ++first_free_texcoord; } } + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { + if (first_free_texcoord > 8) { + fprintf(stderr, "\tout of free texcoords to write fog coord\n"); + _mesa_exit(-1); + } + ret |= 4 << (3 * first_free_texcoord); + } + return ret; } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 8d030c63fb3..32182bb6674 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -356,8 +356,8 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) static void nqssadce_init(struct nqssadce_state* s) { - s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W; + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; } diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 5f344be1163..ed552d09bbc 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1224,6 +1224,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_INST_COL_ADDR_SHIFT 18 #define R500_RS_INST_TEX_ADJ (1 << 25) #define R500_RS_INST_W_CN (1 << 26) +#define R500_RS_INST_TEX_ID(x) ((x) << R500_RS_INST_TEX_ID_SHIFT) +#define R500_RS_INST_TEX_ADDR(x) ((x) << R500_RS_INST_TEX_ADDR_SHIFT) +#define R500_RS_INST_COL_ID(x) ((x) << R500_RS_INST_COL_ID_SHIFT) +#define R500_RS_INST_COL_ADDR(x) ((x) << R500_RS_INST_COL_ADDR_SHIFT) /* These DWORDs control how vertex data is routed into fragment program * registers, after interpolators. @@ -1239,9 +1243,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_INST_TEX_ID(x) ((x) << 0) # define R300_RS_INST_TEX_CN_WRITE (1 << 3) # define R300_RS_INST_TEX_ADDR_SHIFT 6 +# define R300_RS_INST_TEX_ADDR(x) ((x) << R300_RS_INST_TEX_ADDR_SHIFT) # define R300_RS_INST_COL_ID(x) ((x) << 11) # define R300_RS_INST_COL_CN_WRITE (1 << 14) # define R300_RS_INST_COL_ADDR_SHIFT 17 +# define R300_RS_INST_COL_ADDR(x) ((x) << R300_RS_INST_COL_ADDR_SHIFT) # define R300_RS_INST_TEX_ADJ (1 << 22) # define R300_RS_COL_BIAS_UNUSED_SHIFT 23 diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 17e7b5227ab..f423029ee68 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -728,134 +728,6 @@ static void r300ColorMask(GLcontext * ctx, } /* ============================================================= - * Fog - */ -static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - union { - int i; - float f; - } fogScale, fogStart; - - (void)param; - - fogScale.i = r300->hw.fogp.cmd[R300_FOGP_SCALE]; - fogStart.i = r300->hw.fogp.cmd[R300_FOGP_START]; - - switch (pname) { - case GL_FOG_MODE: - switch (ctx->Fog.Mode) { - case GL_LINEAR: - R300_STATECHANGE(r300, fogs); - r300->hw.fogs.cmd[R300_FOGS_STATE] = - (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | - R300_FG_FOG_BLEND_FN_LINEAR; - - if (ctx->Fog.Start == ctx->Fog.End) { - fogScale.f = -1.0; - fogStart.f = 1.0; - } else { - fogScale.f = - 1.0 / (ctx->Fog.End - ctx->Fog.Start); - fogStart.f = - -ctx->Fog.Start / (ctx->Fog.End - - ctx->Fog.Start); - } - break; - case GL_EXP: - R300_STATECHANGE(r300, fogs); - r300->hw.fogs.cmd[R300_FOGS_STATE] = - (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | - R300_FG_FOG_BLEND_FN_EXP; - fogScale.f = 0.0933 * ctx->Fog.Density; - fogStart.f = 0.0; - break; - case GL_EXP2: - R300_STATECHANGE(r300, fogs); - r300->hw.fogs.cmd[R300_FOGS_STATE] = - (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | - R300_FG_FOG_BLEND_FN_EXP2; - fogScale.f = 0.3 * ctx->Fog.Density; - fogStart.f = 0.0; - break; - default: - return; - } - break; - case GL_FOG_DENSITY: - switch (ctx->Fog.Mode) { - case GL_EXP: - fogScale.f = 0.0933 * ctx->Fog.Density; - fogStart.f = 0.0; - break; - case GL_EXP2: - fogScale.f = 0.3 * ctx->Fog.Density; - fogStart.f = 0.0; - default: - break; - } - break; - case GL_FOG_START: - case GL_FOG_END: - if (ctx->Fog.Mode == GL_LINEAR) { - if (ctx->Fog.Start == ctx->Fog.End) { - fogScale.f = -1.0; - fogStart.f = 1.0; - } else { - fogScale.f = - 1.0 / (ctx->Fog.End - ctx->Fog.Start); - fogStart.f = - -ctx->Fog.Start / (ctx->Fog.End - - ctx->Fog.Start); - } - } - break; - case GL_FOG_COLOR: - R300_STATECHANGE(r300, fogc); - r300->hw.fogc.cmd[R300_FOGC_R] = - (GLuint) (ctx->Fog.Color[0] * 1023.0F) & 0x3FF; - r300->hw.fogc.cmd[R300_FOGC_G] = - (GLuint) (ctx->Fog.Color[1] * 1023.0F) & 0x3FF; - r300->hw.fogc.cmd[R300_FOGC_B] = - (GLuint) (ctx->Fog.Color[2] * 1023.0F) & 0x3FF; - break; - case GL_FOG_COORD_SRC: - break; - default: - return; - } - - if (fogScale.i != r300->hw.fogp.cmd[R300_FOGP_SCALE] || - fogStart.i != r300->hw.fogp.cmd[R300_FOGP_START]) { - R300_STATECHANGE(r300, fogp); - r300->hw.fogp.cmd[R300_FOGP_SCALE] = fogScale.i; - r300->hw.fogp.cmd[R300_FOGP_START] = fogStart.i; - } -} - -static void r300SetFogState(GLcontext * ctx, GLboolean state) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - - R300_STATECHANGE(r300, fogs); - if (state) { - r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FG_FOG_BLEND_ENABLE; - - r300Fogfv(ctx, GL_FOG_MODE, NULL); - r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); - r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); - r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); - r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - } else { - r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FG_FOG_BLEND_ENABLE; - } -} - -/* ============================================================= * Point state */ static void r300PointSize(GLcontext * ctx, GLfloat size) @@ -1140,7 +1012,7 @@ r300FetchStateParameter(GLcontext * ctx, case STATE_R300_TEXRECT_FACTOR:{ struct gl_texture_object *t = - ctx->Texture.Unit[state[2]].CurrentRect; + ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX]; if (t && t->Image[0][t->BaseLevel]) { struct gl_texture_image *image = @@ -1531,18 +1403,14 @@ union r300_outputs_written { static void r300SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - /* I'm still unsure if these are needed */ - GLuint interp_col[8]; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; - int col_interp_nr; - int rs_tex_count = 0, rs_col_count = 0; - int i, count; - - memset(interp_col, 0, sizeof(interp_col)); + int col_ip, tex_ip; + int rs_tex_count = 0; + int i, count, col_fmt; if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; @@ -1560,51 +1428,66 @@ static void r300SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = col_interp_nr = high_rr = 0; + fp_reg = col_ip = tex_ip = col_fmt = 0; - r300->hw.rr.cmd[R300_RR_INST_1] = 0; - - if (InputsRead & FRAG_BIT_WPOS) { - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(InputsRead & (FRAG_BIT_TEX0 << i))) - break; + r300->hw.rc.cmd[1] = 0; + r300->hw.rc.cmd[2] = 0; + for (i=0; i<R300_RR_CMDSIZE-1; ++i) + r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0; - if (i == ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tno free texcoord found...\n"); - _mesa_exit(-1); - } + for (i=0; i<R300_RI_CMDSIZE-1; ++i) + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0; - InputsRead |= (FRAG_BIT_TEX0 << i); - InputsRead &= ~FRAG_BIT_WPOS; - } if (InputsRead & FRAG_BIT_COL0) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; - interp_col[0] |= R300_RS_COL_PTR(rs_col_count); - if (count == 3) - interp_col[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1); - rs_col_count += count; + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL0; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } } - else - interp_col[0] = R300_RS_COL_FMT(R300_RS_COL_FMT_0001); if (InputsRead & FRAG_BIT_COL1) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; - if (count == 3) - interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB0); - interp_col[1] |= R300_RS_COL_PTR(1); - rs_col_count += count; - } - - if (InputsRead & FRAG_BIT_FOGC) { - /* XXX FIX THIS - * Just turn off the bit for now. - * Need to do something similar to the color/texcoord inputs. - */ - InputsRead &= ~FRAG_BIT_FOGC; + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL1; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } } for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; + + if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + continue; + } + int swiz; /* with TCL we always seem to route 4 components */ @@ -1613,7 +1496,6 @@ static void r300SetupRSUnit(GLcontext * ctx) else count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count; switch(count) { case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; @@ -1622,63 +1504,48 @@ static void r300SetupRSUnit(GLcontext * ctx) case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; }; - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz; - - r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - - rs_tex_count += count; - - //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R300_RS_INST_TEX_CN_WRITE | i /* source INTERP */ - | (fp_reg << R300_RS_INST_TEX_ADDR_SHIFT); - high_rr = fp_reg; - - /* Passing invalid data here can lock the GPU. */ - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { - InputsRead &= ~(FRAG_BIT_TEX0 << i); - fp_reg++; - } else { - WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); - } - } + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); + rs_tex_count += count; + ++tex_ip; + ++fp_reg; } - if (InputsRead & FRAG_BIT_COL0) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); - InputsRead &= ~FRAG_BIT_COL0; - col_interp_nr++; + if (InputsRead & FRAG_BIT_FOGC) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_FOGC; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; } else { - WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); } } - if (InputsRead & FRAG_BIT_COL1) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - r300->hw.rr.cmd[R300_RR_INST_1] |= R300_RS_INST_COL_ID(1) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); - InputsRead &= ~FRAG_BIT_COL1; - if (high_rr < 1) - high_rr = 1; - col_interp_nr++; - } else { - WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); - } + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; } + InputsRead &= ~FRAG_BIT_WPOS; - /* Need at least one. This might still lock as the values are undefined... */ - if (rs_tex_count == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); - col_interp_nr++; + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + ++col_ip; } - r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT) - | (col_interp_nr << R300_IC_COUNT_SHIFT) - | R300_HIRES_EN; + high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; + r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] |= high_rr - 1; - assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr + 1); - r300->hw.rc.cmd[2] = high_rr; + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); @@ -1687,18 +1554,15 @@ static void r300SetupRSUnit(GLcontext * ctx) static void r500SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - /* I'm still unsure if these are needed */ - GLuint interp_col[8]; - union r300_outputs_written OutputsWritten; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; + union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; - int rs_col_count = 0; - int in_texcoords, col_interp_nr; - int i, count; + int col_ip, tex_ip; + int rs_tex_count = 0; + int i, count, col_fmt; - memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; else @@ -1715,130 +1579,151 @@ static void r500SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = col_interp_nr = high_rr = in_texcoords = 0; - - r300->hw.rr.cmd[R300_RR_INST_1] = 0; + fp_reg = col_ip = tex_ip = col_fmt = 0; - if (InputsRead & FRAG_BIT_WPOS) { - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(InputsRead & (FRAG_BIT_TEX0 << i))) - break; + r300->hw.rc.cmd[1] = 0; + r300->hw.rc.cmd[2] = 0; + for (i=0; i<R300_RR_CMDSIZE-1; ++i) + r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0; - if (i == ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tno free texcoord found...\n"); - _mesa_exit(-1); - } + for (i=0; i<R500_RI_CMDSIZE-1; ++i) + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0; - InputsRead |= (FRAG_BIT_TEX0 << i); - InputsRead &= ~FRAG_BIT_WPOS; - } if (InputsRead & FRAG_BIT_COL0) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; - interp_col[0] |= R500_RS_COL_PTR(rs_col_count); - if (count == 3) - interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); - rs_col_count += count; + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL0; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } } - else - interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001); if (InputsRead & FRAG_BIT_COL1) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; - interp_col[1] |= R500_RS_COL_PTR(1); - if (count == 3) - interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB0); - rs_col_count += count; + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL1; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } } + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - GLuint swiz = 0; + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; - /* with TCL we always seem to route 4 components */ - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - - if (hw_tcl_on) - count = 4; - else - count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - - /* always have on texcoord */ - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; - if (count >= 2) - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; - else - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; - - if (count >= 3) - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT; - else - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; - - if (count == 4) - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT; - else - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - - } else - swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); - - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz; - - r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ - | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); - high_rr = fp_reg; - - /* Passing invalid data here can lock the GPU. */ - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { - InputsRead &= ~(FRAG_BIT_TEX0 << i); - fp_reg++; - } else { - WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); - } + if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + continue; } - } - if (InputsRead & FRAG_BIT_COL0) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); - InputsRead &= ~FRAG_BIT_COL0; - col_interp_nr++; + int swiz = 0; + + /* with TCL we always seem to route 4 components */ + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + if (count == 4) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else if (count == 3) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else if (count == 2) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else if (count == 1) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; } else { - WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; } + + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz; + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); + rs_tex_count += count; + ++tex_ip; + ++fp_reg; } - if (InputsRead & FRAG_BIT_COL1) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); - InputsRead &= ~FRAG_BIT_COL1; - if (high_rr < 1) - high_rr = 1; - col_interp_nr++; + if (InputsRead & FRAG_BIT_FOGC) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_FOGC; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; } else { - WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); } } - /* Need at least one. This might still lock as the values are undefined... */ - if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); - col_interp_nr++; + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; } - r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_IT_COUNT_SHIFT) - | (col_interp_nr << R300_IC_COUNT_SHIFT) - | R300_HIRES_EN; + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + ++col_ip; + } + + high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; + r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1); - assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr + 1); - r300->hw.rc.cmd[2] = 0xC0 | high_rr; + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); @@ -2078,7 +1963,7 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) /* empty */ break; case GL_FOG: - r300SetFogState(ctx, state); + /* empty */ break; case GL_ALPHA_TEST: r300SetAlphaState(ctx); @@ -2149,7 +2034,7 @@ static void r300ResetHwState(r300ContextPtr r300) r300DepthFunc(ctx, ctx->Depth.Func); /* stencil */ - r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled); + r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled); r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]); r300StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0], ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]); @@ -2236,11 +2121,9 @@ static void r300ResetHwState(r300ContextPtr r300) break; } - /* XXX: set to 0 when fog is disabled? */ - r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = R300_GB_FOG_SELECT_1_1_W; - /* XXX: Enable anti-aliasing? */ r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE; + r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = 0; r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0); r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0); @@ -2289,17 +2172,11 @@ static void r300ResetHwState(r300ContextPtr r300) R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED | R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; - r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W24; + r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W0 | R300_W_SRC_US; - r300Enable(ctx, GL_FOG, ctx->Fog.Enabled); - r300Fogfv(ctx, GL_FOG_MODE, NULL); - r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); - r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); - r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); - r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - r300Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL); - - r300->hw.fg_depth_src.cmd[1] = 0; + /* disable fog unit */ + r300->hw.fogs.cmd[R300_FOGS_STATE] = 0; + r300->hw.fg_depth_src.cmd[1] = R300_FG_DEPTH_SRC_SCAN; r300->hw.rb3d_cctl.cmd[1] = 0; @@ -2524,16 +2401,6 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) } code = &fp->code; - if (fp->mesa_program.FogOption != GL_NONE) { - /* Enable HW fog. Try not to squish GL context. - * (Anybody sane remembered to set glFog() opts first!) */ - r300SetFogState(ctx, GL_TRUE); - ctx->Fog.Mode = fp->mesa_program.FogOption; - r300Fogfv(ctx, GL_FOG_MODE, NULL); - } else - /* Make sure HW is matching GL context. */ - r300SetFogState(ctx, ctx->Fog.Enabled); - r300SetupTextures(ctx); R300_STATECHANGE(rmesa, fp); @@ -2581,9 +2448,22 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) r300SetEarlyZState(ctx); - GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; - if (current_fragment_program_writes_depth(ctx)) + /* w_fmt value is set to get best performance + * see p.130 R5xx 3D acceleration guide v1.3 */ + GLuint w_fmt, fgdepthsrc; + if (current_fragment_program_writes_depth(ctx)) { fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; + w_fmt = R300_W_FMT_W24 | R300_W_SRC_US; + } else { + fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; + w_fmt = R300_W_FMT_W0 | R300_W_SRC_US; + } + + if (w_fmt != rmesa->hw.us_out_fmt.cmd[5]) { + R300_STATECHANGE(rmesa, us_out_fmt); + rmesa->hw.us_out_fmt.cmd[5] = w_fmt; + } + if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) { R300_STATECHANGE(rmesa, fg_depth_src); rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; @@ -2690,7 +2570,6 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->DepthFunc = r300DepthFunc; functions->DepthMask = r300DepthMask; functions->CullFace = r300CullFace; - functions->Fogfv = r300Fogfv; functions->FrontFace = r300FrontFace; functions->ShadeModel = r300ShadeModel; functions->LogicOpcode = r300LogicOpcode; diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 153582ce489..f57516acf41 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -82,14 +82,15 @@ static void r300SetVertexFormat( GLcontext *ctx ) struct vertex_buffer *VB = &tnl->vb; DECLARE_RENDERINPUTS(index_bitset); GLuint InputsRead = 0, OutputsWritten = 0; - int vap_fmt_0 = 0; + int vap_fmt_1 = 0; int offset = 0; int vte = 0; + int fog_id; GLint inputs[VERT_ATTRIB_MAX]; GLint tab[VERT_ATTRIB_MAX]; int swizzle[VERT_ATTRIB_MAX][4]; GLuint i, nr; - GLuint sz, vap_fmt_1 = 0; + GLuint sz; DECLARE_RENDERINPUTS(render_inputs_bitset); RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); @@ -125,13 +126,12 @@ static void r300SetVertexFormat( GLcontext *ctx ) offset = 4; EMIT_PAD(4 * sizeof(float)); } - +/* if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); - vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; offset += 1; } - +*/ if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) { sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size; rmesa->swtcl.coloroffset = offset; @@ -150,6 +150,33 @@ static void r300SetVertexFormat( GLcontext *ctx ) OutputsWritten |= 1 << VERT_RESULT_COL1; } + fog_id = -1; + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) { + /* find first free tex coord slot */ + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + fog_id = i; + break; + } + } + } else { + fog_id = 0; + } + + if (fog_id == -1) { + fprintf(stderr, "\tout of free texcoords to do fog\n"); + _mesa_exit(-1); + } + + sz = VB->AttribPtr[VERT_ATTRIB_FOG]->size; + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F + sz - 1); + InputsRead |= 1 << VERT_ATTRIB_FOG; + OutputsWritten |= 1 << VERT_RESULT_FOGC; + vap_fmt_1 |= sz << (3 * fog_id); + } + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { int i; @@ -164,6 +191,37 @@ static void r300SetVertexFormat( GLcontext *ctx ) } } + /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */ + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) { + int first_free_tex = -1; + if (fog_id >= 0) { + first_free_tex = fog_id+1; + } else { + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + first_free_tex = i; + break; + } + } + } else { + first_free_tex = 0; + } + } + + if (first_free_tex == -1) { + fprintf(stderr, "\tout of free texcoords to write w pos\n"); + _mesa_exit(-1); + } + + sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex); + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); + vap_fmt_1 |= sz << (3 * first_free_tex); + } + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { if (InputsRead & (1 << i)) { inputs[i] = nr++; @@ -179,6 +237,8 @@ static void r300SetVertexFormat( GLcontext *ctx ) inputs[VERT_ATTRIB_COLOR0] = 2; if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) inputs[VERT_ATTRIB_COLOR1] = 3; + if (InputsRead & (1 << VERT_ATTRIB_FOG)) + inputs[VERT_ATTRIB_FOG] = 6 + fog_id; for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) if (InputsRead & (1 << i)) inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); @@ -224,6 +284,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr); } + R300_STATECHANGE(rmesa, vic); rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); @@ -520,9 +581,12 @@ static void r300RenderStart(GLcontext *ctx) r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); + + /* investigate if we can put back flush optimisation if needed */ if (rmesa->radeon.dma.flush != NULL) { rmesa->radeon.dma.flush(ctx); } + } static void r300RenderFinish(GLcontext *ctx) @@ -652,5 +716,4 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) rmesa->radeon.swtcl.numverts); r300EmitCacheFlush(rmesa); COMMIT_BATCH(); - } diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index c4e325e6a76..50806575ced 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -126,7 +126,7 @@ static unsigned long t_dst_mask(GLuint mask) return mask & VSF_FLAG_ALL; } -static unsigned long t_dst_class(enum register_file file) +static unsigned long t_dst_class(gl_register_file file) { switch (file) { @@ -161,7 +161,7 @@ static unsigned long t_dst_index(struct r300_vertex_program *vp, return dst->Index; } -static unsigned long t_src_class(enum register_file file) +static unsigned long t_src_class(gl_register_file file) { switch (file) { case PROGRAM_TEMPORARY: @@ -993,17 +993,16 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) vp->outputs[VERT_RESULT_COL0] + 3; cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; } -#if 0 - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; - } -#endif for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { if (vp->key.OutputsWritten & (1 << i)) { vp->outputs[i] = cur_reg++; } } + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { + vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } } static void r300TranslateVertexShader(struct r300_vertex_program *vp, @@ -1271,7 +1270,6 @@ static void position_invariant(struct gl_program *prog) else vpi[i].Opcode = OPCODE_MAD; - vpi[i].StringPos = 0; vpi[i].Data = 0; if (i == 3) diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 926ddd59644..07a2a7b17ce 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -298,8 +298,8 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) static void nqssadce_init(struct nqssadce_state* s) { - s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W; + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; } static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index 97ce016c99c..a083c3d2436 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -191,7 +191,7 @@ static void process_instruction(struct nqssadce_state* s) if (inst->Opcode != OPCODE_KIL) { if (s->Descr->RewriteDepthOut) { - if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR) + if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH) rewrite_depth_out(inst); } diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c index 8a945d8537c..49aa90dd94a 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c @@ -451,19 +451,7 @@ static void allocate_input_registers(struct pair_state *s) int i; GLuint hwindex = 0; - /* Texcoords come first */ - for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); - InputsRead &= ~FRAG_BIT_WPOS; - - /* Then primary colour */ + /* Primary colour */ if (InputsRead & FRAG_BIT_COL0) alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); InputsRead &= ~FRAG_BIT_COL0; @@ -473,11 +461,23 @@ static void allocate_input_registers(struct pair_state *s) alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); InputsRead &= ~FRAG_BIT_COL1; - /* Fog coordinate */ + /* Texcoords */ + for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* Fogcoords treated as a texcoord */ if (InputsRead & FRAG_BIT_FOGC) alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); InputsRead &= ~FRAG_BIT_FOGC; + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; + /* Anything else */ if (InputsRead) error("Don't know how to handle inputs 0x%x\n", InputsRead); @@ -778,10 +778,10 @@ static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruc struct prog_instruction *inst = s->Program->Instructions + ip; if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == FRAG_RESULT_COLR) { + if (inst->DstReg.Index == FRAG_RESULT_COLOR) { pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == FRAG_RESULT_DEPR) { + } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); } } else { diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 5c34ca89fab..840233ff896 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -245,7 +245,7 @@ static uint32_t radeonGetLastFrame(radeonContextPtr radeon) { drm_radeon_getparam_t gp; int ret; - uint32_t frame; + uint32_t frame = 0; gp.param = RADEON_PARAM_LAST_FRAME; gp.value = (int *)&frame; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 02101978ac4..1d4f008cbc7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -732,7 +732,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) unsigned char *RADEONMMIO = NULL; int i; int ret; - uint32_t temp; + uint32_t temp = 0; if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) { fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n"); @@ -1359,7 +1359,7 @@ radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) radeon_bo_unref(rb->bo); rb->bo = NULL; } - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index b9adab18d49..0ece1acbeb2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -915,11 +915,11 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) */ else if ( (texUnit->TexGenEnabled & S_BIT) && (texUnit->TexGenEnabled & T_BIT) && - (texUnit->GenModeS == texUnit->GenModeT) ) { + (texUnit->GenS.Mode == texUnit->GenT.Mode) ) { if ( ((texUnit->TexGenEnabled & R_BIT) && - (texUnit->GenModeS != texUnit->GenModeR)) || + (texUnit->GenS.Mode != texUnit->GenR.Mode)) || ((texUnit->TexGenEnabled & Q_BIT) && - (texUnit->GenModeS != texUnit->GenModeQ)) ) { + (texUnit->GenS.Mode != texUnit->GenQ.Mode)) ) { /* Mixed modes, fallback: */ if (RADEON_DEBUG & DEBUG_FALLBACKS) @@ -943,23 +943,23 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_Q_BIT(unit); } - switch (texUnit->GenModeS) { + switch (texUnit->GenS.Mode) { case GL_OBJECT_LINEAR: rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift; set_texgen_matrix( rmesa, unit, - texUnit->ObjectPlaneS, - texUnit->ObjectPlaneT, - texUnit->ObjectPlaneR, - texUnit->ObjectPlaneQ); + texUnit->GenS.ObjectPlane, + texUnit->GenT.ObjectPlane, + texUnit->GenR.ObjectPlane, + texUnit->GenQ.ObjectPlane); break; case GL_EYE_LINEAR: rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift; set_texgen_matrix( rmesa, unit, - texUnit->EyePlaneS, - texUnit->EyePlaneT, - texUnit->EyePlaneR, - texUnit->EyePlaneQ); + texUnit->GenS.EyePlane, + texUnit->GenT.EyePlane, + texUnit->GenR.EyePlane, + texUnit->GenQ.EyePlane); break; case GL_REFLECTION_MAP_NV: diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h index 0df634b84db..866807462a4 100644 --- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h +++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h @@ -1500,7 +1500,7 @@ # define RADEON_ALPHA_ARG_C_T1_ALPHA (6 << 8) # define RADEON_ALPHA_ARG_C_T2_ALPHA (7 << 8) # define RADEON_ALPHA_ARG_C_T3_ALPHA (8 << 8) -# define RADEON_DOT_ALPHA_DONT_REPLICATE (1 << 9) +# define RADEON_DOT_ALPHA_DONT_REPLICATE (1 << 12) # define RADEON_ALPHA_ARG_MASK 0xf #define RADEON_PP_TFACTOR_0 0x1c68 diff --git a/src/mesa/drivers/dri/s3v/s3v_tex.c b/src/mesa/drivers/dri/s3v/s3v_tex.c index 8bf2ea98783..db660263638 100644 --- a/src/mesa/drivers/dri/s3v/s3v_tex.c +++ b/src/mesa/drivers/dri/s3v/s3v_tex.c @@ -502,20 +502,20 @@ static void s3vInitTextureObjects( GLcontext *ctx ) #if 1 ctx->Texture.CurrentUnit = 0; - texObj = ctx->Texture.Unit[0].Current1D; + texObj = ctx->Texture.Unit[0].CurrentTex[TEXTURE_1D_INDEX]; s3vBindTexture( ctx, GL_TEXTURE_1D, texObj ); - texObj = ctx->Texture.Unit[0].Current2D; + texObj = ctx->Texture.Unit[0].CurrentTex[TEXTURE_2D_INDEX]; s3vBindTexture( ctx, GL_TEXTURE_2D, texObj ); #endif #if 0 ctx->Texture.CurrentUnit = 1; - texObj = ctx->Texture.Unit[1].Current1D; + texObj = ctx->Texture.Unit[1].CurrentTex[TEXTURE_1D_INDEX]; s3vBindTexture( ctx, GL_TEXTURE_1D, texObj ); - texObj = ctx->Texture.Unit[1].Current2D; + texObj = ctx->Texture.Unit[1].CurrentTex[TEXTURE_2D_INDEX]; s3vBindTexture( ctx, GL_TEXTURE_2D, texObj ); #endif diff --git a/src/mesa/drivers/dri/s3v/s3v_xmesa.c b/src/mesa/drivers/dri/s3v/s3v_xmesa.c index b18c8763c3a..85f14817696 100644 --- a/src/mesa/drivers/dri/s3v/s3v_xmesa.c +++ b/src/mesa/drivers/dri/s3v/s3v_xmesa.c @@ -4,11 +4,12 @@ #include "s3v_context.h" #include "s3v_vb.h" +#include "s3v_dri.h" #include "main/context.h" #include "main/matrix.h" -#include "s3v_dri.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" +#include "main/viewport.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -131,7 +132,7 @@ s3vCreateBuffer( __DRIscreenPrivate *driScrnPriv, static void s3vDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } static void diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c index 326d595352f..a94f1c076ca 100644 --- a/src/mesa/drivers/dri/savage/savage_xmesa.c +++ b/src/mesa/drivers/dri/savage/savage_xmesa.c @@ -678,7 +678,7 @@ savageCreateBuffer( __DRIscreenPrivate *driScrnPriv, static void savageDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } #if 0 diff --git a/src/mesa/drivers/dri/savage/savagestate.c b/src/mesa/drivers/dri/savage/savagestate.c index 73d85ed57b4..84e1b525854 100644 --- a/src/mesa/drivers/dri/savage/savagestate.c +++ b/src/mesa/drivers/dri/savage/savagestate.c @@ -514,7 +514,7 @@ static void savageDDDepthFunc_s4(GLcontext *ctx, GLenum func) imesa->regs.s4.drawLocalCtrl.ni.flushPdZbufWrites = GL_TRUE; imesa->regs.s4.zBufCtrl.ni.zBufEn = GL_TRUE; } - else if (imesa->glCtx->Stencil.Enabled && imesa->hw_stencil) + else if (imesa->glCtx->Stencil._Enabled && imesa->hw_stencil) { /* Need to keep Z on for Stencil. */ imesa->regs.s4.zBufCtrl.ni.zCmpFunc = CF_Always; @@ -1092,7 +1092,7 @@ static void savageDDEnable_s4(GLcontext *ctx, GLenum cap, GLboolean state) FALLBACK (ctx, SAVAGE_FALLBACK_STENCIL, state); else { imesa->regs.s4.stencilCtrl.ni.stencilEn = state; - if (ctx->Stencil.Enabled && + if (ctx->Stencil._Enabled && imesa->regs.s4.zBufCtrl.ni.zBufEn != GL_TRUE) { /* Stencil buffer requires Z enabled. */ diff --git a/src/mesa/drivers/dri/sis/sis_screen.c b/src/mesa/drivers/dri/sis/sis_screen.c index 9eb27fef087..b5f04ae28d0 100644 --- a/src/mesa/drivers/dri/sis/sis_screen.c +++ b/src/mesa/drivers/dri/sis/sis_screen.c @@ -221,7 +221,7 @@ sisCreateBuffer( __DRIscreenPrivate *driScrnPriv, static void sisDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } static void sisCopyBuffer( __DRIdrawablePrivate *dPriv ) diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index 2e7f11327e6..fbfa49c99d0 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -476,7 +476,7 @@ driDestroyDrawable(__DRIdrawable *buf) _mesa_free(buf->row); fb->DeletePending = GL_TRUE; - _mesa_unreference_framebuffer(&fb); + _mesa_reference_framebuffer(&fb, NULL); } } diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.c b/src/mesa/drivers/dri/tdfx/tdfx_context.c index 20046fcb3af..68b5027561b 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_context.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_context.c @@ -67,7 +67,6 @@ #define need_GL_EXT_fog_coord #define need_GL_EXT_paletted_texture /* #define need_GL_EXT_secondary_color */ -/* #define need_GL_MESA_program_debug */ /* #define need_GL_NV_vertex_program */ #include "extension_helper.h" @@ -102,9 +101,6 @@ const struct dri_extension card_extensions[] = { "GL_NV_vertex_program", GL_NV_vertex_program_functions } { "GL_NV_vertex_program1_1", NULL }, #endif -#ifdef need_GL_MESA_program_debug - { "GL_MESA_program_debug", GL_MESA_program_debug_functions }, -#endif { NULL, NULL } }; diff --git a/src/mesa/drivers/dri/tdfx/tdfx_pixels.c b/src/mesa/drivers/dri/tdfx/tdfx_pixels.c index 9ab9c05f2bd..18729d5ae08 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_pixels.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_pixels.c @@ -610,7 +610,7 @@ tdfx_drawpixels_R8G8B8A8(GLcontext * ctx, GLint x, GLint y, ctx->Depth.Test || ctx->Fog.Enabled || ctx->Scissor.Enabled || - ctx->Stencil.Enabled || + ctx->Stencil._Enabled || !ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] || !ctx->Color.ColorMask[2] || diff --git a/src/mesa/drivers/dri/tdfx/tdfx_render.c b/src/mesa/drivers/dri/tdfx/tdfx_render.c index cf840c57a7a..2cd8e12d955 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_render.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_render.c @@ -740,7 +740,7 @@ void tdfxEmitHwStateLocked( tdfxContextPtr fxMesa ) } if ( fxMesa->dirty & TDFX_UPLOAD_STENCIL ) { - if (fxMesa->glCtx->Stencil.Enabled) { + if (fxMesa->glCtx->Stencil._Enabled) { fxMesa->Glide.grEnable(GR_STENCIL_MODE_EXT); fxMesa->Glide.grStencilOp(fxMesa->Stencil.FailFunc, fxMesa->Stencil.ZFailFunc, diff --git a/src/mesa/drivers/dri/tdfx/tdfx_screen.c b/src/mesa/drivers/dri/tdfx/tdfx_screen.c index 5f2f5cfff51..58bd48b294c 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_screen.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_screen.c @@ -232,7 +232,7 @@ tdfxCreateBuffer( __DRIscreenPrivate *driScrnPriv, static void tdfxDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } diff --git a/src/mesa/drivers/dri/tdfx/tdfx_state.c b/src/mesa/drivers/dri/tdfx/tdfx_state.c index a2d7bcd97d9..591df8a905a 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_state.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_state.c @@ -459,7 +459,7 @@ static void tdfxUpdateStencil( GLcontext *ctx ) } if (fxMesa->haveHwStencil) { - if (ctx->Stencil.Enabled) { + if (ctx->Stencil._Enabled) { fxMesa->Stencil.Function = ctx->Stencil.Function[0] - GL_NEVER + GR_CMP_NEVER; fxMesa->Stencil.RefValue = ctx->Stencil.Ref[0] & 0xff; fxMesa->Stencil.ValueMask = ctx->Stencil.ValueMask[0] & 0xff; diff --git a/src/mesa/drivers/dri/trident/trident_context.c b/src/mesa/drivers/dri/trident/trident_context.c index aefd8a243f6..b5126b07ea6 100644 --- a/src/mesa/drivers/dri/trident/trident_context.c +++ b/src/mesa/drivers/dri/trident/trident_context.c @@ -41,6 +41,7 @@ #include "main/extensions.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" +#include "main/viewport.h" #if defined(USE_X86_ASM) #include "x86/common_x86_asm.h" #endif @@ -281,7 +282,7 @@ tridentCreateBuffer( __DRIscreenPrivate *driScrnPriv, static void tridentDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } static void diff --git a/src/mesa/drivers/dri/unichrome/via_screen.c b/src/mesa/drivers/dri/unichrome/via_screen.c index e9f566161c2..3dbb5705710 100644 --- a/src/mesa/drivers/dri/unichrome/via_screen.c +++ b/src/mesa/drivers/dri/unichrome/via_screen.c @@ -316,7 +316,7 @@ viaCreateBuffer(__DRIscreenPrivate *driScrnPriv, static void viaDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } static const __DRIconfig ** diff --git a/src/mesa/drivers/dri/unichrome/via_state.c b/src/mesa/drivers/dri/unichrome/via_state.c index 1cef01ab033..840e4e42da9 100644 --- a/src/mesa/drivers/dri/unichrome/via_state.c +++ b/src/mesa/drivers/dri/unichrome/via_state.c @@ -1342,7 +1342,7 @@ static void viaChooseStencilState(GLcontext *ctx) { struct via_context *vmesa = VIA_CONTEXT(ctx); - if (ctx->Stencil.Enabled) { + if (ctx->Stencil._Enabled) { GLuint temp; vmesa->regEnable |= HC_HenST_MASK; |