diff options
Diffstat (limited to 'src/mesa/drivers')
92 files changed, 3838 insertions, 4579 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 854dea94504..0dbc7c3e853 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -41,6 +41,7 @@ #include "main/bufferobj.h" #include "main/fbobject.h" #include "main/texrender.h" +#include "main/samplerobj.h" #include "main/syncobj.h" #include "main/texturebarrier.h" #include "main/transformfeedback.h" @@ -200,6 +201,8 @@ _mesa_init_driver_functions(struct dd_function_table *driver) _mesa_init_transform_feedback_functions(driver); + _mesa_init_sampler_object_functions(driver); + /* T&L stuff */ driver->NeedValidate = GL_FALSE; driver->ValidateTnlModule = NULL; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 6c35fa10d8a..08b6024639f 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -40,6 +40,7 @@ #include "main/bufferobj.h" #include "main/buffers.h" #include "main/colortab.h" +#include "main/condrender.h" #include "main/depth.h" #include "main/enable.h" #include "main/fbobject.h" @@ -94,6 +95,7 @@ #define META_VIEWPORT 0x4000 #define META_CLAMP_FRAGMENT_COLOR 0x8000 #define META_CLAMP_VERTEX_COLOR 0x10000 +#define META_CONDITIONAL_RENDER 0x20000 /*@}*/ @@ -188,6 +190,10 @@ struct save_state /** META_CLAMP_VERTEX_COLOR */ GLenum ClampVertexColor; + /** META_CONDITIONAL_RENDER */ + struct gl_query_object *CondRenderQuery; + GLenum CondRenderMode; + /** Miscellaneous (always disabled) */ GLboolean Lighting; }; @@ -597,6 +603,14 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, GL_FALSE); } + if (state & META_CONDITIONAL_RENDER) { + save->CondRenderQuery = ctx->Query.CondRenderQuery; + save->CondRenderMode = ctx->Query.CondRenderMode; + + if (ctx->Query.CondRenderQuery) + _mesa_EndConditionalRender(); + } + /* misc */ { save->Lighting = ctx->Light.Enabled; @@ -869,6 +883,12 @@ _mesa_meta_end(struct gl_context 
*ctx) _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, save->ClampVertexColor); } + if (state & META_CONDITIONAL_RENDER) { + if (save->CondRenderQuery) + _mesa_BeginConditionalRender(save->CondRenderQuery->Id, + save->CondRenderMode); + } + /* misc */ if (save->Lighting) { _mesa_set_enable(ctx, GL_LIGHTING, GL_TRUE); @@ -1442,7 +1462,10 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) }; struct vertex verts[4]; /* save all state but scissor, pixel pack/unpack */ - GLbitfield metaSave = META_ALL - META_SCISSOR - META_PIXEL_STORE; + GLbitfield metaSave = (META_ALL - + META_SCISSOR - + META_PIXEL_STORE - + META_CONDITIONAL_RENDER); const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; if (buffers & BUFFER_BITS_COLOR) { @@ -1848,7 +1871,8 @@ _mesa_meta_DrawPixels(struct gl_context *ctx, * just going for the matching set of channels, in floating * point. */ - if (ctx->Color.ClampFragmentColor != GL_TRUE) + if (ctx->Color.ClampFragmentColor != GL_TRUE && + ctx->Extensions.ARB_texture_float) texIntFormat = GL_RGBA32F; } else if (_mesa_is_stencil_format(format)) { diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index 7554bd5e7b9..3298dbb69f5 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -29,6 +29,7 @@ #include "main/enums.h" #include "main/colormac.h" #include "main/macros.h" +#include "main/samplerobj.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" @@ -120,6 +121,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) struct gl_texture_object *tObj = tUnit->_Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); GLuint *state = i830->state.Tex[unit], format, pitch; GLint lodbias; GLubyte border[4]; @@ -193,7 +195,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint 
unit, GLuint ss3) float maxlod; uint32_t minlod_fixed, maxlod_fixed; - switch (tObj->Sampler.MinFilter) { + switch (sampler->MinFilter) { case GL_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NONE; @@ -222,12 +224,12 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) return GL_FALSE; } - if (tObj->Sampler.MaxAnisotropy > 1.0) { + if (sampler->MaxAnisotropy > 1.0) { minFilt = FILTER_ANISOTROPIC; magFilt = FILTER_ANISOTROPIC; } else { - switch (tObj->Sampler.MagFilter) { + switch (sampler->MagFilter) { case GL_NEAREST: magFilt = FILTER_NEAREST; break; @@ -239,7 +241,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) } } - lodbias = (int) ((tUnit->LodBias + tObj->Sampler.LodBias) * 16.0); + lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0); if (lodbias < -64) lodbias = -64; if (lodbias > 63) @@ -259,8 +261,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) * addressable (smallest resolution) LOD. Use it to cover both * MAX_LEVEL and MAX_LOD. 
*/ - minlod_fixed = U_FIXED(CLAMP(tObj->Sampler.MinLod, 0.0, 11), 4); - maxlod = MIN2(tObj->Sampler.MaxLod, tObj->_MaxLevel - tObj->BaseLevel); + minlod_fixed = U_FIXED(CLAMP(sampler->MinLod, 0.0, 11), 4); + maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel); if (intel->intelScreen->deviceID == PCI_CHIP_I855_GM || intel->intelScreen->deviceID == PCI_CHIP_I865_G) { maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11.75), 2); @@ -279,8 +281,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) } { - GLenum ws = tObj->Sampler.WrapS; - GLenum wt = tObj->Sampler.WrapT; + GLenum ws = sampler->WrapS; + GLenum wt = sampler->WrapT; /* 3D textures not available on i830 @@ -300,10 +302,10 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) } /* convert border color from float to ubyte */ - CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->Sampler.BorderColor.f[0]); - CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->Sampler.BorderColor.f[1]); - CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->Sampler.BorderColor.f[2]); - CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->Sampler.BorderColor.f[3]); + CLAMPED_FLOAT_TO_UBYTE(border[0], sampler->BorderColor.f[0]); + CLAMPED_FLOAT_TO_UBYTE(border[1], sampler->BorderColor.f[1]); + CLAMPED_FLOAT_TO_UBYTE(border[2], sampler->BorderColor.f[2]); + CLAMPED_FLOAT_TO_UBYTE(border[3], sampler->BorderColor.f[3]); state[I830_TEXREG_TM0S4] = PACK_COLOR_8888(border[3], border[0], diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 742bb994adb..5aa2ea18048 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -29,6 +29,7 @@ #include "main/enums.h" #include "main/macros.h" #include "main/colormac.h" +#include "main/samplerobj.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" @@ -136,6 +137,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) struct gl_texture_object *tObj = 
tUnit->_Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); GLuint *state = i915->state.Tex[unit], format, pitch; GLint lodbias, aniso = 0; GLubyte border[4]; @@ -164,7 +166,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) format = translate_texture_format(firstImage->TexFormat, firstImage->InternalFormat, - tObj->Sampler.DepthMode); + sampler->DepthMode); pitch = intelObj->mt->region->pitch * intelObj->mt->cpp; state[I915_TEXREG_MS3] = @@ -181,7 +183,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) * (lowest resolution) LOD. Use it to cover both MAX_LEVEL and * MAX_LOD. */ - maxlod = MIN2(tObj->Sampler.MaxLod, tObj->_MaxLevel - tObj->BaseLevel); + maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel); state[I915_TEXREG_MS4] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK | @@ -192,7 +194,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { GLuint minFilt, mipFilt, magFilt; - switch (tObj->Sampler.MinFilter) { + switch (sampler->MinFilter) { case GL_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NONE; @@ -221,16 +223,16 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) return GL_FALSE; } - if (tObj->Sampler.MaxAnisotropy > 1.0) { + if (sampler->MaxAnisotropy > 1.0) { minFilt = FILTER_ANISOTROPIC; magFilt = FILTER_ANISOTROPIC; - if (tObj->Sampler.MaxAnisotropy > 2.0) + if (sampler->MaxAnisotropy > 2.0) aniso = SS2_MAX_ANISO_4; else aniso = SS2_MAX_ANISO_2; } else { - switch (tObj->Sampler.MagFilter) { + switch (sampler->MagFilter) { case GL_NEAREST: magFilt = FILTER_NEAREST; break; @@ -242,7 +244,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) } } - lodbias = (int) ((tUnit->LodBias + tObj->Sampler.LodBias) * 16.0); + lodbias = (int) ((tUnit->LodBias 
+ sampler->LodBias) * 16.0); if (lodbias < -256) lodbias = -256; if (lodbias > 255) @@ -258,14 +260,14 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) /* Shadow: */ - if (tObj->Sampler.CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && + if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && tObj->Target != GL_TEXTURE_3D) { if (tObj->Target == GL_TEXTURE_1D) return GL_FALSE; state[I915_TEXREG_SS2] |= (SS2_SHADOW_ENABLE | - intel_translate_shadow_compare_func(tObj->Sampler.CompareFunc)); + intel_translate_shadow_compare_func(sampler->CompareFunc)); minFilt = FILTER_4X4_FLAT; magFilt = FILTER_4X4_FLAT; @@ -278,9 +280,9 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) } { - GLenum ws = tObj->Sampler.WrapS; - GLenum wt = tObj->Sampler.WrapT; - GLenum wr = tObj->Sampler.WrapR; + GLenum ws = sampler->WrapS; + GLenum wt = sampler->WrapT; + GLenum wr = sampler->WrapR; float minlod; /* We program 1D textures as 2D textures, so the 2D texcoord could @@ -298,8 +300,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) * clamp_to_border. 
*/ if (tObj->Target == GL_TEXTURE_3D && - (tObj->Sampler.MinFilter != GL_NEAREST || - tObj->Sampler.MagFilter != GL_NEAREST) && + (sampler->MinFilter != GL_NEAREST || + sampler->MagFilter != GL_NEAREST) && (ws == GL_CLAMP || wt == GL_CLAMP || wr == GL_CLAMP || @@ -322,7 +324,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); - minlod = MIN2(tObj->Sampler.MinLod, tObj->_MaxLevel - tObj->BaseLevel); + minlod = MIN2(sampler->MinLod, tObj->_MaxLevel - tObj->BaseLevel); state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(minlod, 0.0, 11.0), 4) << SS3_MIN_LOD_SHIFT); @@ -330,10 +332,10 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) } /* convert border color from float to ubyte */ - CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->Sampler.BorderColor.f[0]); - CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->Sampler.BorderColor.f[1]); - CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->Sampler.BorderColor.f[2]); - CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->Sampler.BorderColor.f[3]); + CLAMPED_FLOAT_TO_UBYTE(border[0], sampler->BorderColor.f[0]); + CLAMPED_FLOAT_TO_UBYTE(border[1], sampler->BorderColor.f[1]); + CLAMPED_FLOAT_TO_UBYTE(border[2], sampler->BorderColor.f[2]); + CLAMPED_FLOAT_TO_UBYTE(border[3], sampler->BorderColor.f[3]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { /* GL specs that border color for depth textures is taken from the diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index b05ba35d65f..849018b74ae 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -61,7 +61,6 @@ DRIVER_SOURCES = \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ - brw_state.c \ brw_state_batch.c \ brw_state_cache.c \ brw_state_dump.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c 
b/src/mesa/drivers/dri/i965/brw_cc.c index 74a66af31a5..94b8c20b019 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -37,28 +37,36 @@ #include "main/macros.h" #include "intel_batchbuffer.h" -void -brw_update_cc_vp(struct brw_context *brw) +static void +prepare_cc_vp(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; - struct brw_cc_viewport ccv; + struct brw_cc_viewport *ccv; - memset(&ccv, 0, sizeof(ccv)); + ccv = brw_state_batch(brw, sizeof(*ccv), 32, &brw->cc.vp_offset); /* _NEW_TRANSOFORM */ if (ctx->Transform.DepthClamp) { /* _NEW_VIEWPORT */ - ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); - ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + ccv->min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + ccv->max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); } else { - ccv.min_depth = 0.0; - ccv.max_depth = 1.0; + ccv->min_depth = 0.0; + ccv->max_depth = 1.0; } - drm_intel_bo_unreference(brw->cc.vp_bo); - brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv)); + brw->state.dirty.cache |= CACHE_NEW_CC_VP; } +const struct brw_tracked_state brw_cc_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, + .brw = BRW_NEW_BATCH, + .cache = 0 + }, + .prepare = prepare_cc_vp +}; + /** * Modify blend function to force destination alpha to 1.0 * @@ -81,11 +89,6 @@ fix_xRGB_alpha(GLenum function) return function; } -static void prepare_cc_unit(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->cc.vp_bo); -} - /** * Creates the state cache entry for the given CC unit key. 
*/ @@ -209,7 +212,8 @@ static void upload_cc_unit(struct brw_context *brw) cc->cc5.statistics_enable = 1; /* CACHE_NEW_CC_VP */ - cc->cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ + cc->cc4.cc_viewport_state_offset = (intel->batch.bo->offset + + brw->cc.vp_offset) >> 5; /* reloc */ brw->state.dirty.cache |= CACHE_NEW_CC_UNIT; @@ -217,7 +221,7 @@ static void upload_cc_unit(struct brw_context *brw) drm_intel_bo_emit_reloc(brw->intel.batch.bo, (brw->cc.state_offset + offsetof(struct brw_cc_unit_state, cc4)), - brw->cc.vp_bo, 0, + intel->batch.bo, brw->cc.vp_offset, I915_GEM_DOMAIN_INSTRUCTION, 0); } @@ -227,7 +231,6 @@ const struct brw_tracked_state brw_cc_unit = { .brw = BRW_NEW_BATCH, .cache = CACHE_NEW_CC_VP }, - .prepare = prepare_cc_unit, .emit = upload_cc_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 1be165cc9a1..3c175515408 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -144,14 +144,12 @@ static void compile_clip_prog( struct brw_context *brw, /* Upload */ drm_intel_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, - BRW_CLIP_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - sizeof(c.prog_data), - &brw->clip.prog_data); + brw->clip.prog_bo = brw_upload_cache(&brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->clip.prog_data); } /* Calculate interpolants for triangle and line rasterization. 
@@ -270,7 +268,6 @@ static void upload_clip_prog(struct brw_context *brw) drm_intel_bo_unreference(brw->clip.prog_bo); brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, &key, sizeof(key), - NULL, 0, &brw->clip.prog_data); if (brw->clip.prog_bo == NULL) compile_clip_prog( brw, &key ); diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 60fd5fa7d9e..6015c8cbe9f 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -33,148 +33,101 @@ #include "brw_state.h" #include "brw_defines.h" -struct brw_clip_unit_key { - unsigned int total_grf; - unsigned int urb_entry_read_length; - unsigned int curb_entry_read_length; - unsigned int clip_mode; - - unsigned int curbe_offset; - - unsigned int nr_urb_entries, urb_size; - - GLboolean depth_clamp; -}; - static void -clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) -{ - struct gl_context *ctx = &brw->intel.ctx; - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_CLIP_PROG */ - key->total_grf = brw->clip.prog_data->total_grf; - key->urb_entry_read_length = brw->clip.prog_data->urb_read_length; - key->curb_entry_read_length = brw->clip.prog_data->curb_read_length; - key->clip_mode = brw->clip.prog_data->clip_mode; - - /* BRW_NEW_CURBE_OFFSETS */ - key->curbe_offset = brw->curbe.clip_start; - - /* BRW_NEW_URB_FENCE */ - key->nr_urb_entries = brw->urb.nr_clip_entries; - key->urb_size = brw->urb.vsize; - - /* _NEW_TRANSOFORM */ - key->depth_clamp = ctx->Transform.DepthClamp; -} - -static drm_intel_bo * -clip_unit_create_from_key(struct brw_context *brw, - struct brw_clip_unit_key *key) +brw_prepare_clip_unit(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct brw_clip_unit_state clip; - drm_intel_bo *bo; + struct gl_context *ctx = &intel->ctx; + struct brw_clip_unit_state *clip; - memset(&clip, 0, sizeof(clip)); + clip = brw_state_batch(brw, sizeof(*clip), 32, 
&brw->clip.state_offset); + memset(clip, 0, sizeof(*clip)); - clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + /* CACHE_NEW_CLIP_PROG */ + clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) / + 16 - 1); /* reloc */ - clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; + clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; - clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - clip.thread1.single_program_flow = 1; + clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + clip->thread1.single_program_flow = 1; - clip.thread3.urb_entry_read_length = key->urb_entry_read_length; - clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length; - clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - clip.thread3.dispatch_grf_start_reg = 1; - clip.thread3.urb_entry_read_offset = 0; + clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; + clip->thread3.const_urb_entry_read_length = + brw->clip.prog_data->curb_read_length; + + /* BRW_NEW_CURBE_OFFSETS */ + clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + clip->thread3.dispatch_grf_start_reg = 1; + clip->thread3.urb_entry_read_offset = 0; - clip.thread4.nr_urb_entries = key->nr_urb_entries; - clip.thread4.urb_entry_allocation_size = key->urb_size - 1; + /* BRW_NEW_URB_FENCE */ + clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries; + clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1; /* If we have enough clip URB entries to run two threads, do so. */ - if (key->nr_urb_entries >= 10) { + if (brw->urb.nr_clip_entries >= 10) { /* Half of the URB entries go to each thread, and it has to be an * even number. */ - assert(key->nr_urb_entries % 2 == 0); + assert(brw->urb.nr_clip_entries % 2 == 0); /* Although up to 16 concurrent Clip threads are allowed on Ironlake, * only 2 threads can output VUEs at a time. 
*/ if (intel->gen == 5) - clip.thread4.max_threads = 16 - 1; + clip->thread4.max_threads = 16 - 1; else - clip.thread4.max_threads = 2 - 1; + clip->thread4.max_threads = 2 - 1; } else { - assert(key->nr_urb_entries >= 5); - clip.thread4.max_threads = 1 - 1; + assert(brw->urb.nr_clip_entries >= 5); + clip->thread4.max_threads = 1 - 1; } if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD)) - clip.thread4.max_threads = 0; + clip->thread4.max_threads = 0; if (unlikely(INTEL_DEBUG & DEBUG_STATS)) - clip.thread4.stats_enable = 1; - - clip.clip5.userclip_enable_flags = 0x7f; - clip.clip5.userclip_must_clip = 1; - clip.clip5.guard_band_enable = 0; - if (!key->depth_clamp) - clip.clip5.viewport_z_clip_enable = 1; - clip.clip5.viewport_xy_clip_enable = 1; - clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; - clip.clip5.api_mode = BRW_CLIP_API_OGL; - clip.clip5.clip_mode = key->clip_mode; + clip->thread4.stats_enable = 1; - if (intel->is_g4x) - clip.clip5.negative_w_clip_test = 1; + clip->clip5.userclip_enable_flags = 0x7f; + clip->clip5.userclip_must_clip = 1; + clip->clip5.guard_band_enable = 0; + /* _NEW_TRANSOFORM */ + if (!ctx->Transform.DepthClamp) + clip->clip5.viewport_z_clip_enable = 1; + clip->clip5.viewport_xy_clip_enable = 1; + clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE; + clip->clip5.api_mode = BRW_CLIP_API_OGL; + clip->clip5.clip_mode = brw->clip.prog_data->clip_mode; - clip.clip6.clipper_viewport_state_ptr = 0; - clip.viewport_xmin = -1; - clip.viewport_xmax = 1; - clip.viewport_ymin = -1; - clip.viewport_ymax = 1; + if (intel->is_g4x) + clip->clip5.negative_w_clip_test = 1; - bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, - key, sizeof(*key), - &brw->clip.prog_bo, 1, - &clip, sizeof(clip)); + clip->clip6.clipper_viewport_state_ptr = 0; + clip->viewport_xmin = -1; + clip->viewport_xmax = 1; + clip->viewport_ymin = -1; + clip->viewport_ymax = 1; /* Emit clip program relocation */ assert(brw->clip.prog_bo); - drm_intel_bo_emit_reloc(bo, 
offsetof(struct brw_clip_unit_state, thread0), - brw->clip.prog_bo, clip.thread0.grf_reg_count << 1, + drm_intel_bo_emit_reloc(intel->batch.bo, + (brw->clip.state_offset + + offsetof(struct brw_clip_unit_state, thread0)), + brw->clip.prog_bo, clip->thread0.grf_reg_count << 1, I915_GEM_DOMAIN_INSTRUCTION, 0); - return bo; -} - -static void upload_clip_unit( struct brw_context *brw ) -{ - struct brw_clip_unit_key key; - - clip_unit_populate_key(brw, &key); - - drm_intel_bo_unreference(brw->clip.state_bo); - brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT, - &key, sizeof(key), - &brw->clip.prog_bo, 1, - NULL); - if (brw->clip.state_bo == NULL) { - brw->clip.state_bo = clip_unit_create_from_key(brw, &key); - } + brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT; } const struct brw_tracked_state brw_clip_unit = { .dirty = { .mesa = _NEW_TRANSFORM, - .brw = (BRW_NEW_CURBE_OFFSETS | + .brw = (BRW_NEW_BATCH | + BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG }, - .prepare = upload_clip_unit, + .prepare = brw_prepare_clip_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 230d326fa12..db6466ff1ae 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -51,9 +51,6 @@ static void brwInitDriverFunctions( struct dd_function_table *functions ) brwInitFragProgFuncs( functions ); brw_init_queryobj_functions(functions); - - functions->Enable = brw_enable; - functions->DepthRange = brw_depth_range; } GLboolean brwCreateContext( int api, @@ -232,11 +229,6 @@ GLboolean brwCreateContext( int api, brw_draw_init( brw ); - /* Now that most driver functions are hooked up, initialize some of the - * immediate state. 
- */ - brw_update_cc_vp(brw); - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1daa49abfb3..26cd8209c65 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -204,13 +204,16 @@ struct brw_wm_prog_data { GLuint urb_read_length; GLuint first_curbe_grf; + GLuint first_curbe_grf_16; GLuint total_grf; + GLuint total_grf_16; GLuint total_scratch; GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; GLboolean error; int dispatch_width; + uint32_t prog_offset_16; /* Pointer to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). @@ -308,7 +311,6 @@ enum brw_cache_id { BRW_CC_VP, BRW_CC_UNIT, BRW_WM_PROG, - BRW_SAMPLER_DEFAULT_COLOR, BRW_SAMPLER, BRW_WM_UNIT, BRW_SF_PROG, @@ -336,8 +338,6 @@ struct brw_cache_item { GLuint hash; GLuint key_size; /* for variable-sized keys */ const void *key; - drm_intel_bo **reloc_bufs; - GLuint nr_reloc_bufs; drm_intel_bo *bo; @@ -381,7 +381,6 @@ struct brw_tracked_state { #define CACHE_NEW_CC_VP (1<<BRW_CC_VP) #define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) #define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) -#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) #define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) #define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) #define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) @@ -630,29 +629,38 @@ struct brw_context int8_t *constant_map; /* variable array following prog_data */ drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; drm_intel_bo *const_bo; + uint32_t state_offset; /** Binding table of pointers to surf_bo entries */ uint32_t bind_bo_offset; uint32_t surf_offset[BRW_VS_MAX_SURF]; GLuint nr_surfaces; + + uint32_t push_const_offset; /* Offset in the batchbuffer */ + int push_const_size; /* in 256-bit register increments */ } vs; struct { struct brw_gs_prog_data *prog_data; GLboolean prog_active; + uint32_t state_offset; drm_intel_bo 
*prog_bo; - drm_intel_bo *state_bo; } gs; struct { struct brw_clip_prog_data *prog_data; drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; - drm_intel_bo *vp_bo; + + /* Offset in the batch to the CLIP state on pre-gen6. */ + uint32_t state_offset; + + /* As of gen6, this is the offset in the batch to the CLIP VP, + * instead of vp_bo. + */ + uint32_t vp_offset; } clip; @@ -660,9 +668,7 @@ struct brw_context struct brw_sf_prog_data *prog_data; drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; uint32_t state_offset; - drm_intel_bo *vp_bo; uint32_t vp_offset; } sf; @@ -675,8 +681,9 @@ struct brw_context */ GLbitfield input_size_masks[4]; - /** Array of surface default colors (texture border color) */ - drm_intel_bo *sdc_bo[BRW_MAX_TEX_UNIT]; + /** offsets in the batch to sampler default colors (texture border color) + */ + uint32_t sdc_offset[BRW_MAX_TEX_UNIT]; GLuint render_surf; GLuint nr_surfaces; @@ -685,35 +692,32 @@ struct brw_context drm_intel_bo *scratch_bo; GLuint sampler_count; - drm_intel_bo *sampler_bo; + uint32_t sampler_offset; /** Binding table of pointers to surf_bo entries */ uint32_t bind_bo_offset; uint32_t surf_offset[BRW_WM_MAX_SURF]; + uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */ drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; drm_intel_bo *const_bo; /* pull constant buffer. */ /** - * This is the push constant BO on gen6. + * This is offset in the batch to the push constants on gen6. * * Pre-gen6, push constants live in the CURBE. 
*/ - drm_intel_bo *push_const_bo; + uint32_t push_const_offset; } wm; struct { /* gen4 */ drm_intel_bo *prog_bo; - drm_intel_bo *vp_bo; - - /* gen6 */ - drm_intel_bo *blend_state_bo; - drm_intel_bo *depth_stencil_state_bo; - drm_intel_bo *color_calc_state_bo; uint32_t state_offset; + uint32_t blend_state_offset; + uint32_t depth_stencil_state_offset; + uint32_t vp_offset; } cc; struct { @@ -783,9 +787,6 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ); */ void brw_upload_urb_fence(struct brw_context *brw); -/* brw_cc.c */ -void brw_update_cc_vp(struct brw_context *brw); - /* brw_curbe.c */ void brw_upload_cs_urb_state(struct brw_context *brw); @@ -793,10 +794,6 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); -/* brw_state.c */ -void brw_enable(struct gl_context * ctx, GLenum cap, GLboolean state); -void brw_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval); - /*====================================================================== * Inline conversion functions. 
These are better-typed than the * macros used previously: diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 2db70c543ea..9ab533179b8 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -28,6 +28,8 @@ #include "main/glheader.h" #include "main/context.h" +#include "main/condrender.h" +#include "main/samplerobj.h" #include "main/state.h" #include "main/enums.h" #include "tnl/tnl.h" @@ -278,22 +280,25 @@ static GLboolean check_fallbacks( struct brw_context *brw, int u; for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; + if (texUnit->Enabled) { + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, u); + if (texUnit->Enabled & TEXTURE_1D_BIT) { - if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->Sampler.WrapS == GL_CLAMP) { + if (sampler->WrapS == GL_CLAMP) { return GL_TRUE; } } if (texUnit->Enabled & TEXTURE_2D_BIT) { - if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapT == GL_CLAMP) { + if (sampler->WrapS == GL_CLAMP || + sampler->WrapT == GL_CLAMP) { return GL_TRUE; } } if (texUnit->Enabled & TEXTURE_3D_BIT) { - if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapT == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapR == GL_CLAMP) { + if (sampler->WrapS == GL_CLAMP || + sampler->WrapT == GL_CLAMP || + sampler->WrapR == GL_CLAMP) { return GL_TRUE; } } @@ -359,15 +364,21 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, for (i = 0; i < nr_prims; i++) { uint32_t hw_prim; + int estimated_max_prim_size; + + estimated_max_prim_size = 512; /* batchbuffer commands */ + estimated_max_prim_size += (BRW_MAX_TEX_UNIT * + (sizeof(struct brw_sampler_state) + + sizeof(struct gen5_sampler_default_color))); + estimated_max_prim_size += 1024; /* gen6 VS push 
constants */ + estimated_max_prim_size += 1024; /* gen6 WM push constants */ + estimated_max_prim_size += 512; /* misc. pad */ /* Flush the batch if it's approaching full, so that we don't wrap while * we've got validated state that needs to be in the same batch as the - * primitives. This fraction is just a guess (minimal full state plus - * a primitive is around 512 bytes), and would be better if we had - * an upper bound of how much we might emit in a single - * brw_try_draw_prims(). + * primitives. */ - intel_batchbuffer_require_space(intel, 1024, false); + intel_batchbuffer_require_space(intel, estimated_max_prim_size, false); hw_prim = brw_set_prim(brw, &prim[i]); if (brw->state.dirty.brw) { @@ -438,6 +449,9 @@ void brw_draw_prims( struct gl_context *ctx, { GLboolean retval; + if (!_mesa_check_conditional_render(ctx)) + return; + if (!vbo_all_varyings_in_vbos(arrays)) { if (!index_bounds_valid) vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 718b3800423..4eb67d57a5a 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -293,6 +293,14 @@ static INLINE struct brw_reg retype( struct brw_reg reg, return reg; } +static inline struct brw_reg +sechalf(struct brw_reg reg) +{ + if (reg.vstride) + reg.nr++; + return reg; +} + static INLINE struct brw_reg suboffset( struct brw_reg reg, GLuint delta ) { @@ -856,7 +864,6 @@ void brw_ff_sync(struct brw_compile *p, void brw_fb_WRITE(struct brw_compile *p, int dispatch_width, - struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLuint binding_table_index, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 71485cd1f71..859068ec4eb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1871,7 +1871,6 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, void brw_fb_WRITE(struct 
brw_compile *p, int dispatch_width, - struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLuint binding_table_index, @@ -1883,6 +1882,12 @@ void brw_fb_WRITE(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_control, msg_type; + struct brw_reg dest; + + if (dispatch_width == 16) + dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + else + dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); if (intel->gen >= 6 && binding_table_index == 0) { insn = next_insn(p, BRW_OPCODE_SENDC); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5426925e372..21eb9e4e5e1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -194,6 +194,32 @@ fs_visitor::fail(const char *format, ...) } } +void +fs_visitor::push_force_uncompressed() +{ + force_uncompressed_stack++; +} + +void +fs_visitor::pop_force_uncompressed() +{ + force_uncompressed_stack--; + assert(force_uncompressed_stack >= 0); +} + +void +fs_visitor::push_force_sechalf() +{ + force_sechalf_stack++; +} + +void +fs_visitor::pop_force_sechalf() +{ + force_sechalf_stack--; + assert(force_sechalf_stack >= 0); +} + /** * Returns how many MRFs an FS opcode will write over. 
* @@ -214,9 +240,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case FS_OPCODE_LOG2: case FS_OPCODE_SIN: case FS_OPCODE_COS: - return 1; + return 1 * c->dispatch_width / 8; case FS_OPCODE_POW: - return 2; + return 2 * c->dispatch_width / 8; case FS_OPCODE_TEX: case FS_OPCODE_TXB: case FS_OPCODE_TXD: @@ -313,6 +339,31 @@ fs_visitor::variable_storage(ir_variable *var) return (fs_reg *)hash_table_find(this->variable_ht, var); } +void +import_uniforms_callback(const void *key, + void *data, + void *closure) +{ + struct hash_table *dst_ht = (struct hash_table *)closure; + const fs_reg *reg = (const fs_reg *)data; + + if (reg->file != UNIFORM) + return; + + hash_table_insert(dst_ht, data, key); +} + +/* For 16-wide, we need to follow from the uniform setup of 8-wide dispatch. + * This brings in those uniform definitions + */ +void +fs_visitor::import_uniforms(struct hash_table *src_variable_ht) +{ + hash_table_call_foreach(src_variable_ht, + import_uniforms_callback, + variable_ht); +} + /* Our support for uniforms is piggy-backed on the struct * gl_fragment_program, because that's where the values actually * get stored, rather than in some global gl_shader_program uniform @@ -614,7 +665,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) if (intel->gen < 6) { inst->base_mrf = 2; - inst->mlen = 1; + inst->mlen = c->dispatch_width / 8; } return inst; @@ -652,7 +703,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) inst = emit(opcode, dst, src0, reg_null_f); inst->base_mrf = base_mrf; - inst->mlen = 2; + inst->mlen = 2 * c->dispatch_width / 8; } return inst; } @@ -689,6 +740,13 @@ fs_visitor::visit(ir_variable *ir) if (ir->mode == ir_var_uniform) { int param_index = c->prog_data.nr_params; + if (c->dispatch_width == 16) { + if (!variable_storage(ir)) { + fail("Failed to find uniform '%s' in 16-wide\n", ir->name); + } + return; + } + if (!strncmp(ir->name, "gl_", 3)) { setup_builtin_uniform_values(ir); } else { @@ 
-1233,32 +1291,34 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) return inst; } +/* gen5's sampler has slots for u, v, r, array index, then optional + * parameters like shadow comparitor or LOD bias. If optional + * parameters aren't present, those base slots are optional and don't + * need to be included in the message. + * + * We don't fill in the unnecessary slots regardless, which may look + * surprising in the disassembly. + */ fs_inst * fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) { - /* gen5's SIMD8 sampler has slots for u, v, r, array index, then - * optional parameters like shadow comparitor or LOD bias. If - * optional parameters aren't present, those base slots are - * optional and don't need to be included in the message. - * - * We don't fill in the unnecessary slots regardless, which may - * look surprising in the disassembly. - */ int mlen = 1; /* g0 header always present. */ int base_mrf = 1; + int reg_width = c->dispatch_width / 8; for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width), + coordinate); coordinate.reg_offset++; } - mlen += ir->coordinate->type->vector_elements; + mlen += ir->coordinate->type->vector_elements * reg_width; if (ir->shadow_comparitor) { - mlen = MAX2(mlen, 5); + mlen = MAX2(mlen, 1 + 4 * reg_width); ir->shadow_comparitor->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen++; + mlen += reg_width; } fs_inst *inst = NULL; @@ -1268,17 +1328,18 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) break; case ir_txb: ir->lod_info.bias->accept(this); - mlen = MAX2(mlen, 5); + mlen = MAX2(mlen, 1 + 4 * reg_width); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen++; + mlen += reg_width; inst = emit(FS_OPCODE_TXB, dst); + break; case 
ir_txl: ir->lod_info.lod->accept(this); - mlen = MAX2(mlen, 5); + mlen = MAX2(mlen, 1 + 4 * reg_width); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen++; + mlen += reg_width; inst = emit(FS_OPCODE_TXL, dst); break; @@ -1290,6 +1351,10 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) inst->base_mrf = base_mrf; inst->mlen = mlen; + if (mlen > 11) { + fail("Message length >11 disallowed by hardware\n"); + } + return inst; } @@ -1355,6 +1420,12 @@ fs_visitor::visit(ir_texture *ir) 0 }; + if (c->dispatch_width == 16) { + fail("rectangle scale uniform setup not supported on 16-wide\n"); + this->result = fs_reg(this, ir->type); + return; + } + c->prog_data.param_convert[c->prog_data.nr_params] = PARAM_NO_CONVERT; c->prog_data.param_convert[c->prog_data.nr_params + 1] = @@ -1731,6 +1802,10 @@ fs_visitor::visit(ir_if *ir) { fs_inst *inst; + if (c->dispatch_width == 16) { + fail("Can't support (non-uniform) control flow on 16-wide\n"); + } + /* Don't point the annotation at the if statement, because then it plus * the then and else blocks get printed. 
*/ @@ -1771,6 +1846,10 @@ fs_visitor::visit(ir_loop *ir) { fs_reg counter = reg_undef; + if (c->dispatch_width == 16) { + fail("Can't support (non-uniform) control flow on 16-wide\n"); + } + if (ir->counter) { this->base_ir = ir->counter; ir->counter->accept(this); @@ -1874,6 +1953,11 @@ fs_visitor::emit(fs_inst inst) fs_inst *list_inst = new(mem_ctx) fs_inst; *list_inst = inst; + if (force_uncompressed_stack > 0) + list_inst->force_uncompressed = true; + else if (force_sechalf_stack > 0) + list_inst->force_sechalf = true; + list_inst->annotation = this->current_annotation; list_inst->ir = this->base_ir; @@ -1916,21 +2000,14 @@ fs_visitor::interp_reg(int location, int channel) void fs_visitor::emit_interpolation_setup_gen4() { - struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - this->current_annotation = "compute pixel centers"; this->pixel_x = fs_reg(this, glsl_type::uint_type); this->pixel_y = fs_reg(this, glsl_type::uint_type); this->pixel_x.type = BRW_REGISTER_TYPE_UW; this->pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(BRW_OPCODE_ADD, - this->pixel_x, - fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010))); - emit(BRW_OPCODE_ADD, - this->pixel_y, - fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100))); + + emit(FS_OPCODE_PIXEL_X, this->pixel_x); + emit(FS_OPCODE_PIXEL_Y, this->pixel_y); this->current_annotation = "compute pixel deltas from v0"; if (brw->has_pln) { @@ -2001,11 +2078,69 @@ fs_visitor::emit_interpolation_setup_gen6() } void +fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) +{ + int reg_width = c->dispatch_width / 8; + + if (c->dispatch_width == 8 || intel->gen == 6) { + /* SIMD8 write looks like: + * m + 0: r0 + * m + 1: r1 + * m + 2: g0 + * m + 3: g1 + * + * gen6 SIMD16 DP write looks like: + * m + 0: r0 + * m + 1: r1 + * m + 2: g0 + * m + 3: g1 + * m + 4: b0 + * m + 5: b1 + * m + 6: a0 + * m + 7: a1 + */ + emit(BRW_OPCODE_MOV, fs_reg(MRF, 
first_color_mrf + index * reg_width), + color); + } else { + /* pre-gen6 SIMD16 single source DP write looks like: + * m + 0: r0 + * m + 1: g0 + * m + 2: b0 + * m + 3: a0 + * m + 4: r1 + * m + 5: g1 + * m + 6: b1 + * m + 7: a1 + */ + if (brw->has_compr4) { + /* By setting the high bit of the MRF register number, we + * indicate that we want COMPR4 mode - instead of doing the + * usual destination + 1 for the second half we get + * destination + 4. + */ + emit(BRW_OPCODE_MOV, + fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color); + } else { + push_force_uncompressed(); + emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color); + pop_force_uncompressed(); + + push_force_sechalf(); + color.sechalf = true; + emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color); + pop_force_sechalf(); + color.sechalf = false; + } + } +} + +void fs_visitor::emit_fb_writes() { this->current_annotation = "FB write header"; GLboolean header_present = GL_TRUE; int nr = 0; + int reg_width = c->dispatch_width / 8; if (intel->gen >= 6 && !this->kill_emitted && @@ -2019,31 +2154,44 @@ fs_visitor::emit_fb_writes() } if (c->aa_dest_stencil_reg) { + push_force_uncompressed(); emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))); + pop_force_uncompressed(); } /* Reserve space for color. It'll be filled in per MRT below. */ int color_mrf = nr; - nr += 4; + nr += 4 * reg_width; if (c->source_depth_to_render_target) { + if (intel->gen == 6 && c->dispatch_width == 16) { + /* For outputting oDepth on gen6, SIMD8 writes have to be + * used. This would require 8-wide moves of each half to + * message regs, kind of like pre-gen5 SIMD16 FB writes. + * Just bail on doing so for now. + */ + fail("Missing support for simd16 depth writes on gen6\n"); + } + if (c->computes_depth) { /* Hand over gl_FragDepth. 
*/ assert(this->frag_depth); fs_reg depth = *(variable_storage(this->frag_depth)); - emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth); + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth); } else { /* Pass through the payload depth. */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), fs_reg(brw_vec8_grf(c->source_depth_reg, 0))); } + nr += reg_width; } if (c->dest_depth_reg) { - emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))); + nr += reg_width; } fs_reg color = reg_undef; @@ -2060,7 +2208,7 @@ fs_visitor::emit_fb_writes() target); if (this->frag_color || this->frag_data) { for (int i = 0; i < 4; i++) { - emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + i), color); + emit_color_write(i, color_mrf, color); color.reg_offset++; } } @@ -2084,7 +2232,7 @@ fs_visitor::emit_fb_writes() * renderbuffer. */ color.reg_offset += 3; - emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + 3), color); + emit_color_write(3, color_mrf, color); } fs_inst *inst = emit(FS_OPCODE_FB_WRITE); @@ -2144,8 +2292,7 @@ fs_visitor::generate_fb_write(fs_inst *inst) brw_pop_insn_state(p); brw_fb_WRITE(p, - 8, /* dispatch_width */ - retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + c->dispatch_width, inst->base_mrf, implied_header, inst->target, @@ -2155,6 +2302,40 @@ fs_visitor::generate_fb_write(fs_inst *inst) inst->header_present); } +/* Computes the integer pixel x,y values from the origin. + * + * This is the basis of gl_FragCoord computation, but is also used + * pre-gen6 for computing the deltas from v0 for computing + * interpolation. 
+ */ +void +fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x) +{ + struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + struct brw_reg src; + struct brw_reg deltas; + + if (is_x) { + src = stride(suboffset(g1_uw, 4), 2, 4, 0); + deltas = brw_imm_v(0x10101010); + } else { + src = stride(suboffset(g1_uw, 5), 2, 4, 0); + deltas = brw_imm_v(0x11001100); + } + + if (c->dispatch_width == 16) { + dst = vec16(dst); + } + + /* We do this 8 or 16-wide, but since the destination is UW we + * don't do compression in the 16-wide case. + */ + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_ADD(p, dst, src, deltas); + brw_pop_insn_state(p); +} + void fs_visitor::generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src) @@ -2214,8 +2395,16 @@ fs_visitor::generate_math(fs_inst *inst, assert(inst->mlen == 0); if (inst->opcode == FS_OPCODE_POW) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, dst, op, src[0], src[1]); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1])); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst, op, inst->saturate ? BRW_MATH_SATURATE_SATURATE : @@ -2223,10 +2412,23 @@ fs_visitor::generate_math(fs_inst *inst, 0, src[0], BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, sechalf(dst), + op, + inst->saturate ? 
BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + 0, sechalf(src[0]), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } } - } else { + } else /* gen <= 5 */{ assert(inst->mlen >= 1); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst, op, inst->saturate ? BRW_MATH_SATURATE_SATURATE : @@ -2234,6 +2436,19 @@ fs_visitor::generate_math(fs_inst *inst, inst->base_mrf, src[0], BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, sechalf(dst), + op, + inst->saturate ? BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + inst->base_mrf + 1, sechalf(src[0]), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } } } @@ -2244,6 +2459,12 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) int rlen = 4; uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; + if (c->dispatch_width == 16) { + rlen = 8; + dst = vec16(dst); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } + if (intel->gen >= 5) { switch (inst->opcode) { case FS_OPCODE_TEX: @@ -2311,11 +2532,6 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) } assert(msg_type != -1); - if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { - rlen = 8; - dst = vec16(dst); - } - brw_SAMPLE(p, retype(dst, BRW_REGISTER_TYPE_UW), inst->base_mrf, @@ -2408,6 +2624,7 @@ fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask) } else { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */ brw_pop_insn_state(p); } @@ -2432,6 +2649,7 @@ fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask) brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, 
BRW_COMPRESSION_NONE); brw_AND(p, g1, f0, g1); brw_pop_insn_state(p); } else { @@ -2441,6 +2659,7 @@ fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask) brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_AND(p, g0, mask, g0); brw_pop_insn_state(p); } @@ -2527,6 +2746,9 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) void fs_visitor::setup_paramvalues_refs() { + if (c->dispatch_width != 8) + return; + /* Set up the pointers to ParamValues now that that array is finalized. */ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { c->prog_data.param[i] = @@ -2538,8 +2760,12 @@ fs_visitor::setup_paramvalues_refs() void fs_visitor::assign_curb_setup() { - c->prog_data.first_curbe_grf = c->nr_payload_regs; c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; + if (c->dispatch_width == 8) { + c->prog_data.first_curbe_grf = c->nr_payload_regs; + } else { + c->prog_data.first_curbe_grf_16 = c->nr_payload_regs; + } /* Map the offsets in the UNIFORM file to fixed HW regs. */ foreach_iter(exec_list_iterator, iter, this->instructions) { @@ -2548,7 +2774,7 @@ fs_visitor::assign_curb_setup() for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; - struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + + struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + constant_nr / 8, constant_nr % 8); @@ -2600,7 +2826,7 @@ fs_visitor::calculate_urb_setup() void fs_visitor::assign_urb_setup() { - int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; + int urb_start = c->nr_payload_regs + c->prog_data.curb_read_length; /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. 
@@ -2725,6 +2951,11 @@ fs_visitor::setup_pull_constants() if (c->prog_data.nr_params <= max_uniform_components) return; + if (c->dispatch_width == 16) { + fail("Pull constants not supported in 16-wide\n"); + return; + } + /* Just demote the end of the list. We could probably do better * here, demoting things that are rarely used in the program first. */ @@ -2884,7 +3115,9 @@ fs_visitor::propagate_constants() if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->dst.file != GRF || inst->src[0].file != IMM || - inst->dst.type != inst->src[0].type) + inst->dst.type != inst->src[0].type || + (c->dispatch_width == 16 && + (inst->force_uncompressed || inst->force_sechalf))) continue; /* Don't bother with cases where we should have had the @@ -3152,6 +3385,20 @@ fs_visitor::compute_to_mrf() inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1) continue; + /* Work out which hardware MRF registers are written by this + * instruction. + */ + int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int mrf_high; + if (inst->dst.hw_reg & BRW_MRF_COMPR4) { + mrf_high = mrf_low + 4; + } else if (c->dispatch_width == 16 && + (!inst->force_uncompressed && !inst->force_sechalf)) { + mrf_high = mrf_low + 1; + } else { + mrf_high = mrf_low; + } + /* Can't compute-to-MRF this GRF if someone else was going to * read it later. */ @@ -3179,11 +3426,21 @@ fs_visitor::compute_to_mrf() } /* If it's predicated, it (probably) didn't populate all - * the channels. + * the channels. We might be able to rewrite everything + * that writes that reg, but it would require smarter + * tracking to delay the rewriting until complete success. */ if (scan_inst->predicated) break; + /* If it's half of register setup and not the same half as + * our MOV we're trying to remove, bail for now. + */ + if (scan_inst->force_uncompressed != inst->force_uncompressed || + scan_inst->force_sechalf != inst->force_sechalf) { + break; + } + /* SEND instructions can't have MRF as a destination. 
*/ if (scan_inst->mlen) break; @@ -3233,12 +3490,29 @@ fs_visitor::compute_to_mrf() if (interfered) break; - if (scan_inst->dst.file == MRF && - scan_inst->dst.hw_reg == inst->dst.hw_reg) { - /* Somebody else wrote our MRF here, so we can't can't + if (scan_inst->dst.file == MRF) { + /* If somebody else writes our MRF here, we can't * compute-to-MRF before that. */ - break; + int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int scan_mrf_high; + + if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) { + scan_mrf_high = scan_mrf_low + 4; + } else if (c->dispatch_width == 16 && + (!scan_inst->force_uncompressed && + !scan_inst->force_sechalf)) { + scan_mrf_high = scan_mrf_low + 1; + } else { + scan_mrf_high = scan_mrf_low; + } + + if (mrf_low == scan_mrf_low || + mrf_low == scan_mrf_high || + mrf_high == scan_mrf_low || + mrf_high == scan_mrf_high) { + break; + } } if (scan_inst->mlen > 0) { @@ -3247,8 +3521,12 @@ fs_visitor::compute_to_mrf() * scan_inst->mlen - 1. Don't go pushing our MRF write up * above it. */ - if (inst->dst.hw_reg >= scan_inst->base_mrf && - inst->dst.hw_reg < scan_inst->base_mrf + scan_inst->mlen) { + if (mrf_low >= scan_inst->base_mrf && + mrf_low < scan_inst->base_mrf + scan_inst->mlen) { + break; + } + if (mrf_high >= scan_inst->base_mrf && + mrf_high < scan_inst->base_mrf + scan_inst->mlen) { break; } } @@ -3268,6 +3546,10 @@ fs_visitor::remove_duplicate_mrf_writes() fs_inst *last_mrf_move[16]; bool progress = false; + /* Need to update the MRF tracking for compressed instructions. 
*/ + if (c->dispatch_width == 16) + return false; + memset(last_mrf_move, 0, sizeof(last_mrf_move)); foreach_iter(exec_list_iterator, iter, this->instructions) { @@ -3347,6 +3629,29 @@ fs_visitor::virtual_grf_interferes(int a, int b) (this->virtual_grf_use[b] != -1 || this->virtual_grf_def[b] == MAX_INSTRUCTION)); + /* If the register is used to store 16 values of less than float + * size (only the case for pixel_[xy]), then we can't allocate + * another dword-sized thing to that register that would be used in + * the same instruction. This is because when the GPU decodes (for + * example): + * + * (declare (in ) vec4 gl_FragCoord@0x97766a0) + * add(16) g6<1>F g6<8,8,1>UW 0.5F { align1 compr }; + * + * it's actually processed as: + * add(8) g6<1>F g6<8,8,1>UW 0.5F { align1 }; + * add(8) g7<1>F g6.8<8,8,1>UW 0.5F { align1 sechalf }; + * + * so our second half values in g6 got overwritten in the first + * half. + */ + if (c->dispatch_width == 16 && (this->pixel_x.reg == a || + this->pixel_x.reg == b || + this->pixel_y.reg == a || + this->pixel_y.reg == b)) { + return start <= end; + } + return start < end; } @@ -3366,6 +3671,8 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) reg->hw_reg, reg->smear); } brw_reg = retype(brw_reg, reg->type); + if (reg->sechalf) + brw_reg = sechalf(brw_reg); break; case IMM: switch (reg->type) { @@ -3411,7 +3718,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) void fs_visitor::generate_code() { - int last_native_inst = 0; + int last_native_inst = p->nr_insn; const char *last_annotation_string = NULL; ir_instruction *last_annotation_ir = NULL; @@ -3427,8 +3734,8 @@ fs_visitor::generate_code() if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("Native code for fragment shader %d:\n", - ctx->Shader.CurrentFragmentProgram->Name); + printf("Native code for fragment shader %d (%d-wide dispatch):\n", + ctx->Shader.CurrentFragmentProgram->Name, c->dispatch_width); } foreach_iter(exec_list_iterator, iter, this->instructions) { 
@@ -3461,6 +3768,14 @@ fs_visitor::generate_code() brw_set_predicate_inverse(p, inst->predicate_inverse); brw_set_saturate(p, inst->saturate); + if (inst->force_uncompressed || c->dispatch_width == 8) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + } else if (inst->force_sechalf) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } + switch (inst->opcode) { case BRW_OPCODE_MOV: brw_MOV(p, dst, src[0]); @@ -3602,6 +3917,12 @@ fs_visitor::generate_code() case FS_OPCODE_COS: generate_math(inst, dst, src); break; + case FS_OPCODE_PIXEL_X: + generate_pixel_xy(dst, true); + break; + case FS_OPCODE_PIXEL_Y: + generate_pixel_xy(dst, false); + break; case FS_OPCODE_CINTERP: brw_MOV(p, dst, src[0]); break; @@ -3668,6 +3989,10 @@ fs_visitor::generate_code() last_native_inst = p->nr_insn; } + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + printf("\n"); + } + ralloc_free(if_stack); ralloc_free(loop_stack); ralloc_free(if_depth_in_loop); @@ -3693,108 +4018,146 @@ fs_visitor::generate_code() } } -GLboolean -brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) +bool +fs_visitor::run() { - struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &intel->ctx; - struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; + uint32_t prog_offset_16 = 0; + uint32_t orig_nr_params = c->prog_data.nr_params; - if (!prog) - return GL_FALSE; + brw_wm_payload_setup(brw, c); - struct brw_shader *shader = - (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (!shader) - return GL_FALSE; + if (c->dispatch_width == 16) { + /* align to 64 byte boundary. */ + while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) { + brw_NOP(p); + } - /* We always use 8-wide mode, at least for now. For one, flow - * control only works in 8-wide. 
Also, when we're fragment shader - * bound, we're almost always under register pressure as well, so - * 8-wide would save us from the performance cliff of spilling - * regs. - */ - c->dispatch_width = 8; + /* Save off the start of this 16-wide program in case we succeed. */ + prog_offset_16 = c->func.nr_insn * sizeof(struct brw_instruction); - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("GLSL IR for native fragment shader %d:\n", prog->Name); - _mesa_print_ir(shader->ir, NULL); - printf("\n"); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); } - /* Now the main event: Visit the shader IR and generate our FS IR for it. - */ - fs_visitor v(c, shader); - if (0) { - v.emit_dummy_fs(); + emit_dummy_fs(); } else { - v.calculate_urb_setup(); + calculate_urb_setup(); if (intel->gen < 6) - v.emit_interpolation_setup_gen4(); + emit_interpolation_setup_gen4(); else - v.emit_interpolation_setup_gen6(); + emit_interpolation_setup_gen6(); /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). 
*/ foreach_iter(exec_list_iterator, iter, *shader->ir) { ir_instruction *ir = (ir_instruction *)iter.get(); - v.base_ir = ir; - ir->accept(&v); + base_ir = ir; + ir->accept(this); } - v.emit_fb_writes(); + emit_fb_writes(); - v.split_virtual_grfs(); + split_virtual_grfs(); - v.setup_paramvalues_refs(); - v.setup_pull_constants(); + setup_paramvalues_refs(); + setup_pull_constants(); bool progress; do { progress = false; - progress = v.remove_duplicate_mrf_writes() || progress; + progress = remove_duplicate_mrf_writes() || progress; - progress = v.propagate_constants() || progress; - progress = v.register_coalesce() || progress; - progress = v.compute_to_mrf() || progress; - progress = v.dead_code_eliminate() || progress; + progress = propagate_constants() || progress; + progress = register_coalesce() || progress; + progress = compute_to_mrf() || progress; + progress = dead_code_eliminate() || progress; } while (progress); - v.schedule_instructions(); + schedule_instructions(); - v.assign_curb_setup(); - v.assign_urb_setup(); + assign_curb_setup(); + assign_urb_setup(); if (0) { /* Debug of register spilling: Go spill everything. */ - int virtual_grf_count = v.virtual_grf_next; + int virtual_grf_count = virtual_grf_next; for (int i = 1; i < virtual_grf_count; i++) { - v.spill_reg(i); + spill_reg(i); } } if (0) - v.assign_regs_trivial(); + assign_regs_trivial(); else { - while (!v.assign_regs()) { - if (v.failed) + while (!assign_regs()) { + if (failed) break; } } } + assert(force_uncompressed_stack == 0); + assert(force_sechalf_stack == 0); - if (!v.failed) - v.generate_code(); + if (failed) + return false; - assert(!v.failed); /* FINISHME: Cleanly fail, tested at link time, etc. 
*/ + generate_code(); - if (v.failed) - return GL_FALSE; + if (c->dispatch_width == 8) { + c->prog_data.total_grf = grf_used; + } else { + c->prog_data.total_grf_16 = grf_used; + c->prog_data.prog_offset_16 = prog_offset_16; - c->prog_data.total_grf = v.grf_used; + /* Make sure we didn't try to sneak in an extra uniform */ + assert(orig_nr_params == c->prog_data.nr_params); + } - return GL_TRUE; + return !failed; +} + +bool +brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; + + if (!prog) + return false; + + struct brw_shader *shader = + (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + if (!shader) + return false; + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + printf("GLSL IR for native fragment shader %d:\n", prog->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n\n"); + } + + /* Now the main event: Visit the shader IR and generate our FS IR for it. + */ + c->dispatch_width = 8; + + fs_visitor v(c, shader); + if (!v.run()) { + /* FINISHME: Cleanly fail, test at link time, etc. 
*/ + assert(!"not reached"); + return false; + } + + if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) { + c->dispatch_width = 16; + fs_visitor v2(c, shader); + v2.import_uniforms(v.variable_ht); + v2.run(); + } + + c->prog_data.dispatch_width = 8; + + return true; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f792906cfe7..518d09180c4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -67,6 +67,8 @@ enum fs_opcodes { FS_OPCODE_COS, FS_OPCODE_DDX, FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, FS_OPCODE_CINTERP, FS_OPCODE_LINTERP, FS_OPCODE_TEX, @@ -176,6 +178,7 @@ public: int type; bool negate; bool abs; + bool sechalf; struct brw_reg fixed_hw_reg; int smear; /* -1, or a channel of the reg to smear to all channels. */ @@ -341,6 +344,8 @@ public: bool eot; bool header_present; bool shadow_compare; + bool force_uncompressed; + bool force_sechalf; uint32_t offset; /* spill/unspill offset */ /** @{ @@ -403,6 +408,8 @@ public: this->live_intervals_valid = false; this->kill_emitted = false; + this->force_uncompressed_stack = 0; + this->force_sechalf_stack = 0; } ~fs_visitor() @@ -413,6 +420,7 @@ public: fs_reg *variable_storage(ir_variable *var); int virtual_grf_alloc(int size); + void import_uniforms(struct hash_table *src_variable_ht); void visit(ir_variable *ir); void visit(ir_assignment *ir); @@ -459,6 +467,7 @@ public: return emit(fs_inst(opcode, dst, src0, src1, src2)); } + bool run(); void setup_paramvalues_refs(); void assign_curb_setup(); void calculate_urb_setup(); @@ -479,8 +488,14 @@ public: void schedule_instructions(); void fail(const char *msg, ...); + void push_force_uncompressed(); + void pop_force_uncompressed(); + void push_force_sechalf(); + void pop_force_sechalf(); + void generate_code(); void generate_fb_write(fs_inst *inst); + void generate_pixel_xy(struct brw_reg dst, bool is_x); void generate_linterp(fs_inst *inst, struct brw_reg dst, struct 
brw_reg *src); void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); @@ -508,6 +523,7 @@ public: void emit_if_gen6(ir_if *ir); void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); + void emit_color_write(int index, int first_color_mrf, fs_reg color); void emit_fb_writes(); void emit_assignment_writes(fs_reg &l, fs_reg &r, const glsl_type *type, bool predicated); @@ -565,6 +581,9 @@ public: fs_reg reg_null_cmp; int grf_used; + + int force_uncompressed_stack; + int force_sechalf_stack; }; GLboolean brw_do_channel_expressions(struct exec_list *instructions); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 67f29ce1816..1e2cf917116 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -48,11 +48,11 @@ extern "C" { #include "../glsl/ir_print_visitor.h" static void -assign_reg(int *reg_hw_locations, fs_reg *reg) +assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { if (reg->file == GRF && reg->reg != 0) { assert(reg->reg_offset >= 0); - reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset; + reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; reg->reg = 0; } } @@ -63,32 +63,48 @@ fs_visitor::assign_regs_trivial() int last_grf = 0; int hw_reg_mapping[this->virtual_grf_next]; int i; + int reg_width = c->dispatch_width / 8; hw_reg_mapping[0] = 0; - hw_reg_mapping[1] = this->first_non_payload_grf; + /* Note that compressed instructions require alignment to 2 registers. 
*/ + hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width); for (i = 2; i < this->virtual_grf_next; i++) { hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + - this->virtual_grf_sizes[i - 1]); + this->virtual_grf_sizes[i - 1] * reg_width); } - last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1]; + last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * + reg_width); foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); - assign_reg(hw_reg_mapping, &inst->dst); - assign_reg(hw_reg_mapping, &inst->src[0]); - assign_reg(hw_reg_mapping, &inst->src[1]); + assign_reg(hw_reg_mapping, &inst->dst, reg_width); + assign_reg(hw_reg_mapping, &inst->src[0], reg_width); + assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - this->grf_used = last_grf + 1; + if (last_grf >= BRW_MAX_GRF) { + fail("Ran out of regs on trivial allocator (%d/%d)\n", + last_grf, BRW_MAX_GRF); + } + + this->grf_used = last_grf + reg_width; } bool fs_visitor::assign_regs() { + /* Most of this allocation was written for a reg_width of 1 + * (dispatch_width == 8). In extending to 16-wide, the code was + * left in place and it was converted to have the hardware + * registers it's allocating be contiguous physical pairs of regs + * for reg_width == 2. 
+ */ + int reg_width = c->dispatch_width / 8; int last_grf = 0; int hw_reg_mapping[this->virtual_grf_next + 1]; - int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf; + int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); + int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; int class_count = 0; int aligned_pair_class = -1; @@ -157,8 +173,8 @@ fs_visitor::assign_regs() if (0) { printf("%d/%d conflicts %d/%d\n", - class_sizes[i], this->first_non_payload_grf + i_r, - class_sizes[c], this->first_non_payload_grf + c_r); + class_sizes[i], first_assigned_grf + i_r, + class_sizes[c], first_assigned_grf + c_r); } ra_add_reg_conflict(regs, @@ -172,7 +188,7 @@ fs_visitor::assign_regs() /* Add a special class for aligned pairs, which we'll put delta_x/y * in on gen5 so that we can do PLN. */ - if (brw->has_pln && intel->gen < 6) { + if (brw->has_pln && reg_width == 1 && intel->gen < 6) { int reg_count = (base_reg_count - 1) / 2; int unaligned_pair_class = 1; assert(class_sizes[unaligned_pair_class] == 2); @@ -182,7 +198,7 @@ fs_visitor::assign_regs() class_sizes[aligned_pair_class] = 2; class_base_reg[aligned_pair_class] = 0; class_reg_count[aligned_pair_class] = 0; - int start = (this->first_non_payload_grf & 1) ? 1 : 0; + int start = (first_assigned_grf & 1) ? 
1 : 0; for (int i = 0; i < reg_count; i++) { ra_class_add_reg(regs, classes[aligned_pair_class], @@ -228,6 +244,8 @@ fs_visitor::assign_regs() if (reg == -1) { fail("no register to spill\n"); + } else if (c->dispatch_width == 16) { + fail("no spilling support on 16-wide yet\n"); } else { spill_reg(reg); } @@ -257,7 +275,7 @@ fs_visitor::assign_regs() } assert(hw_reg >= 0); - hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg; + hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width; last_grf = MAX2(last_grf, hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); } @@ -265,12 +283,12 @@ fs_visitor::assign_regs() foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); - assign_reg(hw_reg_mapping, &inst->dst); - assign_reg(hw_reg_mapping, &inst->src[0]); - assign_reg(hw_reg_mapping, &inst->src[1]); + assign_reg(hw_reg_mapping, &inst->dst, reg_width); + assign_reg(hw_reg_mapping, &inst->src[0], reg_width); + assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - this->grf_used = last_grf + 1; + this->grf_used = last_grf + reg_width; ralloc_free(g); ralloc_free(regs); diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index bff8f82f3f7..fb1192c810a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -145,6 +145,8 @@ public: void calculate_deps(); void schedule_instructions(fs_inst *next_block_header); + bool is_compressed(fs_inst *inst); + void *mem_ctx; int instructions_to_schedule; @@ -234,6 +236,17 @@ instruction_scheduler::add_barrier_deps(schedule_node *n) } } +/* instruction scheduling needs to be aware of when an MRF write + * actually writes 2 MRFs. 
+ */ +bool +instruction_scheduler::is_compressed(fs_inst *inst) +{ + return (v->c->dispatch_width == 16 && + !inst->force_uncompressed && + !inst->force_sechalf); +} + void instruction_scheduler::calculate_deps() { @@ -297,11 +310,24 @@ instruction_scheduler::calculate_deps() } last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - if (last_mrf_write[inst->dst.hw_reg]) { - add_dep(last_mrf_write[inst->dst.hw_reg], n, - last_mrf_write[inst->dst.hw_reg]->latency); + int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + + if (last_mrf_write[reg]) { + add_dep(last_mrf_write[reg], n, + last_mrf_write[reg]->latency); + } + last_mrf_write[reg] = n; + if (is_compressed(inst)) { + if (inst->dst.hw_reg & BRW_MRF_COMPR4) + reg += 4; + else + reg++; + if (last_mrf_write[reg]) { + add_dep(last_mrf_write[reg], n, + last_mrf_write[reg]->latency); + } + last_mrf_write[reg] = n; } - last_mrf_write[inst->dst.hw_reg] = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } @@ -369,7 +395,18 @@ instruction_scheduler::calculate_deps() if (inst->dst.file == GRF) { last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - last_mrf_write[inst->dst.hw_reg] = n; + int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + + last_mrf_write[reg] = n; + + if (is_compressed(inst)) { + if (inst->dst.hw_reg & BRW_MRF_COMPR4) + reg += 4; + else + reg++; + + last_mrf_write[reg] = n; + } } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 14ee6767cd5..f213ae20acd 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -121,13 +121,11 @@ static void compile_gs_prog( struct brw_context *brw, /* Upload */ drm_intel_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - sizeof(c.prog_data), - 
&brw->gs.prog_data); + brw->gs.prog_bo = brw_upload_cache(&brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->gs.prog_data); } static const GLenum gs_prim[GL_POLYGON+1] = { @@ -193,7 +191,6 @@ static void prepare_gs_prog(struct brw_context *brw) if (brw->gs.prog_active) { brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, &key, sizeof(key), - NULL, 0, &brw->gs.prog_data); if (brw->gs.prog_bo == NULL) compile_gs_prog( brw, &key ); diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 69a5f7a6667..542874b7706 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -35,112 +35,65 @@ #include "brw_state.h" #include "brw_defines.h" -struct brw_gs_unit_key { - unsigned int total_grf; - unsigned int urb_entry_read_length; - - unsigned int curbe_offset; - - unsigned int nr_urb_entries, urb_size; - GLboolean prog_active; -}; - static void -gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) -{ - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_GS_PROG */ - key->prog_active = brw->gs.prog_active; - if (key->prog_active) { - key->total_grf = brw->gs.prog_data->total_grf; - key->urb_entry_read_length = brw->gs.prog_data->urb_read_length; - } else { - key->total_grf = 1; - key->urb_entry_read_length = 1; - } - - /* BRW_NEW_CURBE_OFFSETS */ - key->curbe_offset = brw->curbe.clip_start; - - /* BRW_NEW_URB_FENCE */ - key->nr_urb_entries = brw->urb.nr_gs_entries; - key->urb_size = brw->urb.vsize; -} - -static drm_intel_bo * -gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) +brw_prepare_gs_unit(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct brw_gs_unit_state gs; - drm_intel_bo *bo; - - memset(&gs, 0, sizeof(gs)); - - gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; - if (key->prog_active) /* reloc */ - 
gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; + struct brw_gs_unit_state *gs; - gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - gs.thread1.single_program_flow = 1; + gs = brw_state_batch(brw, sizeof(*gs), 32, &brw->gs.state_offset); - gs.thread3.dispatch_grf_start_reg = 1; - gs.thread3.const_urb_entry_read_offset = 0; - gs.thread3.const_urb_entry_read_length = 0; - gs.thread3.urb_entry_read_offset = 0; - gs.thread3.urb_entry_read_length = key->urb_entry_read_length; + memset(gs, 0, sizeof(*gs)); - gs.thread4.nr_urb_entries = key->nr_urb_entries; - gs.thread4.urb_entry_allocation_size = key->urb_size - 1; - - if (key->nr_urb_entries >= 8) - gs.thread4.max_threads = 1; - else - gs.thread4.max_threads = 0; - - if (intel->gen == 5) - gs.thread4.rendering_enable = 1; - - if (unlikely(INTEL_DEBUG & DEBUG_STATS)) - gs.thread4.stats_enable = 1; - - bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, - key, sizeof(*key), - &brw->gs.prog_bo, 1, - &gs, sizeof(gs)); + /* CACHE_NEW_GS_PROG */ + if (brw->gs.prog_active) { + gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) / + 16 - 1); + /* reloc */ + gs->thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; + + gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + gs->thread1.single_program_flow = 1; + + gs->thread3.dispatch_grf_start_reg = 1; + gs->thread3.const_urb_entry_read_offset = 0; + gs->thread3.const_urb_entry_read_length = 0; + gs->thread3.urb_entry_read_offset = 0; + gs->thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; + + /* BRW_NEW_URB_FENCE */ + gs->thread4.nr_urb_entries = brw->urb.nr_gs_entries; + gs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + + if (brw->urb.nr_gs_entries >= 8) + gs->thread4.max_threads = 1; + else + gs->thread4.max_threads = 0; - if (key->prog_active) { /* Emit GS program relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_gs_unit_state, thread0), - brw->gs.prog_bo, 
gs.thread0.grf_reg_count << 1, + drm_intel_bo_emit_reloc(intel->batch.bo, + (brw->gs.state_offset + + offsetof(struct brw_gs_unit_state, thread0)), + brw->gs.prog_bo, gs->thread0.grf_reg_count << 1, I915_GEM_DOMAIN_INSTRUCTION, 0); } - return bo; -} - -static void prepare_gs_unit(struct brw_context *brw) -{ - struct brw_gs_unit_key key; + if (intel->gen == 5) + gs->thread4.rendering_enable = 1; - gs_unit_populate_key(brw, &key); + if (unlikely(INTEL_DEBUG & DEBUG_STATS)) + gs->thread4.stats_enable = 1; - drm_intel_bo_unreference(brw->gs.state_bo); - brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT, - &key, sizeof(key), - &brw->gs.prog_bo, 1, - NULL); - if (brw->gs.state_bo == NULL) { - brw->gs.state_bo = gs_unit_create_from_key(brw, &key); - } + brw->state.dirty.cache |= CACHE_NEW_GS_UNIT; } const struct brw_tracked_state brw_gs_unit = { .dirty = { .mesa = 0, - .brw = (BRW_NEW_CURBE_OFFSETS | + .brw = (BRW_NEW_BATCH | + BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_GS_PROG }, - .prepare = prepare_gs_unit, + .prepare = brw_prepare_gs_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 19eea07ebc6..7119786de42 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -143,15 +143,19 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->vs.state_offset); if (brw->gs.prog_active) - OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->gs.state_offset | 1); else OUT_BATCH(0); - OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + 
brw->clip.state_offset | 1); OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, brw->sf.state_offset); - OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->wm.state_offset); OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, brw->cc.state_offset); ADVANCE_BATCH(); @@ -159,16 +163,6 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_PSP; } - -static void prepare_psp_urb_cbs(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->vs.state_bo); - brw_add_validated_bo(brw, brw->gs.state_bo); - brw_add_validated_bo(brw, brw->clip.state_bo); - brw_add_validated_bo(brw, brw->sf.state_bo); - brw_add_validated_bo(brw, brw->wm.state_bo); -} - static void upload_psp_urb_cbs(struct brw_context *brw ) { upload_pipelined_state_pointers(brw); @@ -188,7 +182,6 @@ const struct brw_tracked_state brw_psp_urb_cbs = { CACHE_NEW_WM_UNIT | CACHE_NEW_CC_UNIT) }, - .prepare = prepare_psp_urb_cbs, .emit = upload_psp_urb_cbs, }; @@ -551,12 +544,28 @@ static void upload_state_base_address( struct brw_context *brw ) if (intel->gen >= 6) { BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); - OUT_BATCH(1); /* General state base address */ - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, - 1); /* Surface state base address */ - OUT_BATCH(1); /* Dynamic state base address */ - OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* Instruction base address */ + /* General state base address: stateless DP read/write requests */ + OUT_BATCH(1); + /* Surface state base address: + * BINDING_TABLE_STATE + * SURFACE_STATE + */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); + /* Dynamic state base address: + * SAMPLER_STATE + * SAMPLER_BORDER_COLOR_STATE + * CLIP, SF, WM/CC viewport state + * COLOR_CALC_STATE + * DEPTH_STENCIL_STATE + * BLEND_STATE + * Push constants (when INSTPM: 
CONSTANT_BUFFER Address Offset + * Disable is clear, which we rely on) + */ + OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER | + I915_GEM_DOMAIN_INSTRUCTION), 0, 1); + + OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */ + OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Dynamic state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 6da155b1a9b..5a03851b8e6 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -119,13 +119,11 @@ static void compile_sf_prog( struct brw_context *brw, /* Upload */ drm_intel_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - sizeof(c.prog_data), - &brw->sf.prog_data); + brw->sf.prog_bo = brw_upload_cache(&brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->sf.prog_data); } /* Calculate interpolants for triangle and line rasterization. 
@@ -194,7 +192,6 @@ static void upload_sf_prog(struct brw_context *brw) drm_intel_bo_unreference(brw->sf.prog_bo); brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, &key, sizeof(key), - NULL, 0, &brw->sf.prog_data); if (brw->sf.prog_bo == NULL) compile_sf_prog( brw, &key ); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 66d91a0bde7..78b22c4df3d 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -39,7 +39,7 @@ static void upload_sf_vp(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; + struct gl_context *ctx = &intel->ctx; const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport *sfv; GLfloat y_scale, y_bias; @@ -106,11 +106,6 @@ static void upload_sf_vp(struct brw_context *brw) sfv->scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; } - /* Keep a pointer to it for brw_state_dump.c */ - drm_intel_bo_unreference(brw->sf.vp_bo); - drm_intel_bo_reference(intel->batch.bo); - brw->sf.vp_bo = intel->batch.bo; - brw->state.dirty.cache |= CACHE_NEW_SF_VP; } @@ -177,7 +172,7 @@ static void upload_sf_unit( struct brw_context *brw ) sf->thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ - sf->sf5.sf_viewport_state_offset = (brw->sf.vp_bo->offset + + sf->sf5.sf_viewport_state_offset = (intel->batch.bo->offset + brw->sf.vp_offset) >> 5; /* reloc */ sf->sf5.viewport_transform = 1; diff --git a/src/mesa/drivers/dri/i965/brw_state.c b/src/mesa/drivers/dri/i965/brw_state.c deleted file mode 100644 index 13b231d5cf5..00000000000 --- a/src/mesa/drivers/dri/i965/brw_state.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including 
without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt <[email protected]> - * - */ - -#include "brw_context.h" - -void -brw_enable(struct gl_context *ctx, GLenum cap, GLboolean state) -{ - struct brw_context *brw = brw_context(ctx); - - switch (cap) { - case GL_DEPTH_CLAMP: - brw_update_cc_vp(brw); - break; - } -} - -void -brw_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval) -{ - struct brw_context *brw = brw_context(ctx); - - if (ctx->Transform.DepthClamp) - brw_update_cc_vp(brw); -} diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 86b0caa4a4e..8b9e3a4ec5d 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -47,6 +47,7 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo) }; extern const struct brw_tracked_state brw_blend_constant_color; +extern const struct brw_tracked_state brw_cc_vp; extern const struct brw_tracked_state brw_cc_unit; extern const struct brw_tracked_state brw_check_fallback; extern const struct brw_tracked_state brw_clip_prog; @@ -102,11 +103,11 
@@ extern const struct brw_tracked_state gen6_depth_stencil_state; extern const struct brw_tracked_state gen6_gs_state; extern const struct brw_tracked_state gen6_sampler_state; extern const struct brw_tracked_state gen6_scissor_state; -extern const struct brw_tracked_state gen6_scissor_state_pointers; extern const struct brw_tracked_state gen6_sf_state; extern const struct brw_tracked_state gen6_sf_vp; extern const struct brw_tracked_state gen6_urb; extern const struct brw_tracked_state gen6_viewport_state; +extern const struct brw_tracked_state gen6_vs_constants; extern const struct brw_tracked_state gen6_vs_state; extern const struct brw_tracked_state gen6_wm_constants; extern const struct brw_tracked_state gen6_wm_state; @@ -123,38 +124,21 @@ void brw_clear_validated_bos(struct brw_context *brw); /*********************************************************************** * brw_state_cache.c */ -drm_intel_bo *brw_cache_data(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint size); drm_intel_bo *brw_upload_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_sz, - drm_intel_bo **reloc_bufs, - GLuint nr_reloc_bufs, const void *data, - GLuint data_sz); - -drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - drm_intel_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_sz, - const void *aux, - GLuint aux_sz, - void *aux_return); + GLuint data_sz, + const void *aux, + GLuint aux_sz, + void *aux_return); drm_intel_bo *brw_search_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - drm_intel_bo **reloc_bufs, - GLuint nr_reloc_bufs, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 01eeb19a684..f13a41fa7cc 100644 --- 
a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -31,29 +31,17 @@ /** @file brw_state_cache.c * - * This file implements a simple static state cache for 965. The consumers - * can query the hash table of state using a cache_id, opaque key data, - * and list of buffers that will be used in relocations, and receive the - * corresponding state buffer object of state (plus associated auxiliary - * data) in return. + * This file implements a simple static state cache for 965. The + * consumers can query the hash table of state using a cache_id, + * opaque key data, and receive the corresponding state buffer object + * of state (plus associated auxiliary data) in return. Objects in + * the cache may not have relocations (pointers to other BOs) in them. * - * The inner workings are a simple hash table based on a CRC of the key data. - * The cache_id and relocation target buffers associated with the state - * buffer are included as auxiliary key data, but are not part of the hash - * value (this should be fixed, but will likely be fixed instead by making - * consumers use structured keys). + * The inner workings are a simple hash table based on a CRC of the + * key data. * - * Replacement is not implemented. Instead, when the cache gets too big, at - * a safe point (unlock) we throw out all of the cache data and let it - * regenerate for the next rendering operation. - * - * The reloc_buf pointers need to be included as key data, otherwise the - * non-unique values stuffed in the offset in key data through - * brw_cache_data() may result in successful probe for state buffers - * even when the buffer being referenced doesn't match. The result would be - * that the same state cache entry is used twice for different buffers, - * only one of the two buffers referenced gets put into the offset, and the - * incorrect program is run for the other instance. + * Replacement is not implemented. 
Instead, when the cache gets too + * big we throw out all of the cache data and let it get regenerated. */ #include "main/imports.h" @@ -76,13 +64,6 @@ hash_key(struct brw_cache_item *item) hash = (hash << 5) | (hash >> 27); } - /* Include the BO pointers as key data as well */ - ikey = (GLuint *)item->reloc_bufs; - for (i = 0; i < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; i++) { - hash ^= ikey[i]; - hash = (hash << 5) | (hash >> 27); - } - return hash; } @@ -110,10 +91,7 @@ brw_cache_item_equals(const struct brw_cache_item *a, return a->cache_id == b->cache_id && a->hash == b->hash && a->key_size == b->key_size && - (memcmp(a->key, b->key, a->key_size) == 0) && - a->nr_reloc_bufs == b->nr_reloc_bufs && - (memcmp(a->reloc_bufs, b->reloc_bufs, - a->nr_reloc_bufs * sizeof(drm_intel_bo *)) == 0); + (memcmp(a->key, b->key, a->key_size) == 0); } static struct brw_cache_item * @@ -170,9 +148,7 @@ rehash(struct brw_cache *cache) drm_intel_bo * brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - drm_intel_bo **reloc_bufs, GLuint nr_reloc_bufs, + const void *key, GLuint key_size, void *aux_return) { struct brw_cache_item *item; @@ -182,8 +158,6 @@ brw_search_cache(struct brw_cache *cache, lookup.cache_id = cache_id; lookup.key = key; lookup.key_size = key_size; - lookup.reloc_bufs = reloc_bufs; - lookup.nr_reloc_bufs = nr_reloc_bufs; hash = hash_key(&lookup); lookup.hash = hash; @@ -203,30 +177,24 @@ brw_search_cache(struct brw_cache *cache, drm_intel_bo * -brw_upload_cache_with_auxdata(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - drm_intel_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_size, - const void *aux, - GLuint aux_size, - void *aux_return) +brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + const void *data, + GLuint data_size, + const void *aux, + GLuint 
aux_size, + void *aux_return) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); GLuint hash; - GLuint relocs_size = nr_reloc_bufs * sizeof(drm_intel_bo *); void *tmp; drm_intel_bo *bo; - int i; item->cache_id = cache_id; item->key = key; item->key_size = key_size; - item->reloc_bufs = reloc_bufs; - item->nr_reloc_bufs = nr_reloc_bufs; hash = hash_key(item); item->hash = hash; @@ -235,19 +203,13 @@ brw_upload_cache_with_auxdata(struct brw_cache *cache, cache->name[cache_id], data_size, 1 << 6); - /* Set up the memory containing the key, aux_data, and reloc_bufs */ - tmp = malloc(key_size + aux_size + relocs_size); + /* Set up the memory containing the key and aux_data */ + tmp = malloc(key_size + aux_size); memcpy(tmp, key, key_size); memcpy(tmp + key_size, aux, aux_size); - memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); - for (i = 0; i < nr_reloc_bufs; i++) { - if (reloc_bufs[i] != NULL) - drm_intel_bo_reference(reloc_bufs[i]); - } item->key = tmp; - item->reloc_bufs = tmp + key_size + aux_size; item->bo = bo; drm_intel_bo_reference(bo); @@ -276,73 +238,6 @@ brw_upload_cache_with_auxdata(struct brw_cache *cache, return bo; } -drm_intel_bo * -brw_upload_cache(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - drm_intel_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_size) -{ - return brw_upload_cache_with_auxdata(cache, cache_id, - key, key_size, - reloc_bufs, nr_reloc_bufs, - data, data_size, - NULL, 0, - NULL); -} - -/** - * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. - * - * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be - * better to use, as the potentially changing offsets in the data-used-as-key - * will result in excessive cache misses. - * - * If aux data is involved, use search/upload instead. 
- - */ -drm_intel_bo * -brw_cache_data(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint data_size) -{ - drm_intel_bo *bo; - struct brw_cache_item *item, lookup; - GLuint hash; - - lookup.cache_id = cache_id; - lookup.key = data; - lookup.key_size = data_size; - lookup.reloc_bufs = NULL; - lookup.nr_reloc_bufs = 0; - hash = hash_key(&lookup); - lookup.hash = hash; - - item = search_cache(cache, hash, &lookup); - if (item) { - update_cache_last(cache, cache_id, item->bo); - drm_intel_bo_reference(item->bo); - return item->bo; - } - - bo = brw_upload_cache(cache, cache_id, - data, data_size, - NULL, 0, - data, data_size); - - return bo; -} - -enum pool_type { - DW_SURFACE_STATE, - DW_GENERAL_STATE -}; - - static void brw_init_cache_id(struct brw_cache *cache, const char *name, @@ -352,8 +247,8 @@ brw_init_cache_id(struct brw_cache *cache, } -static void -brw_init_non_surface_cache(struct brw_context *brw) +void +brw_init_caches(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; @@ -367,7 +262,6 @@ brw_init_non_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "CC_VP", BRW_CC_VP); brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT); brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG); - brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR); brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER); brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT); brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG); @@ -392,13 +286,6 @@ brw_init_non_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE); } -void -brw_init_caches(struct brw_context *brw) -{ - brw_init_non_surface_cache(brw); -} - - static void brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { @@ -409,11 +296,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { - int j; - next = 
c->next; - for (j = 0; j < c->nr_reloc_bufs; j++) - drm_intel_bo_unreference(c->reloc_bufs[j]); drm_intel_bo_unreference(c->bo); free((void *)c->key); free(c); diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b393259c915..3a3aa8c0346 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -140,19 +140,15 @@ static void dump_wm_surface_state(struct brw_context *brw) static void dump_wm_sampler_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; int i; - if (!brw->wm.sampler_bo) { - fprintf(stderr, "WM_SAMPLER: NULL\n"); - return; - } - - drm_intel_bo_map(brw->wm.sampler_bo, GL_FALSE); + drm_intel_bo_map(intel->batch.bo, GL_FALSE); for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { unsigned int offset; + uint32_t sdc_offset; struct brw_sampler_state *samp; - struct brw_sampler_default_color *sdc; char name[20]; if (!ctx->Texture.Unit[i]._ReallyEnabled) { @@ -160,9 +156,11 @@ static void dump_wm_sampler_state(struct brw_context *brw) continue; } - offset = brw->wm.sampler_bo->offset + - i * sizeof(struct brw_sampler_state); - samp = (struct brw_sampler_state *)(brw->wm.sampler_bo->virtual + + offset = (intel->batch.bo->offset + + brw->wm.sampler_offset + + i * sizeof(struct brw_sampler_state)); + samp = (struct brw_sampler_state *)(intel->batch.bo->virtual + + brw->wm.sampler_offset + i * sizeof(struct brw_sampler_state)); sprintf(name, "WM SAMP%d", i); @@ -173,30 +171,45 @@ static void dump_wm_sampler_state(struct brw_context *brw) sprintf(name, " WM SDC%d", i); - drm_intel_bo_map(brw->wm.sdc_bo[i], GL_FALSE); - sdc = (struct brw_sampler_default_color *)(brw->wm.sdc_bo[i]->virtual); - state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 0, "r\n"); - state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 1, "g\n"); - state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 2, "b\n"); - state_out(name, sdc, 
brw->wm.sdc_bo[i]->offset, 3, "a\n"); - drm_intel_bo_unmap(brw->wm.sdc_bo[i]); + sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; + if (intel->gen >= 5) { + struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + + brw->wm.sdc_offset[i]); + state_out(name, sdc, sdc_offset, 0, "unorm rgba\n"); + state_out(name, sdc, sdc_offset, 1, "r %f\n", sdc->f[0]); + state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->f[1]); + state_out(name, sdc, sdc_offset, 3, "g %f\n", sdc->f[2]); + state_out(name, sdc, sdc_offset, 4, "a %f\n", sdc->f[3]); + state_out(name, sdc, sdc_offset, 5, "half float rg\n"); + state_out(name, sdc, sdc_offset, 6, "half float ba\n"); + state_out(name, sdc, sdc_offset, 7, "u16 rg\n"); + state_out(name, sdc, sdc_offset, 8, "u16 ba\n"); + state_out(name, sdc, sdc_offset, 9, "s16 rg\n"); + state_out(name, sdc, sdc_offset, 10, "s16 ba\n"); + state_out(name, sdc, sdc_offset, 11, "s8 rgba\n"); + } else { + struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + + brw->wm.sdc_offset[i]); + state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); + state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); + state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); + state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); + } } - drm_intel_bo_unmap(brw->wm.sampler_bo); + drm_intel_bo_unmap(intel->batch.bo); } static void dump_sf_viewport_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; const char *name = "SF VP"; struct brw_sf_viewport *vp; uint32_t vp_off; - if (brw->sf.vp_bo == NULL) - return; - - drm_intel_bo_map(brw->sf.vp_bo, GL_FALSE); + drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = brw->sf.vp_bo->virtual + brw->sf.vp_offset; - vp_off = brw->sf.vp_bo->offset + brw->sf.vp_offset; + vp = intel->batch.bo->virtual + brw->sf.vp_offset; + vp_off = intel->batch.bo->offset + brw->sf.vp_offset; state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); state_out(name, vp, 
vp_off, 1, "m11 = %f\n", vp->viewport.m11); @@ -210,62 +223,56 @@ static void dump_sf_viewport_state(struct brw_context *brw) state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", vp->scissor.xmax, vp->scissor.ymax); - drm_intel_bo_unmap(brw->sf.vp_bo); + drm_intel_bo_unmap(intel->batch.bo); } static void dump_clip_viewport_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; const char *name = "CLIP VP"; struct brw_clipper_viewport *vp; uint32_t vp_off; - if (brw->clip.vp_bo == NULL) - return; - - drm_intel_bo_map(brw->clip.vp_bo, GL_FALSE); + drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = brw->clip.vp_bo->virtual; - vp_off = brw->clip.vp_bo->offset; + vp = intel->batch.bo->virtual + brw->clip.vp_offset; + vp_off = intel->batch.bo->offset + brw->clip.vp_offset; state_out(name, vp, vp_off, 0, "xmin = %f\n", vp->xmin); state_out(name, vp, vp_off, 1, "xmax = %f\n", vp->xmax); state_out(name, vp, vp_off, 2, "ymin = %f\n", vp->ymin); state_out(name, vp, vp_off, 3, "ymax = %f\n", vp->ymax); - drm_intel_bo_unmap(brw->clip.vp_bo); + drm_intel_bo_unmap(intel->batch.bo); } static void dump_cc_viewport_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; const char *name = "CC VP"; struct brw_cc_viewport *vp; uint32_t vp_off; - if (brw->cc.vp_bo == NULL) - return; - - drm_intel_bo_map(brw->cc.vp_bo, GL_FALSE); + drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = brw->cc.vp_bo->virtual; - vp_off = brw->cc.vp_bo->offset; + vp = intel->batch.bo->virtual + brw->cc.vp_offset; + vp_off = intel->batch.bo->offset + brw->cc.vp_offset; state_out(name, vp, vp_off, 0, "min_depth = %f\n", vp->min_depth); state_out(name, vp, vp_off, 1, "max_depth = %f\n", vp->max_depth); - drm_intel_bo_unmap(brw->cc.vp_bo); + drm_intel_bo_unmap(intel->batch.bo); } static void dump_depth_stencil_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; const char *name = "DEPTH STENCIL"; struct gen6_depth_stencil_state *ds; 
uint32_t ds_off; - if (brw->cc.depth_stencil_state_bo == NULL) - return; + drm_intel_bo_map(intel->batch.bo, GL_FALSE); - drm_intel_bo_map(brw->cc.depth_stencil_state_bo, GL_FALSE); - - ds = brw->cc.depth_stencil_state_bo->virtual; - ds_off = brw->cc.depth_stencil_state_bo->offset; + ds = intel->batch.bo->virtual + brw->cc.depth_stencil_state_offset; + ds_off = intel->batch.bo->offset + brw->cc.depth_stencil_state_offset; state_out(name, ds, ds_off, 0, "stencil %sable, func %d, write %sable\n", ds->ds0.stencil_enable ? "en" : "dis", @@ -277,7 +284,7 @@ static void dump_depth_stencil_state(struct brw_context *brw) ds->ds2.depth_test_enable ? "en" : "dis", ds->ds2.depth_test_func, ds->ds2.depth_write_enable ? "en" : "dis"); - drm_intel_bo_unmap(brw->cc.depth_stencil_state_bo); + drm_intel_bo_unmap(intel->batch.bo); } static void dump_cc_state(struct brw_context *brw) @@ -291,8 +298,8 @@ static void dump_cc_state(struct brw_context *brw) return; drm_intel_bo_map(bo, GL_FALSE); - cc = bo->virtual; - cc_off = bo->offset; + cc = bo->virtual + brw->cc.state_offset; + cc_off = bo->offset + brw->cc.state_offset; state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d," "bf stencil ref %d\n", @@ -312,22 +319,20 @@ static void dump_cc_state(struct brw_context *brw) static void dump_blend_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; const char *name = "BLEND"; struct gen6_blend_state *blend; uint32_t blend_off; - if (brw->cc.blend_state_bo == NULL) - return; - - drm_intel_bo_map(brw->cc.blend_state_bo, GL_FALSE); + drm_intel_bo_map(intel->batch.bo, GL_FALSE); - blend = brw->cc.blend_state_bo->virtual; - blend_off = brw->cc.blend_state_bo->offset; + blend = intel->batch.bo->virtual + brw->cc.blend_state_offset; + blend_off = intel->batch.bo->offset + brw->cc.blend_state_offset; state_out(name, blend, blend_off, 0, "\n"); state_out(name, blend, blend_off, 1, "\n"); - drm_intel_bo_unmap(brw->cc.blend_state_bo); + 
drm_intel_bo_unmap(intel->batch.bo); } @@ -383,21 +388,25 @@ void brw_debug_batch(struct intel_context *intel) dump_wm_sampler_state(brw); if (intel->gen < 6) - state_struct_out("VS", brw->vs.state_bo, 0, sizeof(struct brw_vs_unit_state)); + state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, + sizeof(struct brw_vs_unit_state)); brw_debug_prog("VS prog", brw->vs.prog_bo); if (intel->gen < 6) - state_struct_out("GS", brw->gs.state_bo, 0, sizeof(struct brw_gs_unit_state)); + state_struct_out("GS", intel->batch.bo, brw->gs.state_offset, + sizeof(struct brw_gs_unit_state)); brw_debug_prog("GS prog", brw->gs.prog_bo); if (intel->gen < 6) { - state_struct_out("SF", brw->sf.state_bo, 0, sizeof(struct brw_sf_unit_state)); - brw_debug_prog("SF prog", brw->sf.prog_bo); + state_struct_out("SF", intel->batch.bo, brw->sf.state_offset, + sizeof(struct brw_sf_unit_state)); + brw_debug_prog("SF prog", brw->sf.prog_bo); } dump_sf_viewport_state(brw); if (intel->gen < 6) - state_struct_out("WM", brw->wm.state_bo, 0, sizeof(struct brw_wm_unit_state)); + state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, + sizeof(struct brw_wm_unit_state)); brw_debug_prog("WM prog", brw->wm.prog_bo); if (intel->gen >= 6) { diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 6f521be6599..008aceb222b 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -60,6 +60,7 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, + &brw_cc_vp, &brw_cc_unit, &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ @@ -119,6 +120,10 @@ static const struct brw_tracked_state *gen6_atoms[] = /* Command packets: */ &brw_invarient_state, + /* must do before binding table pointers, cc state ptrs */ + &brw_state_base_address, + + &brw_cc_vp, &gen6_viewport_state, /* must do after *_vp stages */ &gen6_urb, @@ -129,6 +134,7 @@ static 
const struct brw_tracked_state *gen6_atoms[] = &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ + &gen6_vs_constants, /* Before vs_state */ &gen6_wm_constants, /* Before wm_state */ &brw_vs_surfaces, /* must do before unit */ @@ -146,9 +152,6 @@ static const struct brw_tracked_state *gen6_atoms[] = &gen6_wm_state, &gen6_scissor_state, - &gen6_scissor_state_pointers, - - &brw_state_base_address, &gen6_binding_table_pointers, @@ -314,7 +317,6 @@ static struct dirty_bit_map cache_bits[] = { DEFINE_BIT(CACHE_NEW_CC_VP), DEFINE_BIT(CACHE_NEW_CC_UNIT), DEFINE_BIT(CACHE_NEW_WM_PROG), - DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR), DEFINE_BIT(CACHE_NEW_SAMPLER), DEFINE_BIT(CACHE_NEW_WM_UNIT), DEFINE_BIT(CACHE_NEW_SF_PROG), diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 63ae13191f9..31a2b518c40 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -103,13 +103,11 @@ static void do_vs_prog( struct brw_context *brw, aux_size += c.vp->program.Base.Parameters->NumParameters; drm_intel_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - aux_size, - &brw->vs.prog_data); + brw->vs.prog_bo = brw_upload_cache(&brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, aux_size, + &brw->vs.prog_data); } @@ -148,7 +146,6 @@ static void brw_upload_vs_prog(struct brw_context *brw) drm_intel_bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, &key, sizeof(key), - NULL, 0, &brw->vs.prog_data); if (brw->vs.prog_bo == NULL) do_vs_prog(brw, vp, &key); diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index dd4e1e6c6ad..a28cdc0bfe9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c 
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1553,6 +1553,26 @@ static void emit_swz( struct brw_vs_compile *c, } } +static int +align_interleaved_urb_mlen(struct brw_context *brw, int mlen) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 6) { + /* URB data written (does not include the message header reg) must + * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, + * section 5.4.3.2.2: URB_INTERLEAVED. + * + * URB entries are allocated on a multiple of 1024 bits, so an + * extra 128 bits written here to make the end align to 256 is + * no problem. + */ + if ((mlen % 2) != 1) + mlen++; + } + + return mlen; +} /** * Post-vertex-program processing. Send the results to the URB. @@ -1734,12 +1754,11 @@ static void emit_vertex_write( struct brw_vs_compile *c) eot = (c->first_overflow_output == 0); - msg_len = c->nr_outputs + 2 + len_vertex_header; - if (intel->gen >= 6) { - /* interleaved urb write message length for gen6 should be multiple of 2 */ - if ((msg_len % 2) != 0) - msg_len++; - } + /* Message header, plus VUE header, plus the (first set of) outputs. 
*/ + msg_len = 1 + len_vertex_header + c->nr_outputs; + msg_len = align_interleaved_urb_mlen(brw, msg_len); + /* Any outputs beyond BRW_MAX_MRF should be past first_overflow_output */ + msg_len = MIN2(msg_len, (BRW_MAX_MRF - 1)), brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -1747,7 +1766,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - MIN2(msg_len - 1, (BRW_MAX_MRF - 1)), /* msg len */ + msg_len, 0, /* response len */ eot, /* eot */ eot, /* writes complete */ @@ -1774,7 +1793,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - mrf, /* msg len */ + align_interleaved_urb_mlen(brw, mrf), 0, /* response len */ 1, /* eot */ 1, /* writes complete */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index c3a7cc247c5..1eee5b7e5de 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -49,48 +49,19 @@ struct brw_vs_unit_key { }; static void -vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) -{ - struct gl_context *ctx = &brw->intel.ctx; - - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_VS_PROG */ - key->total_grf = brw->vs.prog_data->total_grf; - key->urb_entry_read_length = brw->vs.prog_data->urb_read_length; - key->curb_entry_read_length = brw->vs.prog_data->curb_read_length; - - /* BRW_NEW_URB_FENCE */ - key->nr_urb_entries = brw->urb.nr_vs_entries; - key->urb_size = brw->urb.vsize; - - /* BRW_NEW_NR_VS_SURFACES */ - key->nr_surfaces = brw->vs.nr_surfaces; - - /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ - if (ctx->Transform.ClipPlanesEnabled) { - /* Note that we read in the userclip planes as well, hence - * clip_start: - */ - key->curbe_offset = brw->curbe.clip_start; - } - else { - key->curbe_offset = brw->curbe.vs_start; - } -} - -static drm_intel_bo * -vs_unit_create_from_key(struct brw_context *brw, struct 
brw_vs_unit_key *key) +brw_prepare_vs_unit(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct brw_vs_unit_state vs; - drm_intel_bo *bo; + struct gl_context *ctx = &intel->ctx; + struct brw_vs_unit_state *vs; - memset(&vs, 0, sizeof(vs)); + vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset); + memset(vs, 0, sizeof(*vs)); - vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ - vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; - vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + /* CACHE_NEW_VS_PROG */ + vs->thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, * which will have the channel mask for dwords 4-7 enabled in the thread, * and those dwords will be written to the second URB handle when we @@ -103,21 +74,34 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * The most notable and reliably failing application is the Humus * demo "CelShading" */ - vs.thread1.single_program_flow = (intel->gen == 5); + vs->thread1.single_program_flow = (intel->gen == 5); + /* BRW_NEW_NR_VS_SURFACES */ if (intel->gen == 5) - vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ + vs->thread1.binding_table_entry_count = 0; /* hardware requirement */ else - vs.thread1.binding_table_entry_count = key->nr_surfaces; + vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces; + + vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; + vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; + vs->thread3.dispatch_grf_start_reg = 1; + vs->thread3.urb_entry_read_offset = 0; - vs.thread3.urb_entry_read_length = key->urb_entry_read_length; - 
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; - vs.thread3.dispatch_grf_start_reg = 1; - vs.thread3.urb_entry_read_offset = 0; - vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ + if (ctx->Transform.ClipPlanesEnabled) { + /* Note that we read in the userclip planes as well, hence + * clip_start: + */ + vs->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + } + else { + vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + } + + /* BRW_NEW_URB_FENCE */ if (intel->gen == 5) { - switch (key->nr_urb_entries) { + switch (brw->urb.nr_vs_entries) { case 8: case 12: case 16: @@ -129,13 +113,13 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) case 192: case 224: case 256: - vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2; break; default: assert(0); } } else { - switch (key->nr_urb_entries) { + switch (brw->urb.nr_vs_entries) { case 8: case 12: case 16: @@ -147,63 +131,45 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) default: assert(0); } - vs.thread4.nr_urb_entries = key->nr_urb_entries; + vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries; } - vs.thread4.urb_entry_allocation_size = key->urb_size - 1; + vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, - 1, brw->vs_max_threads) - 1; + vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2, + 1, brw->vs_max_threads) - 1; /* No samplers for ARB_vp programs: */ /* It has to be set to 0 for Ironlake */ - vs.vs5.sampler_count = 0; + vs->vs5.sampler_count = 0; if (unlikely(INTEL_DEBUG & DEBUG_STATS)) - vs.thread4.stats_enable = 1; + vs->thread4.stats_enable = 1; /* Vertex program always enabled: */ - vs.vs6.vs_enable = 1; - - bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, - key, sizeof(*key), - &brw->vs.prog_bo, 
1, - &vs, sizeof(vs)); + vs->vs6.vs_enable = 1; /* Emit VS program relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_vs_unit_state, thread0), - brw->vs.prog_bo, vs.thread0.grf_reg_count << 1, + drm_intel_bo_emit_reloc(intel->batch.bo, (brw->vs.state_offset + + offsetof(struct brw_vs_unit_state, + thread0)), + brw->vs.prog_bo, vs->thread0.grf_reg_count << 1, I915_GEM_DOMAIN_INSTRUCTION, 0); - return bo; -} - -static void prepare_vs_unit(struct brw_context *brw) -{ - struct brw_vs_unit_key key; - - vs_unit_populate_key(brw, &key); - - drm_intel_bo_unreference(brw->vs.state_bo); - brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, - &key, sizeof(key), - &brw->vs.prog_bo, 1, - NULL); - if (brw->vs.state_bo == NULL) { - brw->vs.state_bo = vs_unit_create_from_key(brw, &key); - } + brw->state.dirty.cache |= CACHE_NEW_VS_UNIT; } const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, - .brw = (BRW_NEW_CURBE_OFFSETS | + .brw = (BRW_NEW_BATCH | + BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, - .prepare = prepare_vs_unit, + .prepare = brw_prepare_vs_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index ce8712a260f..f2c417d8a81 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -60,7 +60,6 @@ dri_bo_release(drm_intel_bo **bo) static void brw_destroy_context( struct intel_context *intel ) { struct brw_context *brw = brw_context(&intel->ctx); - int i; brw_destroy_state(brw); brw_draw_destroy( brw ); @@ -77,28 +76,13 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); - dri_bo_release(&brw->vs.state_bo); dri_bo_release(&brw->vs.const_bo); dri_bo_release(&brw->gs.prog_bo); - dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); - dri_bo_release(&brw->clip.state_bo); - 
dri_bo_release(&brw->clip.vp_bo); dri_bo_release(&brw->sf.prog_bo); - dri_bo_release(&brw->sf.state_bo); - dri_bo_release(&brw->sf.vp_bo); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) - dri_bo_release(&brw->wm.sdc_bo[i]); - dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); - dri_bo_release(&brw->wm.state_bo); dri_bo_release(&brw->wm.const_bo); - dri_bo_release(&brw->wm.push_const_bo); dri_bo_release(&brw->cc.prog_bo); - dri_bo_release(&brw->cc.vp_bo); - dri_bo_release(&brw->cc.blend_state_bo); - dri_bo_release(&brw->cc.depth_stencil_state_bo); - dri_bo_release(&brw->cc.color_calc_state_bo); free(brw->curbe.last_buf); free(brw->curbe.next_buf); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 65af227d831..06512de940f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -33,6 +33,7 @@ #include "brw_wm.h" #include "brw_state.h" #include "main/formats.h" +#include "main/samplerobj.h" /** Return number of src args for given instruction */ GLuint brw_wm_nr_args( GLuint opcode ) @@ -119,7 +120,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_emit(c); } -static void +void brw_wm_payload_setup(struct brw_context *brw, struct brw_wm_compile *c) { @@ -224,18 +225,13 @@ static void do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); - brw_wm_payload_setup(brw, c); - if (!brw_wm_fs_emit(brw, c)) { - /* - * Shader which use GLSL features such as flow control are handled - * differently from "simple" shaders. - */ + /* Fallback for fixed function and ARB_fp shaders. 
*/ c->dispatch_width = 16; brw_wm_payload_setup(brw, c); brw_wm_non_glsl_emit(brw, c); + c->prog_data.dispatch_width = 16; } - c->prog_data.dispatch_width = c->dispatch_width; /* Scratch space is used for register spilling */ if (c->last_scratch) { @@ -272,13 +268,11 @@ static void do_wm_prog( struct brw_context *brw, program = brw_get_program(&c->func, &program_size); drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - NULL, 0, - program, program_size, - &c->prog_data, - sizeof(c->prog_data), - &brw->wm.prog_data); + brw->wm.prog_bo = brw_upload_cache(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + program, program_size, + &c->prog_data, sizeof(c->prog_data), + &brw->wm.prog_data); } @@ -373,6 +367,7 @@ static void brw_wm_populate_key( struct brw_context *brw, if (unit->_ReallyEnabled) { const struct gl_texture_object *t = unit->_Current; const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i); int swizzles[SWIZZLE_NIL + 1] = { SWIZZLE_X, SWIZZLE_Y, @@ -388,14 +383,14 @@ static void brw_wm_populate_key( struct brw_context *brw, * well and our shadow compares always return the result in * all 4 channels. */ - if (t->Sampler.CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { - if (t->Sampler.DepthMode == GL_ALPHA) { + if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { + if (sampler->DepthMode == GL_ALPHA) { swizzles[0] = SWIZZLE_ZERO; swizzles[1] = SWIZZLE_ZERO; swizzles[2] = SWIZZLE_ZERO; - } else if (t->Sampler.DepthMode == GL_LUMINANCE) { + } else if (sampler->DepthMode == GL_LUMINANCE) { swizzles[3] = SWIZZLE_ONE; - } else if (t->Sampler.DepthMode == GL_RED) { + } else if (sampler->DepthMode == GL_RED) { /* See table 3.23 of the GL 3.0 spec. 
*/ swizzles[1] = SWIZZLE_ZERO; swizzles[2] = SWIZZLE_ZERO; @@ -465,7 +460,7 @@ static void brw_prepare_wm_prog(struct brw_context *brw) struct brw_wm_prog_key key; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; - + brw_wm_populate_key(brw, &key); /* Make an early check for the key. @@ -473,7 +468,6 @@ static void brw_prepare_wm_prog(struct brw_context *brw) drm_intel_bo_unreference(brw->wm.prog_bo); brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, &key, sizeof(key), - NULL, 0, &brw->wm.prog_data); if (brw->wm.prog_bo == NULL) do_wm_prog(brw, fp, &key); diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 40659f26025..a5f99a0a657 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -201,11 +201,11 @@ struct brw_wm_compile { PASS2_DONE } state; - GLuint source_depth_reg:3; - GLuint source_w_reg:3; - GLuint aa_dest_stencil_reg:3; - GLuint dest_depth_reg:3; - GLuint nr_payload_regs:4; + uint8_t source_depth_reg; + uint8_t source_w_reg; + uint8_t aa_dest_stencil_reg; + uint8_t dest_depth_reg; + uint8_t nr_payload_regs; GLuint computes_depth:1; /* could be derived from program string */ GLuint source_depth_to_render_target:1; GLuint runtime_check_aads_emit:1; @@ -218,7 +218,6 @@ struct brw_wm_compile { GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; - GLuint fp_fragcolor_emitted; struct prog_src_register pixel_xy; struct prog_src_register delta_xy; @@ -315,7 +314,7 @@ void brw_wm_print_program( struct brw_wm_compile *c, void brw_wm_lookup_iz(struct intel_context *intel, struct brw_wm_compile *c); -GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c); +bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c); /* brw_wm_emit.c */ void emit_alu1(struct brw_compile *p, @@ -475,5 +474,7 @@ struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint bool 
brw_color_buffer_write_enabled(struct brw_context *brw); bool brw_render_target_supported(gl_format format); +void brw_wm_payload_setup(struct brw_context *brw, + struct brw_wm_compile *c); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index cdc1f367e5c..fd4cd892f41 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -51,16 +51,6 @@ static GLboolean can_do_pln(struct intel_context *intel, return GL_TRUE; } -/* Not quite sure how correct this is - need to understand horiz - * vs. vertical strides a little better. - */ -static INLINE struct brw_reg sechalf( struct brw_reg reg ) -{ - if (reg.vstride) - reg.nr++; - return reg; -} - /* Return the SrcReg index of the channels that can be immediate float operands * instead of usage of PROGRAM_CONSTANT values through push/pull. */ @@ -1325,12 +1315,6 @@ static void fire_fb_write( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; - struct brw_reg dst; - - if (c->dispatch_width == 16) - dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); - else - dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); /* Pass through control information: * @@ -1352,7 +1336,6 @@ static void fire_fb_write( struct brw_wm_compile *c, /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ brw_fb_WRITE(p, c->dispatch_width, - dst, base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), target, diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 4759b289a0c..9ddbee2edf4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -961,35 +961,31 @@ static void emit_render_target_writes( struct brw_wm_compile *c ) struct prog_src_register outcolor; GLuint i; - struct prog_instruction *inst, *last_inst = NULL; + struct prog_instruction *inst = NULL; /* The inst->Aux field is 
used for FB write target and the EOT marker */ - if (c->key.nr_color_regions > 1) { - for (i = 0 ; i < c->key.nr_color_regions; i++) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = INST_AUX_TARGET(i); - if (c->fp_fragcolor_emitted) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = INST_AUX_TARGET(i); - } + for (i = 0; i < c->key.nr_color_regions; i++) { + if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); + } else { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); } - last_inst->Aux |= INST_AUX_EOT; + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = INST_AUX_TARGET(i); } - else { - /* if gl_FragData[0] is written, use it, else use gl_FragColor */ - if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0)) - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); - else - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0); + /* Mark the last FB write as final, or emit a dummy write if we had + * no render targets bound. 
+ */ + if (c->key.nr_color_regions != 0) { + inst->Aux |= INST_AUX_EOT; + } else { + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), + 0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR), + payload_r0_depth, outdepth); + inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT; } } @@ -1015,16 +1011,6 @@ static void validate_src_regs( struct brw_wm_compile *c, } } } - -static void validate_dst_regs( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - if (inst->DstReg.File == PROGRAM_OUTPUT) { - GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLOR) - c->fp_fragcolor_emitted = 1; - } -} static void print_insns( const struct prog_instruction *insn, GLuint nr ) @@ -1083,7 +1069,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; validate_src_regs(c, inst); - validate_dst_regs(c, inst); } /* Loop over all instructions doing assorted simplifications and diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index cfc30d8613f..7b93bf90241 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -35,7 +35,7 @@ #include "brw_defines.h" #include "main/macros.h" - +#include "main/samplerobj.h" /* Samplers aren't strictly wm state from the hardware's perspective, @@ -66,81 +66,93 @@ static GLuint translate_wrap_mode( GLenum wrap ) } } -static drm_intel_bo *upload_default_color( struct brw_context *brw, - const GLfloat *color ) +static void +upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, + int unit) { struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *texObj = texUnit->_Current; + struct gl_texture_image *firstImage = 
texObj->Image[0][texObj->BaseLevel]; + float color[4]; + + if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { + /* GL specs that border color for depth textures is taken from the + * R channel, while the hardware uses A. Spam R into all the + * channels for safety. + */ + color[0] = sampler->BorderColor.f[0]; + color[1] = sampler->BorderColor.f[0]; + color[2] = sampler->BorderColor.f[0]; + color[3] = sampler->BorderColor.f[0]; + } else { + color[0] = sampler->BorderColor.f[0]; + color[1] = sampler->BorderColor.f[1]; + color[2] = sampler->BorderColor.f[2]; + color[3] = sampler->BorderColor.f[3]; + } if (intel->gen >= 5) { - struct gen5_sampler_default_color sdc; + struct gen5_sampler_default_color *sdc; + + sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); - memset(&sdc, 0, sizeof(sdc)); + memset(sdc, 0, sizeof(*sdc)); - UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]); - UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]); - UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]); - UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[0], color[0]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[1], color[1]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[2], color[2]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[3], color[3]); - UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]); - UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]); - UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]); - UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[0], color[0]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[1], color[1]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[2], color[2]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[3], color[3]); - UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]); - UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]); - UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]); - UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[0], color[0]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[1], color[1]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[2], color[2]); + 
UNCLAMPED_FLOAT_TO_SHORT(sdc->s[3], color[3]); - /* XXX: Fill in half floats */ - /* XXX: Fill in signed bytes */ + sdc->hf[0] = _mesa_float_to_half(color[0]); + sdc->hf[1] = _mesa_float_to_half(color[1]); + sdc->hf[2] = _mesa_float_to_half(color[2]); + sdc->hf[3] = _mesa_float_to_half(color[3]); - COPY_4V(sdc.f, color); + sdc->b[0] = sdc->s[0] >> 8; + sdc->b[1] = sdc->s[1] >> 8; + sdc->b[2] = sdc->s[2] >> 8; + sdc->b[3] = sdc->s[3] >> 8; - return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc)); + sdc->f[0] = color[0]; + sdc->f[1] = color[1]; + sdc->f[2] = color[2]; + sdc->f[3] = color[3]; } else { - struct brw_sampler_default_color sdc; + struct brw_sampler_default_color *sdc; - COPY_4V(sdc.color, color); + sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); - return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc)); + COPY_4V(sdc->color, color); } } - -struct wm_sampler_key { - int sampler_count; - - struct wm_sampler_entry { - GLenum tex_target; - GLenum wrap_r, wrap_s, wrap_t; - float maxlod, minlod; - float lod_bias; - float max_aniso; - GLenum minfilter, magfilter; - GLenum comparemode, comparefunc; - - /** If target is cubemap, take context setting. - */ - GLboolean seamless_cube_map; - } sampler[BRW_MAX_TEX_UNIT]; -}; - /** * Sets the sampler state for a single unit based off of the sampler key * entry. 
*/ static void brw_update_sampler_state(struct brw_context *brw, - struct wm_sampler_entry *key, - drm_intel_bo *sdc_bo, + int unit, struct brw_sampler_state *sampler) { struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *texObj = texUnit->_Current; + struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit); - memset(sampler, 0, sizeof(*sampler)); - - switch (key->minfilter) { + switch (gl_sampler->MinFilter) { case GL_NEAREST: sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; @@ -171,17 +183,17 @@ static void brw_update_sampler_state(struct brw_context *brw, /* Set Anisotropy: */ - if (key->max_aniso > 1.0) { + if (gl_sampler->MaxAnisotropy > 1.0) { sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; - if (key->max_aniso > 2.0) { - sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, + if (gl_sampler->MaxAnisotropy > 2.0) { + sampler->ss3.max_aniso = MIN2((gl_sampler->MaxAnisotropy - 2) / 2, BRW_ANISORATIO_16); } } else { - switch (key->magfilter) { + switch (gl_sampler->MagFilter) { case GL_NEAREST: sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; break; @@ -193,9 +205,9 @@ static void brw_update_sampler_state(struct brw_context *brw, } } - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); - sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR); + sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS); + sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT); if (intel->gen >= 6 && sampler->ss0.min_filter != sampler->ss0.mag_filter) @@ -204,9 +216,10 @@ static void brw_update_sampler_state(struct brw_context *brw, /* Cube-maps on 965 and later must use the same 
wrap mode for all 3 * coordinate dimensions. Futher, only CUBE and CLAMP are valid. */ - if (key->tex_target == GL_TEXTURE_CUBE_MAP) { - if (key->seamless_cube_map && - (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { + if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + if (ctx->Texture.CubeMapSeamless && + (gl_sampler->MinFilter != GL_NEAREST || + gl_sampler->MagFilter != GL_NEAREST)) { sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; @@ -215,7 +228,7 @@ static void brw_update_sampler_state(struct brw_context *brw, sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; } - } else if (key->tex_target == GL_TEXTURE_1D) { + } else if (texObj->Target == GL_TEXTURE_1D) { /* There's a bug in 1D texture sampling - it actually pays * attention to the wrap_t value, though it should not. * Override the wrap_t value here to GL_REPEAT to keep @@ -227,18 +240,19 @@ static void brw_update_sampler_state(struct brw_context *brw, /* Set shadow function: */ - if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { + if (gl_sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { /* Shadowing is "enabled" by emitting a particular sampler * message (sample_c). So need to recompile WM program when * shadow comparison is enabled on each/any texture unit. 
*/ sampler->ss0.shadow_function = - intel_translate_shadow_compare_func(key->comparefunc); + intel_translate_shadow_compare_func(gl_sampler->CompareFunc); } /* Set LOD bias: */ - sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); + sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias + + gl_sampler->LodBias, -16, 15), 6); sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ @@ -252,150 +266,67 @@ static void brw_update_sampler_state(struct brw_context *brw, */ sampler->ss0.base_level = U_FIXED(0, 1); - sampler->ss1.max_lod = U_FIXED(CLAMP(key->maxlod, 0, 13), 6); - sampler->ss1.min_lod = U_FIXED(CLAMP(key->minlod, 0, 13), 6); - - sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ -} + sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6); + sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6); + upload_default_color(brw, gl_sampler, unit); -/** Sets up the cache key for sampler state for all texture units */ -static void -brw_wm_sampler_populate_key(struct brw_context *brw, - struct wm_sampler_key *key) -{ - struct gl_context *ctx = &brw->intel.ctx; - int unit; - char *last_entry_end = ((char*)&key->sampler_count) + - sizeof(key->sampler_count); - - key->sampler_count = 0; - - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - if (ctx->Texture.Unit[unit]._ReallyEnabled) { - struct wm_sampler_entry *entry = &key->sampler[unit]; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *texObj = texUnit->_Current; - struct gl_texture_image *firstImage = - texObj->Image[0][texObj->BaseLevel]; - - memset(last_entry_end, 0, - (char*)entry - last_entry_end + sizeof(*entry)); - last_entry_end = ((char*)entry) + sizeof(*entry); - - entry->tex_target = texObj->Target; - - entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) - ? 
ctx->Texture.CubeMapSeamless : GL_FALSE; - - entry->wrap_r = texObj->Sampler.WrapR; - entry->wrap_s = texObj->Sampler.WrapS; - entry->wrap_t = texObj->Sampler.WrapT; - - entry->maxlod = texObj->Sampler.MaxLod; - entry->minlod = texObj->Sampler.MinLod; - entry->lod_bias = texUnit->LodBias + texObj->Sampler.LodBias; - entry->max_aniso = texObj->Sampler.MaxAnisotropy; - entry->minfilter = texObj->Sampler.MinFilter; - entry->magfilter = texObj->Sampler.MagFilter; - entry->comparemode = texObj->Sampler.CompareMode; - entry->comparefunc = texObj->Sampler.CompareFunc; - - drm_intel_bo_unreference(brw->wm.sdc_bo[unit]); - if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { - float bordercolor[4] = { - texObj->Sampler.BorderColor.f[0], - texObj->Sampler.BorderColor.f[0], - texObj->Sampler.BorderColor.f[0], - texObj->Sampler.BorderColor.f[0] - }; - /* GL specs that border color for depth textures is taken from the - * R channel, while the hardware uses A. Spam R into all the - * channels for safety. - */ - brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor); - } else { - brw->wm.sdc_bo[unit] = upload_default_color(brw, - texObj->Sampler.BorderColor.f); - } - key->sampler_count = unit + 1; - } + if (intel->gen >= 6) { + sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5; + } else { + /* reloc */ + sampler->ss2.default_color_pointer = (intel->batch.bo->offset + + brw->wm.sdc_offset[unit]) >> 5; + + drm_intel_bo_emit_reloc(intel->batch.bo, + brw->wm.sampler_offset + + unit * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + intel->batch.bo, brw->wm.sdc_offset[unit], + I915_GEM_DOMAIN_SAMPLER, 0); } - struct wm_sampler_entry *entry = &key->sampler[key->sampler_count]; - memset(last_entry_end, 0, (char*)entry - last_entry_end); } + /* All samplers must be uploaded in a single contiguous array, which * complicates various things. 
However, this is still too confusing - * FIXME: simplify all the different new texture state flags. */ -static void upload_wm_samplers( struct brw_context *brw ) +static void +prepare_wm_samplers(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; - struct wm_sampler_key key; - int i, sampler_key_size; - - brw_wm_sampler_populate_key(brw, &key); + struct brw_sampler_state *samplers; + int i; - if (brw->wm.sampler_count != key.sampler_count) { - brw->wm.sampler_count = key.sampler_count; - brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + brw->wm.sampler_count = 0; + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) + brw->wm.sampler_count = i + 1; } - drm_intel_bo_unreference(brw->wm.sampler_bo); - brw->wm.sampler_bo = NULL; if (brw->wm.sampler_count == 0) return; - /* Only include the populated portion of the key in the search. */ - sampler_key_size = offsetof(struct wm_sampler_key, - sampler[key.sampler_count]); - brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, - &key, sampler_key_size, - brw->wm.sdc_bo, key.sampler_count, - NULL); + samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + 32, &brw->wm.sampler_offset); + memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); - /* If we didnt find it in the cache, compute the state and put it in the - * cache. 
- */ - if (brw->wm.sampler_bo == NULL) { - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - memset(sampler, 0, sizeof(sampler)); - for (i = 0; i < key.sampler_count; i++) { - if (brw->wm.sdc_bo[i] == NULL) - continue; - - brw_update_sampler_state(brw, &key.sampler[i], brw->wm.sdc_bo[i], - &sampler[i]); - } - - brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, - &key, sampler_key_size, - brw->wm.sdc_bo, key.sampler_count, - &sampler, sizeof(sampler)); - - /* Emit SDC relocations */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (!ctx->Texture.Unit[i]._ReallyEnabled) - continue; - - drm_intel_bo_emit_reloc(brw->wm.sampler_bo, - i * sizeof(struct brw_sampler_state) + - offsetof(struct brw_sampler_state, ss2), - brw->wm.sdc_bo[i], 0, - I915_GEM_DOMAIN_SAMPLER, 0); - } + for (i = 0; i < brw->wm.sampler_count; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) + brw_update_sampler_state(brw, i, &samplers[i]); } + + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; } const struct brw_tracked_state brw_wm_samplers = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = 0 }, - .prepare = upload_wm_samplers, + .prepare = prepare_wm_samplers, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index be4b260a5ff..a91ae511b7f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -40,21 +40,6 @@ * WM unit - fragment programs and rasterization */ -struct brw_wm_unit_key { - unsigned int total_grf, total_scratch; - unsigned int urb_entry_read_length; - unsigned int curb_entry_read_length; - unsigned int dispatch_grf_start_reg; - - unsigned int curbe_offset; - - unsigned int nr_surfaces, sampler_count; - GLboolean uses_depth, computes_depth, uses_kill, is_glsl; - GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; - GLboolean color_write_enable; - GLfloat offset_units, offset_factor; -}; - bool 
brw_color_buffer_write_enabled(struct brw_context *brw) { @@ -81,219 +66,192 @@ brw_color_buffer_write_enabled(struct brw_context *brw) return false; } +/** + * Setup wm hardware state. See page 225 of Volume 2 + */ static void -wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) +brw_prepare_wm_unit(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; - const struct gl_fragment_program *fp = brw->fragment_program; struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + const struct gl_fragment_program *fp = brw->fragment_program; + struct brw_wm_unit_state *wm; - memset(key, 0, sizeof(*key)); + wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.state_offset); + memset(wm, 0, sizeof(*wm)); + + if (brw->wm.prog_data->prog_offset_16) { + /* These two fields should be the same pre-gen6, which is why we + * only have one hardware field to program for both dispatch + * widths. + */ + assert(brw->wm.prog_data->first_curbe_grf == + brw->wm.prog_data->first_curbe_grf_16); + } /* CACHE_NEW_WM_PROG */ - key->total_grf = brw->wm.prog_data->total_grf; - key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; - key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; - key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; - key->total_scratch = brw->wm.prog_data->total_scratch; + wm->thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1; + wm->wm9.grf_reg_count_2 = ALIGN(brw->wm.prog_data->total_grf_16, 16) / 16 - 1; + wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ + /* reloc */ + wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset + + brw->wm.prog_data->prog_offset_16) >> 6; + wm->thread1.depth_coef_urb_read_offset = 1; + wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - /* BRW_NEW_CURBE_OFFSETS */ - key->curbe_offset = brw->curbe.wm_start; + if (intel->gen == 5) + wm->thread1.binding_table_entry_count = 
0; /* hardware requirement */ + else { + /* BRW_NEW_NR_SURFACES */ + wm->thread1.binding_table_entry_count = brw->wm.nr_surfaces; + } - /* BRW_NEW_NR_SURFACEs */ - key->nr_surfaces = brw->wm.nr_surfaces; + if (brw->wm.prog_data->total_scratch != 0) { + wm->thread2.scratch_space_base_pointer = + brw->wm.scratch_bo->offset >> 10; /* reloc */ + wm->thread2.per_thread_scratch_space = + ffs(brw->wm.prog_data->total_scratch) - 11; + } else { + wm->thread2.scratch_space_base_pointer = 0; + wm->thread2.per_thread_scratch_space = 0; + } - /* CACHE_NEW_SAMPLER */ - key->sampler_count = brw->wm.sampler_count; + wm->thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + wm->thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; + wm->thread3.urb_entry_read_offset = 0; + wm->thread3.const_urb_entry_read_length = + brw->wm.prog_data->curb_read_length; + /* BRW_NEW_CURBE_OFFSETS */ + wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; - /* _NEW_POLYGONSTIPPLE */ - key->polygon_stipple = ctx->Polygon.StippleFlag; + if (intel->gen == 5) + wm->wm4.sampler_count = 0; /* hardware requirement */ + else { + /* CACHE_NEW_SAMPLER */ + wm->wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; + } - /* BRW_NEW_FRAGMENT_PROGRAM */ - key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; + if (brw->wm.sampler_count) { + /* reloc */ + wm->wm4.sampler_state_pointer = (intel->batch.bo->offset + + brw->wm.sampler_offset) >> 5; + } else { + wm->wm4.sampler_state_pointer = 0; + } - /* as far as we can tell */ - key->computes_depth = - (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; + /* BRW_NEW_FRAGMENT_PROGRAM */ + wm->wm5.program_uses_depth = (fp->Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; + wm->wm5.program_computes_depth = (fp->Base.OutputsWritten & + BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; /* BRW_NEW_DEPTH_BUFFER * Override for NULL depthbuffer case, required by the Pixel Shader Computed * Depth field. 
*/ if (brw->state.depth_region == NULL) - key->computes_depth = 0; - - /* _NEW_BUFFERS | _NEW_COLOR */ - key->color_write_enable = brw_color_buffer_write_enabled(brw); + wm->wm5.program_computes_depth = 0; /* _NEW_COLOR */ - key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; + wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled; - /* If using the fragment shader backend, the program is always - * 8-wide. + + /* BRW_NEW_FRAGMENT_PROGRAM + * + * If using the fragment shader backend, the program is always + * 8-wide. If not, it's always 16. */ if (ctx->Shader.CurrentFragmentProgram) { struct brw_shader *shader = (struct brw_shader *) ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (shader != NULL && shader->ir != NULL) { - key->is_glsl = GL_TRUE; + wm->wm5.enable_8_pix = 1; + if (brw->wm.prog_data->prog_offset_16) + wm->wm5.enable_16_pix = 1; } } + if (!wm->wm5.enable_8_pix) + wm->wm5.enable_16_pix = 1; - /* _NEW_DEPTH */ - key->stats_wm = intel->stats_wm; + wm->wm5.max_threads = brw->wm_max_threads - 1; - /* _NEW_LINE */ - key->line_stipple = ctx->Line.StippleFlag; - - /* _NEW_POLYGON */ - key->offset_enable = ctx->Polygon.OffsetFill; - key->offset_units = ctx->Polygon.OffsetUnits; - key->offset_factor = ctx->Polygon.OffsetFactor; -} - -/** - * Setup wm hardware state. 
See page 225 of Volume 2 - */ -static drm_intel_bo * -wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, - drm_intel_bo **reloc_bufs) -{ - struct intel_context *intel = &brw->intel; - struct brw_wm_unit_state wm; - drm_intel_bo *bo; - - memset(&wm, 0, sizeof(wm)); - - wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; - wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ - wm.thread1.depth_coef_urb_read_offset = 1; - wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - - if (intel->gen == 5) - wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ - else - wm.thread1.binding_table_entry_count = key->nr_surfaces; - - if (key->total_scratch != 0) { - wm.thread2.scratch_space_base_pointer = - brw->wm.scratch_bo->offset >> 10; /* reloc */ - wm.thread2.per_thread_scratch_space = ffs(key->total_scratch) - 11; - } else { - wm.thread2.scratch_space_base_pointer = 0; - wm.thread2.per_thread_scratch_space = 0; - } - - wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; - wm.thread3.urb_entry_read_length = key->urb_entry_read_length; - wm.thread3.urb_entry_read_offset = 0; - wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; - wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - - if (intel->gen == 5) - wm.wm4.sampler_count = 0; /* hardware requirement */ - else - wm.wm4.sampler_count = (key->sampler_count + 1) / 4; - - if (brw->wm.sampler_bo != NULL) { - /* reloc */ - wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; - } else { - wm.wm4.sampler_state_pointer = 0; - } - - wm.wm5.program_uses_depth = key->uses_depth; - wm.wm5.program_computes_depth = key->computes_depth; - wm.wm5.program_uses_killpixel = key->uses_kill; - - if (key->is_glsl) - wm.wm5.enable_8_pix = 1; - else - wm.wm5.enable_16_pix = 1; - - wm.wm5.max_threads = brw->wm_max_threads - 1; - - if (key->color_write_enable || - key->uses_kill || - key->computes_depth) 
{ - wm.wm5.thread_dispatch_enable = 1; + /* _NEW_BUFFERS | _NEW_COLOR */ + if (brw_color_buffer_write_enabled(brw) || + wm->wm5.program_uses_killpixel || + wm->wm5.program_computes_depth) { + wm->wm5.thread_dispatch_enable = 1; } - wm.wm5.legacy_line_rast = 0; - wm.wm5.legacy_global_depth_bias = 0; - wm.wm5.early_depth_test = 1; /* never need to disable */ - wm.wm5.line_aa_region_width = 0; - wm.wm5.line_endcap_aa_region_width = 1; + wm->wm5.legacy_line_rast = 0; + wm->wm5.legacy_global_depth_bias = 0; + wm->wm5.early_depth_test = 1; /* never need to disable */ + wm->wm5.line_aa_region_width = 0; + wm->wm5.line_endcap_aa_region_width = 1; - wm.wm5.polygon_stipple = key->polygon_stipple; + /* _NEW_POLYGONSTIPPLE */ + wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag; - if (key->offset_enable) { - wm.wm5.depth_offset = 1; + /* _NEW_POLYGON */ + if (ctx->Polygon.OffsetFill) { + wm->wm5.depth_offset = 1; /* Something wierd going on with legacy_global_depth_bias, * offset_constant, scaling and MRD. This value passes glean * but gives some odd results elsewere (eg. the * quad-offset-units test). 
*/ - wm.global_depth_offset_constant = key->offset_units * 2; + wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2; /* This is the only value that passes glean: */ - wm.global_depth_offset_scale = key->offset_factor; + wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor; } - wm.wm5.line_stipple = key->line_stipple; - - if (unlikely(INTEL_DEBUG & DEBUG_STATS) || key->stats_wm) - wm.wm4.stats_enable = 1; + /* _NEW_LINE */ + wm->wm5.line_stipple = ctx->Line.StippleFlag; - bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, - key, sizeof(*key), - reloc_bufs, 3, - &wm, sizeof(wm)); + /* _NEW_DEPTH */ + if (unlikely(INTEL_DEBUG & DEBUG_STATS) || intel->stats_wm) + wm->wm4.stats_enable = 1; /* Emit WM program relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo, wm.thread0.grf_reg_count << 1, + drm_intel_bo_emit_reloc(intel->batch.bo, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo, wm->thread0.grf_reg_count << 1, I915_GEM_DOMAIN_INSTRUCTION, 0); + if (brw->wm.prog_data->prog_offset_16) { + drm_intel_bo_emit_reloc(intel->batch.bo, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, wm9), + brw->wm.prog_bo, + ((wm->wm9.grf_reg_count_2 << 1) + + brw->wm.prog_data->prog_offset_16), + I915_GEM_DOMAIN_INSTRUCTION, 0); + } + /* Emit scratch space relocation */ - if (key->total_scratch != 0) { - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2), + if (brw->wm.prog_data->total_scratch != 0) { + drm_intel_bo_emit_reloc(intel->batch.bo, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, thread2), brw->wm.scratch_bo, - wm.thread2.per_thread_scratch_space, + wm->thread2.per_thread_scratch_space, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } /* Emit sampler state relocation */ - if (key->sampler_count != 0) { - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo, (wm.wm4.stats_enable | - 
(wm.wm4.sampler_count << 2)), + if (brw->wm.sampler_count != 0) { + drm_intel_bo_emit_reloc(intel->batch.bo, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, wm4), + intel->batch.bo, (brw->wm.sampler_offset | + wm->wm4.stats_enable | + (wm->wm4.sampler_count << 2)), I915_GEM_DOMAIN_INSTRUCTION, 0); } - return bo; -} - - -static void upload_wm_unit( struct brw_context *brw ) -{ - struct brw_wm_unit_key key; - drm_intel_bo *reloc_bufs[3]; - wm_unit_populate_key(brw, &key); - - reloc_bufs[0] = brw->wm.prog_bo; - reloc_bufs[1] = brw->wm.scratch_bo; - reloc_bufs[2] = brw->wm.sampler_bo; - - drm_intel_bo_unreference(brw->wm.state_bo); - brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, - &key, sizeof(key), - reloc_bufs, 3, - NULL); - if (brw->wm.state_bo == NULL) { - brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); - } + brw->state.dirty.cache |= CACHE_NEW_WM_UNIT; } const struct brw_tracked_state brw_wm_unit = { @@ -305,7 +263,8 @@ const struct brw_tracked_state brw_wm_unit = { _NEW_DEPTH | _NEW_BUFFERS), - .brw = (BRW_NEW_FRAGMENT_PROGRAM | + .brw = (BRW_NEW_BATCH | + BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | BRW_NEW_DEPTH_BUFFER | BRW_NEW_NR_WM_SURFACES), @@ -313,6 +272,6 @@ const struct brw_tracked_state brw_wm_unit = { .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) }, - .prepare = upload_wm_unit, + .prepare = brw_prepare_wm_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index e3396a3cbd4..47b8b511f05 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -31,6 +31,7 @@ #include "main/mtypes.h" +#include "main/samplerobj.h" #include "main/texstore.h" #include "program/prog_parameter.h" @@ -112,6 +113,10 @@ static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] = [MESA_FORMAT_LUMINANCE_FLOAT32] = BRW_SURFACEFORMAT_L32_FLOAT, [MESA_FORMAT_ALPHA_FLOAT32] = 
BRW_SURFACEFORMAT_A32_FLOAT, [MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_L32A32_FLOAT, + [MESA_FORMAT_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_UNORM, + [MESA_FORMAT_SIGNED_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_SNORM, + [MESA_FORMAT_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_UNORM, + [MESA_FORMAT_SIGNED_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_SNORM, }; bool @@ -213,6 +218,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); const GLuint surf_index = SURF_INDEX_TEXTURE(unit); struct brw_surface_state *surf; @@ -224,8 +230,8 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) surf->ss0.surface_type = translate_tex_target(tObj->Target); surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat, firstImage->InternalFormat, - tObj->Sampler.DepthMode, - tObj->Sampler.sRGBDecode); + sampler->DepthMode, + sampler->sRGBDecode); /* This is ok for all textures with channel width 8bit or less: */ @@ -309,7 +315,7 @@ brw_create_constant_surface(struct brw_context *brw, * state atom. 
*/ static void -prepare_wm_constants(struct brw_context *brw) +prepare_wm_pull_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; @@ -353,7 +359,7 @@ const struct brw_tracked_state brw_wm_constants = { .brw = (BRW_NEW_FRAGMENT_PROGRAM), .cache = 0 }, - .prepare = prepare_wm_constants, + .prepare = prepare_wm_pull_constants, }; /** diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 1b935fb5e70..66357f00fa6 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -32,82 +32,39 @@ #include "intel_batchbuffer.h" #include "main/macros.h" -struct gen6_blend_state_key { - GLboolean color_blend, alpha_enabled; - GLboolean dither; - GLboolean color_mask[BRW_MAX_DRAW_BUFFERS][4]; - - GLenum logic_op; - - GLenum blend_eq_rgb, blend_eq_a; - GLenum blend_src_rgb, blend_src_a; - GLenum blend_dst_rgb, blend_dst_a; - - GLenum alpha_func; -}; - static void -blend_state_populate_key(struct brw_context *brw, - struct gen6_blend_state_key *key) +prepare_blend_state(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; - - memset(key, 0, sizeof(*key)); - - /* _NEW_COLOR */ - memcpy(key->color_mask, ctx->Color.ColorMask, sizeof(key->color_mask)); - - /* _NEW_COLOR */ - if (ctx->Color._LogicOpEnabled) - key->logic_op = ctx->Color.LogicOp; - else - key->logic_op = GL_COPY; - - /* _NEW_COLOR */ - key->color_blend = ctx->Color.BlendEnabled; - if (key->color_blend) { - key->blend_eq_rgb = ctx->Color.Blend[0].EquationRGB; - key->blend_eq_a = ctx->Color.Blend[0].EquationA; - key->blend_src_rgb = ctx->Color.Blend[0].SrcRGB; - key->blend_dst_rgb = ctx->Color.Blend[0].DstRGB; - key->blend_src_a = ctx->Color.Blend[0].SrcA; - key->blend_dst_a = ctx->Color.Blend[0].DstA; - } - - /* _NEW_COLOR */ - key->alpha_enabled = ctx->Color.AlphaEnabled; - if (key->alpha_enabled) { - key->alpha_func = ctx->Color.AlphaFunc; - } - - /* _NEW_COLOR */ - 
key->dither = ctx->Color.DitherFlag; -} - -/** - * Creates the state cache entry for the given CC unit key. - */ -static drm_intel_bo * -blend_state_create_from_key(struct brw_context *brw, - struct gen6_blend_state_key *key) -{ - struct gen6_blend_state blend[BRW_MAX_DRAW_BUFFERS]; - drm_intel_bo *bo; + struct gen6_blend_state *blend; int b; - - memset(&blend, 0, sizeof(blend)); - - for (b = 0; b < BRW_MAX_DRAW_BUFFERS; b++) { - if (key->logic_op != GL_COPY) { - blend[b].blend1.logic_op_enable = 1; - blend[b].blend1.logic_op_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend & (1 << b)) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; + int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers; + int size = sizeof(*blend) * nr_draw_buffers; + + blend = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset); + + memset(blend, 0, size); + + for (b = 0; b < nr_draw_buffers; b++) { + /* _NEW_COLOR */ + if (ctx->Color._LogicOpEnabled) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[b]; + /* _NEW_BUFFERS */ + /* Floating point RTs should have no effect from LogicOp, + * except for disabling of blending + */ + if (_mesa_get_format_datatype(rb->Format) != GL_FLOAT) { + blend[b].blend1.logic_op_enable = 1; + blend[b].blend1.logic_op_func = + intel_translate_logic_op(ctx->Color.LogicOp); + } + } else if (ctx->Color.BlendEnabled & (1 << b)) { + GLenum eqRGB = ctx->Color.Blend[0].EquationRGB; + GLenum eqA = ctx->Color.Blend[0].EquationA; + GLenum srcRGB = ctx->Color.Blend[0].SrcRGB; + GLenum dstRGB = ctx->Color.Blend[0].DstRGB; + GLenum srcA = ctx->Color.Blend[0].SrcA; + GLenum dstA = ctx->Color.Blend[0].DstA; if (eqRGB == GL_MIN || eqRGB == GL_MAX) { srcRGB = dstRGB = GL_ONE; @@ -131,146 +88,74 @@ blend_state_create_from_key(struct brw_context *brw, eqA != 
eqRGB); } - if (key->alpha_enabled) { + + /* _NEW_COLOR */ + if (ctx->Color.AlphaEnabled) { blend[b].blend1.alpha_test_enable = 1; - blend[b].blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); + blend[b].blend1.alpha_test_func = + intel_translate_compare_func(ctx->Color.AlphaFunc); } - if (key->dither) { + /* _NEW_COLOR */ + if (ctx->Color.DitherFlag) { blend[b].blend1.dither_enable = 1; blend[b].blend1.y_dither_offset = 0; blend[b].blend1.x_dither_offset = 0; } - blend[b].blend1.write_disable_r = !key->color_mask[b][0]; - blend[b].blend1.write_disable_g = !key->color_mask[b][1]; - blend[b].blend1.write_disable_b = !key->color_mask[b][2]; - blend[b].blend1.write_disable_a = !key->color_mask[b][3]; + blend[b].blend1.write_disable_r = !ctx->Color.ColorMask[b][0]; + blend[b].blend1.write_disable_g = !ctx->Color.ColorMask[b][1]; + blend[b].blend1.write_disable_b = !ctx->Color.ColorMask[b][2]; + blend[b].blend1.write_disable_a = !ctx->Color.ColorMask[b][3]; } - bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE, - key, sizeof(*key), - NULL, 0, - &blend, sizeof(blend)); - - return bo; -} - -static void -prepare_blend_state(struct brw_context *brw) -{ - struct gen6_blend_state_key key; - - blend_state_populate_key(brw, &key); - - drm_intel_bo_unreference(brw->cc.blend_state_bo); - brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE, - &key, sizeof(key), - NULL, 0, - NULL); - - if (brw->cc.blend_state_bo == NULL) - brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key); + brw->state.dirty.cache |= CACHE_NEW_BLEND_STATE; } const struct brw_tracked_state gen6_blend_state = { .dirty = { - .mesa = _NEW_COLOR, - .brw = 0, + .mesa = (_NEW_COLOR | + _NEW_BUFFERS), + .brw = BRW_NEW_BATCH, .cache = 0, }, .prepare = prepare_blend_state, }; -struct gen6_color_calc_state_key { - float blend_constant_color[4]; - GLclampf alpha_ref; - GLubyte stencil_ref[2]; -}; - static void -color_calc_state_populate_key(struct brw_context *brw, - 
struct gen6_color_calc_state_key *key) +gen6_prepare_color_calc_state(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; + struct gen6_color_calc_state *cc; - memset(key, 0, sizeof(*key)); - - /* _NEW_STENCIL */ - if (ctx->Stencil._Enabled) { - const unsigned back = ctx->Stencil._BackFace; - - key->stencil_ref[0] = ctx->Stencil.Ref[0]; - if (ctx->Stencil._TestTwoSide) - key->stencil_ref[1] = ctx->Stencil.Ref[back]; - } + cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + memset(cc, 0, sizeof(*cc)); /* _NEW_COLOR */ - if (ctx->Color.AlphaEnabled) - key->alpha_ref = ctx->Color.AlphaRef; - - key->blend_constant_color[0] = ctx->Color.BlendColorUnclamped[0]; - key->blend_constant_color[1] = ctx->Color.BlendColorUnclamped[1]; - key->blend_constant_color[2] = ctx->Color.BlendColorUnclamped[2]; - key->blend_constant_color[3] = ctx->Color.BlendColorUnclamped[3]; -} - -/** - * Creates the state cache entry for the given CC state key. - */ -static drm_intel_bo * -color_calc_state_create_from_key(struct brw_context *brw, - struct gen6_color_calc_state_key *key) -{ - struct gen6_color_calc_state cc; - drm_intel_bo *bo; - - memset(&cc, 0, sizeof(cc)); + cc->cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + UNCLAMPED_FLOAT_TO_UBYTE(cc->cc1.alpha_ref_fi.ui, ctx->Color.AlphaRef); - cc.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc1.alpha_ref_fi.ui, key->alpha_ref); - - cc.cc0.stencil_ref = key->stencil_ref[0]; - cc.cc0.bf_stencil_ref = key->stencil_ref[1]; - - cc.constant_r = key->blend_constant_color[0]; - cc.constant_g = key->blend_constant_color[1]; - cc.constant_b = key->blend_constant_color[2]; - cc.constant_a = key->blend_constant_color[3]; - - bo = brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE, - key, sizeof(*key), - NULL, 0, - &cc, sizeof(cc)); - - return bo; -} - -static void -prepare_color_calc_state(struct brw_context *brw) -{ - struct gen6_color_calc_state_key key; - - 
color_calc_state_populate_key(brw, &key); + /* _NEW_STENCIL */ + cc->cc0.stencil_ref = ctx->Stencil.Ref[0]; + cc->cc0.bf_stencil_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace]; - drm_intel_bo_unreference(brw->cc.color_calc_state_bo); - brw->cc.color_calc_state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE, - &key, sizeof(key), - NULL, 0, - NULL); + /* _NEW_COLOR */ + cc->constant_r = ctx->Color.BlendColorUnclamped[0]; + cc->constant_g = ctx->Color.BlendColorUnclamped[1]; + cc->constant_b = ctx->Color.BlendColorUnclamped[2]; + cc->constant_a = ctx->Color.BlendColorUnclamped[3]; - if (brw->cc.color_calc_state_bo == NULL) - brw->cc.color_calc_state_bo = color_calc_state_create_from_key(brw, &key); + brw->state.dirty.cache |= CACHE_NEW_COLOR_CALC_STATE; } const struct brw_tracked_state gen6_color_calc_state = { .dirty = { .mesa = _NEW_COLOR | _NEW_STENCIL, - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = 0, }, - .prepare = prepare_color_calc_state, + .prepare = gen6_prepare_color_calc_state, }; static void upload_cc_state_pointers(struct brw_context *brw) @@ -279,20 +164,12 @@ static void upload_cc_state_pointers(struct brw_context *brw) BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); - OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); - OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); - OUT_RELOC(brw->cc.color_calc_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_BATCH(brw->cc.blend_state_offset | 1); + OUT_BATCH(brw->cc.depth_stencil_state_offset | 1); + OUT_BATCH(brw->cc.state_offset | 1); ADVANCE_BATCH(); } - -static void prepare_cc_state_pointers(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->cc.color_calc_state_bo); - brw_add_validated_bo(brw, brw->cc.blend_state_bo); - brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo); -} - const struct brw_tracked_state gen6_cc_state_pointers = { .dirty = { .mesa = 0, @@ -301,6 +178,5 @@ const struct brw_tracked_state 
gen6_cc_state_pointers = { CACHE_NEW_COLOR_CALC_STATE | CACHE_NEW_DEPTH_STENCIL_STATE) }, - .prepare = prepare_cc_state_pointers, .emit = upload_cc_state_pointers, }; diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index 96e6eade6b7..775e1ce2c9c 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -28,138 +28,68 @@ #include "brw_context.h" #include "brw_state.h" -struct brw_depth_stencil_state_key { - GLenum depth_func; - GLboolean depth_test, depth_write; - GLboolean stencil, stencil_two_side; - GLenum stencil_func[2], stencil_fail_op[2]; - GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; - GLubyte stencil_write_mask[2], stencil_test_mask[2]; -}; - static void -depth_stencil_state_populate_key(struct brw_context *brw, - struct brw_depth_stencil_state_key *key) +gen6_prepare_depth_stencil_state(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; - const unsigned back = ctx->Stencil._BackFace; + struct gen6_depth_stencil_state *ds; - memset(key, 0, sizeof(*key)); + ds = brw_state_batch(brw, sizeof(*ds), 64, + &brw->cc.depth_stencil_state_offset); + memset(ds, 0, sizeof(*ds)); /* _NEW_STENCIL */ - key->stencil = ctx->Stencil._Enabled; - key->stencil_two_side = ctx->Stencil._TestTwoSide; - - if (key->stencil) { - key->stencil_func[0] = ctx->Stencil.Function[0]; - key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; - key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; - key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; - key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; - key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; - } - if (key->stencil_two_side) { - key->stencil_func[1] = ctx->Stencil.Function[back]; - key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; - key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; - key->stencil_pass_depth_pass_op[1] = 
ctx->Stencil.ZPassFunc[back]; - key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; - key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; - } - - key->depth_test = ctx->Depth.Test; - if (key->depth_test) { - key->depth_func = ctx->Depth.Func; - key->depth_write = ctx->Depth.Mask; - } -} - -/** - * Creates the state cache entry for the given DEPTH_STENCIL_STATE state key. - */ -static drm_intel_bo * -depth_stencil_state_create_from_key(struct brw_context *brw, - struct brw_depth_stencil_state_key *key) -{ - struct gen6_depth_stencil_state ds; - drm_intel_bo *bo; - - memset(&ds, 0, sizeof(ds)); - - /* _NEW_STENCIL */ - if (key->stencil) { - ds.ds0.stencil_enable = 1; - ds.ds0.stencil_func = - intel_translate_compare_func(key->stencil_func[0]); - ds.ds0.stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[0]); - ds.ds0.stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); - ds.ds0.stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); - ds.ds1.stencil_write_mask = key->stencil_write_mask[0]; - ds.ds1.stencil_test_mask = key->stencil_test_mask[0]; - - if (key->stencil_two_side) { - ds.ds0.bf_stencil_enable = 1; - ds.ds0.bf_stencil_func = - intel_translate_compare_func(key->stencil_func[1]); - ds.ds0.bf_stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[1]); - ds.ds0.bf_stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); - ds.ds0.bf_stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); - ds.ds1.bf_stencil_write_mask = key->stencil_write_mask[1]; - ds.ds1.bf_stencil_test_mask = key->stencil_test_mask[1]; + if (ctx->Stencil._Enabled) { + int back = ctx->Stencil._BackFace; + + ds->ds0.stencil_enable = 1; + ds->ds0.stencil_func = + intel_translate_compare_func(ctx->Stencil.Function[0]); + ds->ds0.stencil_fail_op = + 
intel_translate_stencil_op(ctx->Stencil.FailFunc[0]); + ds->ds0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]); + ds->ds0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]); + ds->ds1.stencil_write_mask = ctx->Stencil.WriteMask[0]; + ds->ds1.stencil_test_mask = ctx->Stencil.ValueMask[0]; + + if (ctx->Stencil._TestTwoSide) { + ds->ds0.bf_stencil_enable = 1; + ds->ds0.bf_stencil_func = + intel_translate_compare_func(ctx->Stencil.Function[back]); + ds->ds0.bf_stencil_fail_op = + intel_translate_stencil_op(ctx->Stencil.FailFunc[back]); + ds->ds0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]); + ds->ds0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]); + ds->ds1.bf_stencil_write_mask = ctx->Stencil.WriteMask[back]; + ds->ds1.bf_stencil_test_mask = ctx->Stencil.ValueMask[back]; } /* Not really sure about this: */ - if (key->stencil_write_mask[0] || - (key->stencil_two_side && key->stencil_write_mask[1])) - ds.ds0.stencil_write_enable = 1; + if (ctx->Stencil.WriteMask[0] || + (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back])) + ds->ds0.stencil_write_enable = 1; } /* _NEW_DEPTH */ - if (key->depth_test) { - ds.ds2.depth_test_enable = 1; - ds.ds2.depth_test_func = intel_translate_compare_func(key->depth_func); - ds.ds2.depth_write_enable = key->depth_write; + if (ctx->Depth.Test) { + ds->ds2.depth_test_enable = 1; + ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func); + ds->ds2.depth_write_enable = ctx->Depth.Mask; } - bo = brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE, - key, sizeof(*key), - NULL, 0, - &ds, sizeof(ds)); - - return bo; -} - -static void -prepare_depth_stencil_state(struct brw_context *brw) -{ - struct brw_depth_stencil_state_key key; - - depth_stencil_state_populate_key(brw, &key); - - drm_intel_bo_unreference(brw->cc.depth_stencil_state_bo); - 
brw->cc.depth_stencil_state_bo = brw_search_cache(&brw->cache, - BRW_DEPTH_STENCIL_STATE, - &key, sizeof(key), - NULL, 0, - NULL); - - if (brw->cc.depth_stencil_state_bo == NULL) - brw->cc.depth_stencil_state_bo = - depth_stencil_state_create_from_key(brw, &key); + brw->state.dirty.cache |= CACHE_NEW_DEPTH_STENCIL_STATE; } const struct brw_tracked_state gen6_depth_stencil_state = { .dirty = { .mesa = _NEW_DEPTH | _NEW_STENCIL, - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = 0, }, - .prepare = prepare_depth_stencil_state, + .prepare = gen6_prepare_depth_stencil_state, }; diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c index f65c651bdff..4cdec699df6 100644 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -43,27 +43,15 @@ upload_sampler_state_pointers(struct brw_context *brw) (4 - 2)); OUT_BATCH(0); /* VS */ OUT_BATCH(0); /* GS */ - if (brw->wm.sampler_bo) - OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - else - OUT_BATCH(0); - + OUT_BATCH(brw->wm.sampler_offset); ADVANCE_BATCH(); } - -static void -prepare_sampler_state_pointers(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->wm.sampler_bo); -} - const struct brw_tracked_state gen6_sampler_state = { .dirty = { .mesa = 0, .brw = BRW_NEW_BATCH, .cache = CACHE_NEW_SAMPLER }, - .prepare = prepare_sampler_state_pointers, .emit = upload_sampler_state_pointers, }; diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 12b65826ae9..fad3ca0dd04 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -31,11 +31,15 @@ #include "intel_batchbuffer.h" static void -prepare_scissor_state(struct brw_context *brw) +gen6_prepare_scissor_state(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct 
gl_context *ctx = &intel->ctx; const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); - struct gen6_scissor_rect scissor; + struct gen6_scissor_rect *scissor; + uint32_t scissor_state_offset; + + scissor = brw_state_batch(brw, sizeof(*scissor), 32, &scissor_state_offset); /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ @@ -54,62 +58,36 @@ prepare_scissor_state(struct brw_context *brw) * anything. Instead, just provide a min > max scissor inside * the bounds, which produces the expected no rendering. */ - scissor.xmin = 1; - scissor.xmax = 0; - scissor.ymin = 1; - scissor.ymax = 0; + scissor->xmin = 1; + scissor->xmax = 0; + scissor->ymin = 1; + scissor->ymax = 0; } else if (render_to_fbo) { /* texmemory: Y=0=bottom */ - scissor.xmin = ctx->DrawBuffer->_Xmin; - scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - scissor.ymin = ctx->DrawBuffer->_Ymin; - scissor.ymax = ctx->DrawBuffer->_Ymax - 1; + scissor->xmin = ctx->DrawBuffer->_Xmin; + scissor->xmax = ctx->DrawBuffer->_Xmax - 1; + scissor->ymin = ctx->DrawBuffer->_Ymin; + scissor->ymax = ctx->DrawBuffer->_Ymax - 1; } else { /* memory: Y=0=top */ - scissor.xmin = ctx->DrawBuffer->_Xmin; - scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; - scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + scissor->xmin = ctx->DrawBuffer->_Xmin; + scissor->xmax = ctx->DrawBuffer->_Xmax - 1; + scissor->ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; + scissor->ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; } - drm_intel_bo_unreference(brw->sf.state_bo); - brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT, - &scissor, sizeof(scissor)); -} - -const struct brw_tracked_state gen6_scissor_state = { - .dirty = { - .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT, - .brw = 0, - .cache = 0, - }, - .prepare = prepare_scissor_state, -}; - -static void upload_scissor_state_pointers(struct brw_context *brw) -{ - struct 
intel_context *intel = &brw->intel; - BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); - OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(scissor_state_offset); ADVANCE_BATCH(); - } - -static void prepare_scissor_state_pointers(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->sf.state_bo); -} - -const struct brw_tracked_state gen6_scissor_state_pointers = { +const struct brw_tracked_state gen6_scissor_state = { .dirty = { - .mesa = 0, + .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT, .brw = BRW_NEW_BATCH, - .cache = CACHE_NEW_SF_UNIT + .cache = 0, }, - .prepare = prepare_scissor_state_pointers, - .emit = upload_scissor_state_pointers, + .prepare = gen6_prepare_scissor_state, }; diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index cd7d209e3ea..4116bdb96de 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -41,22 +41,22 @@ static void prepare_clip_vp(struct brw_context *brw) { - struct brw_clipper_viewport vp; + struct brw_clipper_viewport *vp; - vp.xmin = -1.0; - vp.xmax = 1.0; - vp.ymin = -1.0; - vp.ymax = 1.0; + vp = brw_state_batch(brw, sizeof(*vp), 32, &brw->clip.vp_offset); - drm_intel_bo_unreference(brw->clip.vp_bo); - brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP, - &vp, sizeof(vp)); + vp->xmin = -1.0; + vp->xmax = 1.0; + vp->ymin = -1.0; + vp->ymax = 1.0; + + brw->state.dirty.cache |= CACHE_NEW_CLIP_VP; } const struct brw_tracked_state gen6_clip_vp = { .dirty = { - .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */ - .brw = 0, + .mesa = 0, + .brw = BRW_NEW_BATCH, .cache = 0, }, .prepare = prepare_clip_vp, @@ -67,12 +67,13 @@ prepare_sf_vp(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; - struct brw_sf_viewport sfv; + struct brw_sf_viewport *sfv; 
GLfloat y_scale, y_bias; const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - memset(&sfv, 0, sizeof(sfv)); + sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + memset(sfv, 0, sizeof(*sfv)); /* _NEW_BUFFERS */ if (render_to_fbo) { @@ -84,34 +85,25 @@ prepare_sf_vp(struct brw_context *brw) } /* _NEW_VIEWPORT */ - sfv.viewport.m00 = v[MAT_SX]; - sfv.viewport.m11 = v[MAT_SY] * y_scale; - sfv.viewport.m22 = v[MAT_SZ] * depth_scale; - sfv.viewport.m30 = v[MAT_TX]; - sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; - sfv.viewport.m32 = v[MAT_TZ] * depth_scale; + sfv->viewport.m00 = v[MAT_SX]; + sfv->viewport.m11 = v[MAT_SY] * y_scale; + sfv->viewport.m22 = v[MAT_SZ] * depth_scale; + sfv->viewport.m30 = v[MAT_TX]; + sfv->viewport.m31 = v[MAT_TY] * y_scale + y_bias; + sfv->viewport.m32 = v[MAT_TZ] * depth_scale; - drm_intel_bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, - &sfv, sizeof(sfv)); + brw->state.dirty.cache |= CACHE_NEW_SF_VP; } const struct brw_tracked_state gen6_sf_vp = { .dirty = { .mesa = _NEW_VIEWPORT | _NEW_BUFFERS, - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = 0, }, .prepare = prepare_sf_vp, }; -static void prepare_viewport_state_pointers(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->clip.vp_bo); - brw_add_validated_bo(brw, brw->sf.vp_bo); - brw_add_validated_bo(brw, brw->cc.vp_bo); -} - static void upload_viewport_state_pointers(struct brw_context *brw) { struct intel_context *intel = &brw->intel; @@ -121,9 +113,9 @@ static void upload_viewport_state_pointers(struct brw_context *brw) GEN6_CC_VIEWPORT_MODIFY | GEN6_SF_VIEWPORT_MODIFY | GEN6_CLIP_VIEWPORT_MODIFY); - OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->clip.vp_offset); + OUT_BATCH(brw->sf.vp_offset); + 
OUT_BATCH(brw->cc.vp_offset); ADVANCE_BATCH(); } @@ -135,6 +127,5 @@ const struct brw_tracked_state gen6_viewport_state = { CACHE_NEW_SF_VP | CACHE_NEW_CC_VP) }, - .prepare = prepare_viewport_state_pointers, .emit = upload_viewport_state_pointers, }; diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index a10cec318d6..b46368e36e2 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -34,43 +34,36 @@ #include "intel_batchbuffer.h" static void -upload_vs_state(struct brw_context *brw) +gen6_prepare_vs_push_constants(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; + /* _BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); unsigned int nr_params = brw->vs.prog_data->nr_params / 4; - drm_intel_bo *constant_bo; - int i; + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); + + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + /* XXX: Should this happen somewhere before to get our state flag set? */ + _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + + /* CACHE_NEW_VS_PROG | _NEW_TRANSFORM */ if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) { - /* Disable the push constant buffers. */ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + brw->vs.push_const_size = 0; } else { - int params_uploaded = 0, param_regs; + int params_uploaded = 0; float *param; + int i; - if (brw->vertex_program->IsNVProgram) - _mesa_load_tracked_matrices(ctx); - - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. 
- */ - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - - constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", - (MAX_CLIP_PLANES + nr_params) * - 4 * sizeof(float), - 4096); - drm_intel_gem_bo_map_gtt(constant_bo); - param = constant_bo->virtual; + param = brw_state_batch(brw, + (MAX_CLIP_PLANES + nr_params) * + 4 * sizeof(float), + 32, &brw->vs.push_const_offset); /* This should be loaded like any other param, but it's ad-hoc * until we redo the VS backend. @@ -100,30 +93,56 @@ upload_vs_state(struct brw_context *brw) if (0) { printf("VS constant buffer:\n"); for (i = 0; i < params_uploaded; i++) { - float *buf = (float *)constant_bo->virtual + i * 4; + float *buf = param + i * 4; printf("%d: %f %f %f %f\n", i, buf[0], buf[1], buf[2], buf[3]); } } - drm_intel_gem_bo_unmap_gtt(constant_bo); + brw->vs.push_const_size = (params_uploaded + 1) / 2; + /* We can only push 32 registers of constants at a time. */ + assert(brw->vs.push_const_size <= 32); + } +} + +const struct brw_tracked_state gen6_vs_constants = { + .dirty = { + .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .brw = (BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM), + .cache = 0, + }, + .prepare = gen6_prepare_vs_push_constants, +}; - param_regs = (params_uploaded + 1) / 2; - assert(param_regs <= 32); +static void +upload_vs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + if (brw->vs.push_const_size == 0) { + /* Disable the push constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); - OUT_RELOC(constant_bo, - I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - param_regs - 1); + /* Pointer to the VS constant buffer. 
Covered by the set of + * state flags from gen6_prepare_wm_constants + */ + OUT_BATCH(brw->vs.push_const_offset + + brw->vs.push_const_size - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); - - drm_intel_bo_unreference(constant_bo); } BEGIN_BATCH(6); @@ -149,7 +168,9 @@ const struct brw_tracked_state gen6_vs_state = { .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE | - BRW_NEW_CONTEXT), + BRW_NEW_CONTEXT | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_BATCH), .cache = CACHE_NEW_VS_PROG }, .emit = upload_vs_state, diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 8215cb15a9c..33b233414c6 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -35,16 +35,13 @@ #include "intel_batchbuffer.h" static void -prepare_wm_constants(struct brw_context *brw) +gen6_prepare_wm_push_constants(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); - drm_intel_bo_unreference(brw->wm.push_const_bo); - brw->wm.push_const_bo = NULL; - /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. 
*/ @@ -55,13 +52,11 @@ prepare_wm_constants(struct brw_context *brw) float *constants; unsigned int i; - brw->wm.push_const_bo = drm_intel_bo_alloc(intel->bufmgr, - "WM constant_bo", - brw->wm.prog_data->nr_params * - sizeof(float), - 4096); - drm_intel_gem_bo_map_gtt(brw->wm.push_const_bo); - constants = brw->wm.push_const_bo->virtual; + constants = brw_state_batch(brw, + brw->wm.prog_data->nr_params * + sizeof(float), + 32, &brw->wm.push_const_offset); + for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], *brw->wm.prog_data->param[i]); @@ -80,18 +75,17 @@ prepare_wm_constants(struct brw_context *brw) printf("\n"); printf("\n"); } - - drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo); } } const struct brw_tracked_state gen6_wm_constants = { .dirty = { .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_FRAGMENT_PROGRAM, + .brw = (BRW_NEW_BATCH | + BRW_NEW_FRAGMENT_PROGRAM), .cache = 0, }, - .prepare = prepare_wm_constants, + .prepare = gen6_prepare_wm_push_constants, }; static void @@ -118,8 +112,10 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); - OUT_RELOC(brw->wm.push_const_bo, - I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ + /* Pointer to the WM constant buffer. 
Covered by the set of + * state flags from gen6_prepare_wm_constants + */ + OUT_BATCH(brw->wm.push_const_offset + ALIGN(brw->wm.prog_data->nr_params, brw->wm.prog_data->dispatch_width) / 8 - 1); OUT_BATCH(0); @@ -143,14 +139,19 @@ upload_wm_state(struct brw_context *brw) dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; dw4 |= (brw->wm.prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0); + dw4 |= (brw->wm.prog_data->first_curbe_grf_16 << + GEN6_WM_DISPATCH_START_GRF_SHIFT_2); dw5 |= (brw->wm_max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* CACHE_NEW_WM_PROG */ - if (brw->wm.prog_data->dispatch_width == 8) + if (brw->wm.prog_data->dispatch_width == 8) { dw5 |= GEN6_WM_8_DISPATCH_ENABLE; - else + if (brw->wm.prog_data->prog_offset_16) + dw5 |= GEN6_WM_16_DISPATCH_ENABLE; + } else { dw5 |= GEN6_WM_16_DISPATCH_ENABLE; + } /* _NEW_LINE */ if (ctx->Line.StippleFlag) @@ -194,7 +195,12 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(dw5); OUT_BATCH(dw6); OUT_BATCH(0); /* kernel 1 pointer */ - OUT_BATCH(0); /* kernel 2 pointer */ + if (brw->wm.prog_data->prog_offset_16) { + OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->wm.prog_data->prog_offset_16); + } else { + OUT_BATCH(0); /* kernel 2 pointer */ + } ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 53d6e7c6acc..377989bcc14 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -128,6 +128,11 @@ _intel_batchbuffer_flush(struct intel_context *intel, if (intel->batch.used == 0) return; + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch.bo; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, 4*intel->batch.used); diff --git 
a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 82d29e76712..5a96232107e 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -28,6 +28,7 @@ #include "main/glheader.h" #include "main/mtypes.h" +#include "main/condrender.h" #include "swrast/swrast.h" #include "drivers/common/meta.h" @@ -88,6 +89,9 @@ intelClear(struct gl_context *ctx, GLbitfield mask) struct intel_renderbuffer *irb; int i; + if (!_mesa_check_conditional_render(ctx)) + return; + if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { intel->front_buffer_dirty = GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 02e7f7717fc..acdf35fc71b 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -466,9 +466,11 @@ intel_prepare_render(struct intel_context *intel) * the swap, and getting our hands on that doesn't seem worth it, * so we just us the first batch we emitted after the last swap. 
*/ - if (intel->need_throttle) { - drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); - intel->need_throttle = GL_FALSE; + if (intel->need_throttle && intel->first_post_swapbuffers_batch) { + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + intel->need_throttle = GL_FALSE; } } @@ -650,27 +652,23 @@ intelInitContext(struct intel_context *intel, intel->driFd = sPriv->fd; intel->has_xrgb_textures = GL_TRUE; + intel->gen = intelScreen->gen; if (IS_GEN6(intel->intelScreen->deviceID)) { - intel->gen = 6; intel->needs_ff_sync = GL_TRUE; intel->has_luminance_srgb = GL_TRUE; } else if (IS_GEN5(intel->intelScreen->deviceID)) { - intel->gen = 5; intel->needs_ff_sync = GL_TRUE; intel->has_luminance_srgb = GL_TRUE; } else if (IS_965(intel->intelScreen->deviceID)) { - intel->gen = 4; if (IS_G4X(intel->intelScreen->deviceID)) { intel->has_luminance_srgb = GL_TRUE; intel->is_g4x = GL_TRUE; } } else if (IS_9XX(intel->intelScreen->deviceID)) { - intel->gen = 3; if (IS_945(intel->intelScreen->deviceID)) { intel->is_945 = GL_TRUE; } } else { - intel->gen = 2; if (intel->intelScreen->deviceID == PCI_CHIP_I830_M || intel->intelScreen->deviceID == PCI_CHIP_845_G) { intel->has_xrgb_textures = GL_FALSE; @@ -718,6 +716,12 @@ intelInitContext(struct intel_context *intel, ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT5] = GL_TRUE; #ifndef I915 + /* GL_ARB_texture_compression_rgtc */ + ctx->TextureFormatSupported[MESA_FORMAT_RED_RGTC1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SIGNED_RED_RGTC1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RG_RGTC2] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SIGNED_RG_RGTC2] = GL_TRUE; + /* GL_ARB_texture_rg */ ctx->TextureFormatSupported[MESA_FORMAT_R8] = GL_TRUE; ctx->TextureFormatSupported[MESA_FORMAT_R16] = GL_TRUE; @@ -936,6 +940,8 @@ intelDestroyContext(__DRIcontext * driContextPriv) 
intel->prim.vb = NULL; drm_intel_bo_unreference(intel->prim.vb_bo); intel->prim.vb_bo = NULL; + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; driDestroyOptionCache(&intel->optionCache); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index c59119373da..d3a8a659caa 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -182,6 +182,7 @@ struct intel_context bool is_blit; } batch; + drm_intel_bo *first_post_swapbuffers_batch; GLboolean need_throttle; GLboolean no_batch_wrap; diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index e107534a4da..3fd987abd8c 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -91,6 +91,7 @@ static const struct dri_extension card_extensions[] = { { "GL_ARB_pixel_buffer_object", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_sampler_objects", NULL }, { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, { "GL_ARB_sync", GL_ARB_sync_functions }, @@ -176,6 +177,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_texture_float", NULL }, #endif { "GL_MESA_texture_signed_rgba", NULL }, + { "GL_ARB_texture_compression_rgtc", NULL }, { "GL_ARB_texture_non_power_of_two", NULL }, { "GL_ARB_texture_rg", NULL }, { "GL_EXT_draw_buffers2", GL_EXT_draw_buffers2_functions }, @@ -189,6 +191,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_ATI_envmap_bumpmap", GL_ATI_envmap_bumpmap_functions }, { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions }, { "GL_ATI_texture_env_combine3", NULL }, + { "GL_NV_conditional_render", NULL }, { "GL_NV_texture_env_combine4", NULL }, { NULL, NULL } }; diff 
--git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 43cdd0d2bac..64c7acce1e9 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -29,6 +29,7 @@ #include "main/enums.h" #include "main/image.h" #include "main/colormac.h" +#include "main/condrender.h" #include "main/mtypes.h" #include "main/macros.h" #include "main/pbo.h" @@ -68,7 +69,7 @@ static const GLubyte *map_pbo( struct gl_context *ctx, if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, GL_COLOR_INDEX, GL_BITMAP, - (GLvoid *) bitmap)) { + INT_MAX, (const GLvoid *) bitmap)) { _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)"); return NULL; } @@ -329,6 +330,9 @@ intelBitmap(struct gl_context * ctx, { struct intel_context *intel = intel_context(ctx); + if (!_mesa_check_conditional_render(ctx)) + return; + if (do_blit_bitmap(ctx, x, y, width, height, unpack, pixels)) return; diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index a7ca780e944..e83f1bfab94 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -29,6 +29,7 @@ #include "main/image.h" #include "main/state.h" #include "main/mtypes.h" +#include "main/condrender.h" #include "drivers/common/meta.h" #include "intel_context.h" @@ -204,6 +205,9 @@ intelCopyPixels(struct gl_context * ctx, { DBG("%s\n", __FUNCTION__); + if (!_mesa_check_conditional_render(ctx)) + return; + if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) return; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 64a21a147f0..5dacbb06633 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -216,8 +216,16 @@ intel_create_image(__DRIscreen *screen, { __DRIimage *image; struct intel_screen *intelScreen 
= screen->private; + uint32_t tiling; int cpp; + tiling = I915_TILING_X; + if (use & __DRI_IMAGE_USE_CURSOR) { + if (width != 64 || height != 64) + return NULL; + tiling = I915_TILING_NONE; + } + image = CALLOC(sizeof *image); if (image == NULL) return NULL; @@ -247,7 +255,7 @@ intel_create_image(__DRIscreen *screen, cpp = _mesa_get_format_bytes(image->format); image->region = - intel_region_alloc(intelScreen, I915_TILING_NONE, + intel_region_alloc(intelScreen, tiling, cpp, width, height, GL_TRUE); if (image->region == NULL) { FREE(image); @@ -548,6 +556,18 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->deviceID = strtod(devid_override, NULL); } + if (IS_GEN6(intelScreen->deviceID)) { + intelScreen->gen = 6; + } else if (IS_GEN5(intelScreen->deviceID)) { + intelScreen->gen = 5; + } else if (IS_965(intelScreen->deviceID)) { + intelScreen->gen = 4; + } else if (IS_9XX(intelScreen->deviceID)) { + intelScreen->gen = 3; + } else { + intelScreen->gen = 2; + } + api_mask = (1 << __DRI_API_OPENGL); #if FEATURE_ES1 api_mask |= (1 << __DRI_API_GLES); @@ -660,12 +680,21 @@ intelAllocateBuffer(__DRIscreen *screen, { struct intel_buffer *intelBuffer; struct intel_screen *intelScreen = screen->private; + uint32_t tiling; intelBuffer = CALLOC(sizeof *intelBuffer); if (intelBuffer == NULL) return NULL; - intelBuffer->region = intel_region_alloc(intelScreen, I915_TILING_NONE, + if ((attachment == __DRI_BUFFER_DEPTH || + attachment == __DRI_BUFFER_STENCIL || + attachment == __DRI_BUFFER_DEPTH_STENCIL) && + intelScreen->gen >= 4) + tiling = I915_TILING_Y; + else + tiling = I915_TILING_X; + + intelBuffer->region = intel_region_alloc(intelScreen, tiling, format / 8, width, height, GL_TRUE); if (intelBuffer->region == NULL) { diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index 0f0b5be56dc..4613c9858c4 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -37,6 +37,7 
@@ struct intel_screen { int deviceID; + int gen; int logTextureGranularity; diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 5e705c93619..27f2646ebf5 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -1,5 +1,6 @@ #include "main/mtypes.h" #include "main/macros.h" +#include "main/samplerobj.h" #include "intel_context.h" #include "intel_mipmap_tree.h" @@ -14,11 +15,13 @@ */ static void intel_update_max_level(struct intel_context *intel, - struct intel_texture_object *intelObj) + struct intel_texture_object *intelObj, + struct gl_sampler_object *sampler) { struct gl_texture_object *tObj = &intelObj->base; - if (tObj->Sampler.MinFilter == GL_NEAREST || tObj->Sampler.MinFilter == GL_LINEAR) { + if (sampler->MinFilter == GL_NEAREST || + sampler->MinFilter == GL_LINEAR) { intelObj->_MaxLevel = tObj->BaseLevel; } else { intelObj->_MaxLevel = tObj->_MaxLevel; @@ -70,8 +73,10 @@ copy_image_data_to_tree(struct intel_context *intel, GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) { + struct gl_context *ctx = &intel->ctx; struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); int comp_byte = 0; int cpp; GLuint face, i; @@ -84,7 +89,7 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) /* What levels must the tree include at a minimum? 
*/ - intel_update_max_level(intel, intelObj); + intel_update_max_level(intel, intelObj, sampler); firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]); /* Fallback case: diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 51b896ae91f..5c9f57b4eac 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -20,12 +20,15 @@ C_SOURCES = \ radeon_pair_translate.c \ radeon_pair_schedule.c \ radeon_pair_regalloc.c \ + radeon_pair_dead_sources.c \ radeon_dataflow.c \ radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ + radeon_list.c \ radeon_optimize.c \ radeon_remove_constants.c \ radeon_rename_regs.c \ + radeon_variable.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ @@ -48,6 +51,7 @@ INCLUDES = \ -I. \ -I$(TOP)/include \ -I$(TOP)/src/mesa \ + -I$(TOP)/src/glsl \ ##### TARGETS ##### diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index 2b4bce1c08c..9931537492e 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -3,6 +3,7 @@ Import('*') env = env.Clone() env.Append(CPPPATH = '#/include') env.Append(CPPPATH = '#/src/mesa') +env.Append(CPPPATH = '#/src/glsl') # temporary fix env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '') @@ -22,6 +23,7 @@ r300compiler = env.ConvenienceLibrary( 'radeon_pair_translate.c', 'radeon_pair_schedule.c', 'radeon_pair_regalloc.c', + 'radeon_pair_dead_sources.c', 'radeon_optimize.c', 'radeon_remove_constants.c', 'radeon_rename_regs.c', @@ -30,6 +32,8 @@ r300compiler = env.ConvenienceLibrary( 'radeon_dataflow.c', 'radeon_dataflow_deadcode.c', 'radeon_dataflow_swizzles.c', + 'radeon_variable.c', + 'radeon_list.c', 'r3xx_fragprog.c', 'r300_fragprog.c', 'r300_fragprog_swizzle.c', diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c 
b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 8b73409136f..e6fd1fde62d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -93,7 +93,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r if (src.File == RC_FILE_CONSTANT) { return src.Index | (1 << 5); - } else if (src.File == RC_FILE_TEMPORARY) { + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { use_temporary(code, src.Index); return src.Index & 0x1f; } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 5223aaa71a4..b7bca8c0cfa 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -87,6 +87,18 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) return 0; } +/** + * Determines if the given swizzle is valid for r300/r400. In most situations + * it is better to use r300_swizzle_is_native() which can be accessed via + * struct radeon_compiler *c; c->SwizzleCaps->IsNative(). 
+ */ +int r300_swizzle_is_native_basic(unsigned int swizzle) +{ + if(lookup_native_swizzle(swizzle)) + return 1; + else + return 0; +} /** * Check whether the given instruction supports the swizzle and negate @@ -140,7 +152,6 @@ static void r300_swizzle_split( split->NumPhases = 0; while(mask) { - const struct swizzle_data *best_swizzle = 0; unsigned int best_matchcount = 0; unsigned int best_matchmask = 0; int i, comp; @@ -167,7 +178,6 @@ static void r300_swizzle_split( } } if (matchcount > best_matchcount) { - best_swizzle = sd; best_matchcount = matchcount; best_matchmask = matchmask; if (matchmask == (mask & RC_MASK_XYZ)) diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h index 118476af132..f2635be140d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h @@ -34,5 +34,6 @@ extern struct rc_swizzle_caps r300_swizzle_caps; unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); +int r300_swizzle_is_native_basic(unsigned int swizzle); #endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 9286733635f..e2441e97d87 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -148,8 +148,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"register rename", 1, !is_r500, rc_rename_regs, NULL}, {"pair translate", 1, 1, rc_pair_translate, NULL}, {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, - {"register allocation", 1, opt, rc_pair_regalloc, NULL}, - {"dumb register allocation", 1, !opt, rc_pair_regalloc_inputs_only, NULL}, + {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, + {"register 
allocation", 1, 1, rc_pair_regalloc, &opt}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 140eeed3de3..5e0be6b8881 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -70,6 +70,8 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) if (opcode == RC_OPCODE_TEX || opcode == RC_OPCODE_TXB || opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_TXD || + opcode == RC_OPCODE_TXL || opcode == RC_OPCODE_KIL) { if (reg.Abs) return 0; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index c7f79bc53c7..c30cd753d15 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -207,7 +207,7 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r if (src.File == RC_FILE_CONSTANT) { return src.Index | R500_RGB_ADDR0_CONST; - } else if (src.File == RC_FILE_TEMPORARY) { + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { use_temporary(code, src.Index); return src.Index; } @@ -396,6 +396,12 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst case RC_OPCODE_TXP: code->inst[ip].inst1 |= R500_TEX_INST_PROJ; break; + case RC_OPCODE_TXD: + code->inst[ip].inst1 |= R500_TEX_INST_DXDY; + break; + case RC_OPCODE_TXL: + code->inst[ip].inst1 |= R500_TEX_INST_LOD; + break; default: error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); } @@ -407,8 +413,23 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst 
code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) | R500_TEX_DST_ADDR(inst->DstReg.Index) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + | (GET_SWZ(inst->TexSwizzle, 0) << 24) + | (GET_SWZ(inst->TexSwizzle, 1) << 26) + | (GET_SWZ(inst->TexSwizzle, 2) << 28) + | (GET_SWZ(inst->TexSwizzle, 3) << 30) + ; + + if (inst->Opcode == RC_OPCODE_TXD) { + use_temporary(code, inst->SrcReg[1].Index); + use_temporary(code, inst->SrcReg[2].Index); + + /* DX and DY parameters are specified in a separate register. */ + code->inst[ip].inst3 = + R500_DX_ADDR(inst->SrcReg[1].Index) | + (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | + R500_DY_ADDR(inst->SrcReg[2].Index) | + (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); + } return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index 15ec4418cb8..b077e7b7d65 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -124,6 +124,165 @@ unsigned swizzle_mask(unsigned swizzle, unsigned mask) return ret; } +static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) +{ + if (info->HasTexture) { + return 0; + } + switch (info->Opcode) { + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +/** + * @return A swizzle the results from converting old_swizzle using + * conversion_swizzle + */ +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle) +{ + unsigned int i; + unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + for (i = 0; i < 4; i++) { + unsigned int new_chan = get_swz(conversion_swizzle, i); + if (new_chan == RC_SWIZZLE_UNUSED) { + continue; + } + 
SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); + } + return new_swizzle; +} + +static unsigned int rewrite_writemask( + unsigned int old_mask, + unsigned int conversion_swizzle) +{ + unsigned int new_mask = 0; + unsigned int i; + + for (i = 0; i < 4; i++) { + if (!GET_BIT(old_mask, i) + || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { + continue; + } + new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); + } + + return new_mask; +} + +/** + * This function rewrites the writemask of sub and adjusts the swizzles + * of all its source registers based on the conversion_swizzle. + * conversion_swizzle represents a mapping of the old writemask to the + * new writemask. For a detailed description of how conversion swizzles + * work see rc_rewrite_swizzle(). + */ +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + unsigned int i; + + sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); + + if (!srcs_need_rewrite(info)) { + return ; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + sub->Arg[i].Swizzle = + rc_adjust_channels(sub->Arg[i].Swizzle, + conversion_swizzle); + } +} + +static void normal_rewrite_writemask_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + unsigned int * new_mask = (unsigned int *)userdata; + src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask); +} + +/** + * This function is the same as rc_pair_rewrite_writemask() except it + * operates on normal instructions. 
+ */ +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle) +{ + unsigned int src_conversion; + struct rc_sub_instruction * sub = &inst->U.I; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + sub->DstReg.WriteMask = + rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); + + if (info->HasTexture) { + unsigned int i; + assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); + for (i = 0; i < 4; i++) { + unsigned int swz = GET_SWZ(conversion_swizzle, i); + if (swz > 3) + continue; + SET_SWZ(sub->TexSwizzle, swz, i); + } + } + + if (!srcs_need_rewrite(info)) { + return; + } + + src_conversion = conversion_swizzle; /* sources are remapped with the channel mapping itself, as in rc_pair_rewrite_writemask(), not with the rewritten 4-bit writemask */ + rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &src_conversion); +} + +/** + * This function replaces each value 'swz' in swizzle with the value of + * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's + * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want + * to change all the Y's in swizzle to X, then conversion_swizzle should be + * _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then + * conversion swizzle should be YX__ (0xfc1). 
+ * @param swizzle The swizzle to change + * @param conversion_swizzle Describes the conversion to perform on the swizzle + * @return A converted swizzle + */ +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int conversion_swizzle) +{ + unsigned int chan; + unsigned int out_swizzle = swizzle; + + for (chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(swizzle, chan); + unsigned int new_swz; + if (swz > 3) { + SET_SWZ(out_swizzle, chan, swz); + } else { + new_swz = GET_SWZ(conversion_swizzle, swz); + if (new_swz != RC_SWIZZLE_UNUSED) { + SET_SWZ(out_swizzle, chan, new_swz); + } else { + SET_SWZ(out_swizzle, chan, swz); + } + } + } + return out_swizzle; +} + /** * Left multiplication of a register with a swizzle */ @@ -281,3 +440,197 @@ unsigned int rc_inst_can_use_presub( return 1; } +struct max_data { + unsigned int Max; + unsigned int HasFileType; + rc_register_file File; +}; + +static void max_callback( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct max_data * d = (struct max_data*)userdata; + if (file == d->File && (!d->HasFileType || index > d->Max)) { + d->Max = index; + d->HasFileType = 1; + } +} + +/** + * @return The maximum index of the specified register file used by the + * program. 
+ */ +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file) +{ + struct max_data data; + data.Max = 0; + data.HasFileType = 0; + data.File = file; + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, max_callback, &data); + rc_for_all_writes_mask(inst, max_callback, &data); + } + if (!data.HasFileType) { + return -1; + } else { + return data.Max; + } +} + +static unsigned int get_source_readmask( + struct rc_pair_sub_instruction * sub, + unsigned int source, + unsigned int src_type) +{ + unsigned int i; + unsigned int readmask = 0; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for (i = 0; i < info->NumSrcRegs; i++) { + if (sub->Arg[i].Source != source + || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) { + continue; + } + readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle); + } + return readmask; +} + +/** + * This function attempts to remove a source from a pair instruction. + * @param inst + * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd + * @param source The index of the source to remove + * @param new_readmask A mask representing the components that are read by + * the source that is intended to replace the one you are removing. If you + * want to remove a source only and not replace it, this parameter should be + * zero. 
+ * @return 1 if the source was successfully removed, 0 if it was not + */ +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask) +{ + unsigned int readmask = 0; + + readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type); + readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type); + + if ((new_readmask & readmask) != readmask) + return 0; + + if (src_type & RC_SOURCE_RGB) { + memset(&inst->U.P.RGB.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + if (src_type & RC_SOURCE_ALPHA) { + memset(&inst->U.P.Alpha.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + return 1; +} + +/** + * @return RC_OPCODE_NOP if inst is not a flow control instruction. + * @return The opcode of inst if it is a flow control instruction. + */ +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst) +{ + const struct rc_opcode_info * info; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(inst->U.I.Opcode); + } else { + info = rc_get_opcode_info(inst->U.P.RGB.Opcode); + /*A flow control instruction shouldn't have an alpha + * instruction.*/ + assert(!info->IsFlowControl || + inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); + } + + if (info->IsFlowControl) + return info->Opcode; + else + return RC_OPCODE_NOP; + +} + +/** + * @return The BGNLOOP instruction that starts the loop ended by endloop. 
+ */ +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop) +{ + unsigned int endloop_count = 0; + struct rc_instruction * inst; + for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + endloop_count++; + } else if (op == RC_OPCODE_BGNLOOP) { + if (endloop_count == 0) { + return inst; + } else { + endloop_count--; + } + } + } + return NULL; +} + +/** + * @return The ENDLOOP instruction that ends the loop started by bgnloop. + */ +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop) +{ + unsigned int bgnloop_count = 0; + struct rc_instruction * inst; + for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + bgnloop_count++; + } else if (op == RC_OPCODE_ENDLOOP) { + if (bgnloop_count == 0) { + return inst; + } else { + bgnloop_count--; + } + } + } + return NULL; +} + +/** + * @return A conversion swizzle for converting from old_mask->new_mask + */ +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask) +{ + unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + unsigned int old_idx; + unsigned int new_idx = 0; + for (old_idx = 0; old_idx < 4; old_idx++) { + if (!GET_BIT(old_mask, old_idx)) + continue; + for ( ; new_idx < 4; new_idx++) { + if (GET_BIT(new_mask, new_idx)) { + SET_SWZ(conversion_swizzle, old_idx, new_idx); + new_idx++; + break; + } + } + } + return conversion_swizzle; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index dd0f6c66156..2af289dfabd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -3,7 +3,12 @@ #ifndef RADEON_PROGRAM_UTIL_H #define RADEON_PROGRAM_UTIL_H +#include "radeon_opcodes.h" + +struct 
radeon_compiler; struct rc_instruction; +struct rc_pair_instruction; +struct rc_pair_sub_instruction; struct rc_src_register; unsigned int rc_swizzle_to_writemask(unsigned int swz); @@ -22,6 +27,22 @@ rc_swizzle rc_mask_to_swizzle(unsigned int mask); unsigned swizzle_mask(unsigned swizzle, unsigned mask); +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle); + +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle); + +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle); + +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int new_mask); + struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); void reset_srcreg(struct rc_src_register* reg); @@ -46,4 +67,23 @@ unsigned int rc_inst_can_use_presub( struct rc_src_register presub_src0, struct rc_src_register presub_src1); +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file); + +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask); + +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst); + +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop); +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop); + +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask); + #endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index d1a7eab50f7..b0deb751be0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -151,6 +151,7 @@ static void pair_sub_for_all_args( unsigned int presub_src_count; struct rc_pair_instruction_source * src_array; unsigned int j; + if (src_type & 
RC_SOURCE_RGB) { presub_type = fullinst-> U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; @@ -446,30 +447,6 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v remap_pair_instruction(inst, cb, userdata); } -/** - * @return RC_OPCODE_NOOP if inst is not a flow control instruction. - * @return The opcode of inst if it is a flow control instruction. - */ -static rc_opcode get_flow_control_inst(struct rc_instruction * inst) -{ - const struct rc_opcode_info * info; - if (inst->Type == RC_INSTRUCTION_NORMAL) { - info = rc_get_opcode_info(inst->U.I.Opcode); - } else { - info = rc_get_opcode_info(inst->U.P.RGB.Opcode); - /*A flow control instruction shouldn't have an alpha - * instruction.*/ - assert(!info->IsFlowControl || - inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); - } - - if (info->IsFlowControl) - return info->Opcode; - else - return RC_OPCODE_NOP; - -} - struct branch_write_mask { unsigned int IfWriteMask:4; unsigned int ElseWriteMask:4; @@ -495,12 +472,11 @@ struct get_readers_callback_data { struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; }; -static void add_reader( +static struct rc_reader * add_reader( struct memory_pool * pool, struct rc_reader_data * data, struct rc_instruction * inst, - unsigned int mask, - void * arg_or_src) + unsigned int mask) { struct rc_reader * new; memory_pool_array_reserve(pool, struct rc_reader, data->Readers, @@ -508,11 +484,32 @@ static void add_reader( new = &data->Readers[data->ReaderCount++]; new->Inst = inst; new->WriteMask = mask; - if (inst->Type == RC_INSTRUCTION_NORMAL) { - new->U.Src = arg_or_src; - } else { - new->U.Arg = arg_or_src; - } + return new; +} + +static void add_reader_normal( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_src_register * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.I.Src = src; +} + + +static void add_reader_pair( + struct memory_pool * 
pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.P.Src = src; + new->U.P.Arg = arg; } static unsigned int get_readers_read_callback( @@ -544,6 +541,11 @@ static unsigned int get_readers_read_callback( return shared_mask; } + if (cb_data->ReaderData->LoopDepth > 0) { + cb_data->ReaderData->AbortOnWrite |= + (read_mask & cb_data->AliveWriteMask); + } + /* XXX The behavior in this case should be configurable. */ if ((read_mask & cb_data->AliveWriteMask) != read_mask) { cb_data->ReaderData->Abort = 1; @@ -572,10 +574,10 @@ static void get_readers_pair_read_callback( if (d->ReadPairCB) d->ReadPairCB(d->ReaderData, inst, arg, src); - if (d->ReaderData->Abort) + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) return; - add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, arg); + add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src); } /** @@ -600,10 +602,10 @@ static void get_readers_normal_read_callback( if (d->ReadNormalCB) d->ReadNormalCB(d->ReaderData, inst, src); - if (d->ReaderData->Abort) + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) return; - add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, src); + add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src); } /** @@ -624,12 +626,57 @@ static void get_readers_write_callback( unsigned int shared_mask = mask & d->DstMask; d->ReaderData->AbortOnRead &= ~shared_mask; d->AliveWriteMask &= ~shared_mask; + if (d->ReaderData->AbortOnWrite & shared_mask) { + d->ReaderData->Abort = 1; + } } if(d->WriteCB) d->WriteCB(d->ReaderData, inst, file, index, mask); } +static void push_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + (*branch_depth)++; + if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + 
return; + } + d->BranchMasks[*branch_depth].IfWriteMask = + d->AliveWriteMask; +} + +static void pop_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + struct branch_write_mask * masks = &d->BranchMasks[*branch_depth]; + + if (masks->HasElse) { + /* Abort on read for components that were written in the IF + * block. */ + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~masks->ElseWriteMask; + /* Abort on read for components that were written in the ELSE + * block. */ + d->ReaderData->AbortOnRead |= + masks->ElseWriteMask & ~d->AliveWriteMask; + + d->AliveWriteMask = masks->IfWriteMask + ^ ((masks->IfWriteMask ^ masks->ElseWriteMask) + & (masks->IfWriteMask ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + + } + memset(masks, 0, sizeof(struct branch_write_mask)); + (*branch_depth)--; +} + static void get_readers_for_single_write( void * userdata, struct rc_instruction * writer, @@ -639,10 +686,14 @@ static void get_readers_for_single_write( { struct rc_instruction * tmp; unsigned int branch_depth = 0; + struct rc_instruction * endloop = NULL; + unsigned int abort_on_read_at_endloop; struct get_readers_callback_data * d = userdata; d->ReaderData->Writer = writer; d->ReaderData->AbortOnRead = 0; + d->ReaderData->AbortOnWrite = 0; + d->ReaderData->LoopDepth = 0; d->ReaderData->InElse = 0; d->DstFile = dst_file; d->DstIndex = dst_index; @@ -655,32 +706,43 @@ static void get_readers_for_single_write( for(tmp = writer->Next; tmp != &d->C->Program.Instructions; tmp = tmp->Next){ - rc_opcode opcode = get_flow_control_inst(tmp); + rc_opcode opcode = rc_get_flow_control_inst(tmp); switch(opcode) { case RC_OPCODE_BGNLOOP: - /* XXX We can do better when we see a BGNLOOP if we - * add a flag called AbortOnWrite to struct - * rc_reader_data and leave it set until the next - * ENDLOOP. 
*/ + d->ReaderData->LoopDepth++; + push_branch_mask(d, &branch_depth); + break; case RC_OPCODE_ENDLOOP: - /* XXX We can do better when we see an ENDLOOP by - * searching backwards from writer and looking for - * readers of writer's destination index. If we find a - * reader before we get to the BGNLOOP, we must abort - * unless there is another writer between that reader - * and the BGNLOOP. */ - case RC_OPCODE_BRK: - case RC_OPCODE_CONT: - d->ReaderData->Abort = 1; - return; - case RC_OPCODE_IF: - branch_depth++; - if (branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { - d->ReaderData->Abort = 1; - return; + if (d->ReaderData->LoopDepth > 0) { + d->ReaderData->LoopDepth--; + if (d->ReaderData->LoopDepth == 0) { + d->ReaderData->AbortOnWrite = 0; + } + pop_branch_mask(d, &branch_depth); + } else { + /* Here we have reached an ENDLOOP without + * seeing its BGNLOOP. This means that + * the writer was written inside of a loop, + * so it could have readers that are above it + * (i.e. they have a lower IP). To find these + * readers we jump to the BGNLOOP instruction + * and check each instruction until we get + * back to the writer. + */ + endloop = tmp; + tmp = rc_match_endloop(tmp); + if (!tmp) { + rc_error(d->C, "Failed to match endloop.\n"); + d->ReaderData->Abort = 1; + return; + } + abort_on_read_at_endloop = d->ReaderData->AbortOnRead; + d->ReaderData->AbortOnRead |= d->AliveWriteMask; + continue; } - d->BranchMasks[branch_depth].IfWriteMask = - d->AliveWriteMask; + break; + case RC_OPCODE_IF: + push_branch_mask(d, &branch_depth); break; case RC_OPCODE_ELSE: if (branch_depth == 0) { @@ -700,35 +762,7 @@ static void get_readers_for_single_write( d->ReaderData->InElse = 0; } else { - struct branch_write_mask * masks = - &d->BranchMasks[branch_depth]; - - if (masks->HasElse) { - /* Abort on read for components that - * were written in the IF block. 
*/ - d->ReaderData->AbortOnRead |= - masks->IfWriteMask - & ~masks->ElseWriteMask; - /* Abort on read for components that - * were written in the ELSE block. */ - d->ReaderData->AbortOnRead |= - masks->ElseWriteMask - & ~d->AliveWriteMask; - d->AliveWriteMask = masks->IfWriteMask - ^ ((masks->IfWriteMask ^ - masks->ElseWriteMask) - & (masks->IfWriteMask - ^ d->AliveWriteMask)); - } else { - d->ReaderData->AbortOnRead |= - masks->IfWriteMask - & ~d->AliveWriteMask; - d->AliveWriteMask = masks->IfWriteMask; - - } - memset(masks, 0, - sizeof(struct branch_write_mask)); - branch_depth--; + pop_branch_mask(d, &branch_depth); } break; default: @@ -745,9 +779,17 @@ static void get_readers_for_single_write( rc_pair_for_all_reads_arg(tmp, get_readers_pair_read_callback, d); } + + /* This can happen when we jump from an ENDLOOP to BGNLOOP */ + if (tmp == writer) { + tmp = endloop; + endloop = NULL; + d->ReaderData->AbortOnRead = abort_on_read_at_endloop; + continue; + } rc_for_all_writes_mask(tmp, get_readers_write_callback, d); - if (d->ReaderData->Abort) + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) return; if (branch_depth == 0 && !d->AliveWriteMask) @@ -755,6 +797,26 @@ static void get_readers_for_single_write( } } +static void init_get_readers_callback_data( + struct get_readers_callback_data * d, + struct rc_reader_data * reader_data, + struct radeon_compiler * c, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + reader_data->Abort = 0; + reader_data->ReaderCount = 0; + reader_data->ReadersReserved = 0; + reader_data->Readers = NULL; + + d->C = c; + d->ReaderData = reader_data; + d->ReadNormalCB = read_normal_cb; + d->ReadPairCB = read_pair_cb; + d->WriteCB = write_cb; +} + /** * This function will create a list of readers via the rc_reader_data struct. 
* This function will abort (set the flag data->Abort) and return if it @@ -803,16 +865,28 @@ void rc_get_readers( { struct get_readers_callback_data d; - data->Abort = 0; - data->ReaderCount = 0; - data->ReadersReserved = 0; - data->Readers = NULL; - - d.C = c; - d.ReaderData = data; - d.ReadNormalCB = read_normal_cb; - d.ReadPairCB = read_pair_cb; - d.WriteCB = write_cb; + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); } + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + if (sub_writer->WriteMask) { + get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, + sub_writer->DestIndex, sub_writer->WriteMask); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index ef971c5b234..d8a627258ea 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -37,6 +37,7 @@ struct rc_swizzle_caps; struct rc_src_register; struct rc_pair_instruction_arg; struct rc_pair_instruction_source; +struct rc_pair_sub_instruction; struct rc_compiler; @@ -74,14 +75,21 @@ struct rc_reader { struct rc_instruction * Inst; unsigned int WriteMask; union { - struct rc_src_register * Src; - struct rc_pair_instruction_arg * Arg; + struct { + struct rc_src_register * Src; + } I; + struct { + struct rc_pair_instruction_arg * Arg; + struct rc_pair_instruction_source * Src; + } P; } U; }; struct rc_reader_data { unsigned int Abort; unsigned int AbortOnRead; + unsigned int AbortOnWrite; + unsigned int 
LoopDepth; unsigned int InElse; struct rc_instruction * Writer; @@ -89,6 +97,9 @@ struct rc_reader_data { unsigned int ReadersReserved; struct rc_reader * Readers; + /* If this flag is enabled, rc_get_readers will exit as soon as possible + * after the Abort flag is set.*/ + unsigned int ExitOnAbort; void * CbData; }; @@ -99,6 +110,15 @@ void rc_get_readers( rc_read_src_fn read_normal_cb, rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb); + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); /** * Compiler passes based on dataflow analysis. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.c b/src/mesa/drivers/dri/r300/compiler/radeon_list.c new file mode 100644 index 00000000000..811c908a81a --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_list.c @@ -0,0 +1,90 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_list.h" + +#include <stdlib.h> +#include <stdio.h> + +#include "memory_pool.h" + +struct rc_list * rc_list(struct memory_pool * pool, void * item) +{ + struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list)); + new->Item = item; + new->Next = NULL; + new->Prev = NULL; + + return new; +} + +void rc_list_add(struct rc_list ** list, struct rc_list * new_value) +{ + struct rc_list * temp; + + if (*list == NULL) { + *list = new_value; + return; + } + + for (temp = *list; temp->Next; temp = temp->Next); + + temp->Next = new_value; + new_value->Prev = temp; +} + +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value) +{ + if (*list == rm_value) { + *list = rm_value->Next; + return; + } + + rm_value->Prev->Next = rm_value->Next; + if (rm_value->Next) { + rm_value->Next->Prev = rm_value->Prev; + } +} + +unsigned int rc_list_count(struct rc_list * list) +{ + unsigned int count = 0; + while (list) { + count++; + list = list->Next; + } + return count; +} + +void rc_list_print(struct rc_list * list) +{ + while(list) { + fprintf(stderr, "%p->", list->Item); + list = list->Next; + } + fprintf(stderr, "\n"); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.h b/src/mesa/drivers/dri/r300/compiler/radeon_list.h new file mode 100644 index 00000000000..b3c8f89cc68 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_list.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef RADEON_LIST_H +#define RADEON_LIST_H + +struct memory_pool; + +struct rc_list { + void * Item; + struct rc_list * Prev; + struct rc_list * Next; +}; + +struct rc_list * rc_list(struct memory_pool * pool, void * item); +void rc_list_add(struct rc_list ** list, struct rc_list * new_value); +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value); +unsigned int rc_list_count(struct rc_list * list); +void rc_list_print(struct rc_list * list); + +#endif /* RADEON_LIST_H */ + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index e3e498e8fb4..afd78ad79dd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -481,6 +481,7 @@ void rc_compute_sources_for_writemask( break; case RC_OPCODE_TXB: case RC_OPCODE_TXP: + case RC_OPCODE_TXL: srcmasks[0] |= RC_MASK_W; /* Fall through */ case RC_OPCODE_TEX: @@ -500,6 +501,33 @@ void rc_compute_sources_for_writemask( break; } break; + case RC_OPCODE_TXD: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_Y; + /* Fall through. */ + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + srcmasks[1] |= RC_MASK_X; + srcmasks[2] |= RC_MASK_X; + break; + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_Z; + /* Fall through. 
*/ + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + srcmasks[2] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + srcmasks[2] |= RC_MASK_XYZ; + break; + } + break; case RC_OPCODE_DST: srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; srcmasks[1] |= RC_MASK_Y | RC_MASK_W; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 79898e1047e..5b4fba80873 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -91,6 +91,8 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, (inst->U.I.Opcode == RC_OPCODE_TEX || inst->U.I.Opcode == RC_OPCODE_TXB || inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_TXD || + inst->U.I.Opcode == RC_OPCODE_TXL || inst->U.I.Opcode == RC_OPCODE_KIL)){ reader_data->Abort = 1; return; @@ -144,6 +146,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i return; /* Get a list of all the readers of this MOV instruction. */ + reader_data.ExitOnAbort = 1; rc_get_readers(c, inst_mov, &reader_data, copy_propagate_scan_read, NULL, is_src_clobbered_scan_write); @@ -154,7 +157,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i /* Propagate the MOV instruction. 
*/ for (i = 0; i < reader_data.ReaderCount; i++) { struct rc_instruction * inst = reader_data.Readers[i].Inst; - *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]); + *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) inst->U.I.PreSub = inst_mov->U.I.PreSub; @@ -453,6 +456,7 @@ static int presub_helper( rc_presubtract_op cb_op = presub_opcode; reader_data.CbData = &cb_op; + reader_data.ExitOnAbort = 1; rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, is_src_clobbered_scan_write); @@ -466,7 +470,7 @@ static int presub_helper( rc_get_opcode_info(reader.Inst->U.I.Opcode); for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { - if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src) + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) presub_replace(inst_add, reader.Inst, src_index); } } @@ -619,13 +623,11 @@ static int peephole_add_presub_inv( struct radeon_compiler * c, struct rc_instruction * inst_add) { - unsigned int i, swz, mask; + unsigned int i, swz; if (!is_presub_candidate(c, inst_add)) return 0; - mask = inst_add->U.I.DstReg.WriteMask; - /* Check if src0 is 1. 
*/ /* XXX It would be nice to use is_src_uniform_constant here, but that * function only works if the register's file is RC_FILE_NONE */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c new file mode 100644 index 00000000000..1e9a2c09d44 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c @@ -0,0 +1,62 @@ + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_opcodes.h" +#include "radeon_program_pair.h" + +static void mark_used_presub(struct rc_pair_sub_instruction * sub) +{ + if (sub->Src[RC_PAIR_PRESUB_SRC].Used) { + unsigned int presub_reg_count = rc_presubtract_src_reg_count( + sub->Src[RC_PAIR_PRESUB_SRC].Index); + unsigned int i; + for (i = 0; i < presub_reg_count; i++) { + sub->Src[i].Used = 1; + } + } +} + +static void mark_used( + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub) +{ + unsigned int i; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + if (src_type & RC_SOURCE_RGB) { + inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; + } + + if (src_type & RC_SOURCE_ALPHA) { + inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; + } + } +} + +/** + * This pass finds sources that are not used by their instruction and marks + * them as unused. 
+ */ +void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + unsigned int i; + if (inst->Type == RC_INSTRUCTION_NORMAL) + continue; + + /* Mark all sources as unused */ + for (i = 0; i < 4; i++) { + inst->U.P.RGB.Src[i].Used = 0; + inst->U.P.Alpha.Src[i].Used = 0; + } + mark_used(inst, &inst->U.P.RGB); + mark_used(inst, &inst->U.P.Alpha); + + mark_used_presub(&inst->U.P.RGB); + mark_used_presub(&inst->U.P.Alpha); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index d53181e1f75..49983d6ce75 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2011 Tom Stellard <[email protected]> * * All Rights Reserved. * @@ -29,125 +30,126 @@ #include <stdio.h> +#include "main/glheader.h" +#include "program/register_allocate.h" +#include "ralloc.h" + +#include "r300_fragprog_swizzle.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" - +#include "radeon_list.h" +#include "radeon_variable.h" #define VERBOSE 0 #define DBG(...) 
do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) -struct live_intervals { - int Start; - int End; - struct live_intervals * Next; -}; struct register_info { - struct live_intervals Live; + struct live_intervals Live[4]; unsigned int Used:1; unsigned int Allocated:1; unsigned int File:3; unsigned int Index:RC_REGISTER_INDEX_BITS; -}; - -struct hardware_register { - struct live_intervals * Used; + unsigned int Writemask; }; struct regalloc_state { struct radeon_compiler * C; - struct register_info Input[RC_REGISTER_MAX_INDEX]; - struct register_info Temporary[RC_REGISTER_MAX_INDEX]; - - struct hardware_register * HwTemporary; - unsigned int NumHwTemporaries; - /** - * If an instruction is inside of a loop, EndLoop will be the - * IP of the ENDLOOP instruction, and BeginLoop will be the IP - * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop - * will be -1. - */ - int EndLoop; - int BeginLoop; + struct register_info * Input; + unsigned int NumInputs; + + struct register_info * Temporary; + unsigned int NumTemporaries; + + unsigned int Simple; + int LoopEnd; +}; + +enum rc_reg_class { + RC_REG_CLASS_SINGLE, + RC_REG_CLASS_DOUBLE, + RC_REG_CLASS_TRIPLE, + RC_REG_CLASS_ALPHA, + RC_REG_CLASS_SINGLE_PLUS_ALPHA, + RC_REG_CLASS_DOUBLE_PLUS_ALPHA, + RC_REG_CLASS_TRIPLE_PLUS_ALPHA, + RC_REG_CLASS_X, + RC_REG_CLASS_Y, + RC_REG_CLASS_Z, + RC_REG_CLASS_XY, + RC_REG_CLASS_YZ, + RC_REG_CLASS_XZ, + RC_REG_CLASS_XW, + RC_REG_CLASS_YW, + RC_REG_CLASS_ZW, + RC_REG_CLASS_XYW, + RC_REG_CLASS_YZW, + RC_REG_CLASS_XZW, + RC_REG_CLASS_COUNT +}; + +struct rc_class { + enum rc_reg_class Class; + + unsigned int WritemaskCount; + + /** This is 1 if this class is being used by the register allocator + * and 0 otherwise */ + unsigned int Used; + + /** This is the ID number assigned to this class by ra. 
*/ + unsigned int Id; + + /** List of writemasks that belong to this class */ + unsigned int Writemasks[3]; + + }; static void print_live_intervals(struct live_intervals * src) { - if (!src) { + if (!src || !src->Used) { DBG("(null)"); return; } - while(src) { - DBG("(%i,%i)", src->Start, src->End); - src = src->Next; - } + DBG("(%i,%i)", src->Start, src->End); } -static void add_live_intervals(struct regalloc_state * s, - struct live_intervals ** dst, struct live_intervals * src) +static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) { - struct live_intervals ** dst_backup = dst; - if (VERBOSE) { - DBG("add_live_intervals: "); - print_live_intervals(*dst); + DBG("overlap_live_intervals: "); + print_live_intervals(a); DBG(" to "); - print_live_intervals(src); - DBG("\n"); - } - - while(src) { - if (*dst && (*dst)->End < src->Start) { - dst = &(*dst)->Next; - } else if (!*dst || (*dst)->Start > src->End) { - struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li)); - li->Start = src->Start; - li->End = src->End; - li->Next = *dst; - *dst = li; - src = src->Next; - } else { - if (src->End > (*dst)->End) - (*dst)->End = src->End; - if (src->Start < (*dst)->Start) - (*dst)->Start = src->Start; - src = src->Next; - } - } - - if (VERBOSE) { - DBG(" result: "); - print_live_intervals(*dst_backup); + print_live_intervals(b); DBG("\n"); } -} -static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src) -{ - if (VERBOSE) { - DBG("overlap_live_intervals: "); - print_live_intervals(dst); - DBG(" to "); - print_live_intervals(src); - DBG("\n"); + if (!a->Used || !b->Used) { + DBG(" unused interval\n"); + return 0; } - while(src && dst) { - if (dst->End <= src->Start) { - dst = dst->Next; - } else if (dst->End <= src->End) { + if (a->Start > b->Start) { + if (a->Start < b->End) { + DBG(" overlap\n"); + return 1; + } + } else if (b->Start > a->Start) { + if (b->Start < a->End) { DBG(" overlap\n"); 
return 1; - } else if (dst->Start < src->End) { + } + } else { /* a->Start == b->Start */ + if (a->Start != a->End && b->Start != b->End) { DBG(" overlap\n"); return 1; - } else { - src = src->Next; } } @@ -156,92 +158,27 @@ static int overlap_live_intervals(struct live_intervals * dst, struct live_inter return 0; } -static int try_add_live_intervals(struct regalloc_state * s, - struct live_intervals ** dst, struct live_intervals * src) -{ - if (overlap_live_intervals(*dst, src)) - return 0; - - add_live_intervals(s, dst, src); - return 1; -} - -static void scan_callback(void * data, struct rc_instruction * inst, +static void scan_read_callback(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { struct regalloc_state * s = data; struct register_info * reg; + unsigned int i; - if (file == RC_FILE_TEMPORARY) - reg = &s->Temporary[index]; - else if (file == RC_FILE_INPUT) - reg = &s->Input[index]; - else + if (file != RC_FILE_INPUT) return; - if (!reg->Used) { - reg->Used = 1; - if (file == RC_FILE_INPUT) - reg->Live.Start = -1; - else if (s->BeginLoop >= 0) - reg->Live.Start = s->BeginLoop; - else - reg->Live.Start = inst->IP; - reg->Live.End = inst->IP; - } else if (s->EndLoop >= 0) - reg->Live.End = s->EndLoop; - else if (inst->IP > reg->Live.End) - reg->Live.End = inst->IP; -} + s->Input[index].Used = 1; + reg = &s->Input[index]; -static void compute_live_intervals(struct radeon_compiler *c, - struct regalloc_state *s) -{ - memset(s, 0, sizeof(*s)); - s->C = c; - s->NumHwTemporaries = c->max_temp_regs; - s->BeginLoop = -1; - s->EndLoop = -1; - s->HwTemporary = - memory_pool_malloc(&c->Pool, - s->NumHwTemporaries * sizeof(struct hardware_register)); - memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register)); - - rc_recompute_ips(s->C); - - for(struct rc_instruction * inst = s->C->Program.Instructions.Next; - inst != &s->C->Program.Instructions; - inst = inst->Next) { - - /* For all 
instructions inside of a loop, the ENDLOOP - * instruction is used as the end of the live interval and - * the BGNLOOP instruction is used as the beginning. */ - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { - int loops = 1; - struct rc_instruction * tmp; - s->BeginLoop = inst->IP; - for(tmp = inst->Next; - tmp != &s->C->Program.Instructions; - tmp = tmp->Next) { - if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) { - loops++; - } else if (tmp->U.I.Opcode - == RC_OPCODE_ENDLOOP) { - if(!--loops) { - s->EndLoop = tmp->IP; - break; - } - } - } - } - - if (inst->IP == s->EndLoop) { - s->EndLoop = -1; - s->BeginLoop = -1; + for (i = 0; i < 4; i++) { + if (!((mask >> i) & 0x1)) { + continue; } - - rc_for_all_reads_mask(inst, scan_callback, s); - rc_for_all_writes_mask(inst, scan_callback, s); + reg->Live[i].Used = 1; + reg->Live[i].Start = 0; + reg->Live[i].End = + s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; } } @@ -251,7 +188,7 @@ static void remap_register(void * data, struct rc_instruction * inst, struct regalloc_state * s = data; const struct register_info * reg; - if (*file == RC_FILE_TEMPORARY) + if (*file == RC_FILE_TEMPORARY && s->Simple) reg = &s->Temporary[*index]; else if (*file == RC_FILE_INPUT) reg = &s->Input[*index]; @@ -259,106 +196,511 @@ static void remap_register(void * data, struct rc_instruction * inst, return; if (reg->Allocated) { - *file = reg->File; *index = reg->Index; } } -static void do_regalloc(struct regalloc_state * s) +static void alloc_input_simple(void * data, unsigned int input, + unsigned int hwreg) +{ + struct regalloc_state * s = data; + + if (input >= s->NumInputs) + return; + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; +} + +/* This functions offsets the temporary register indices by the number + * of input registers, because input registers are actually temporaries and + * should not occupy the same space. 
+ * + * This pass is supposed to be used to maintain correct allocation of inputs + * if the standard register allocation is disabled. */ +static void do_regalloc_inputs_only(struct regalloc_state * s) +{ + for (unsigned i = 0; i < s->NumTemporaries; i++) { + s->Temporary[i].Allocated = 1; + s->Temporary[i].File = RC_FILE_TEMPORARY; + s->Temporary[i].Index = i + s->NumInputs; + } +} + +static unsigned int is_derivative(rc_opcode op) { - /* Simple and stupid greedy register allocation */ - for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { - struct register_info * reg = &s->Temporary[index]; + return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); +} - if (!reg->Used) +static int find_class( + struct rc_class * classes, + unsigned int writemask, + unsigned int max_writemask_count) +{ + unsigned int i; + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + unsigned int j; + if (classes[i].WritemaskCount > max_writemask_count) { continue; + } + for (j = 0; j < 3; j++) { + if (classes[i].Writemasks[j] == writemask) { + return i; + } + } + } + return -1; +} + +static enum rc_reg_class variable_get_class( + struct rc_variable * variable, + struct rc_class * classes) +{ + unsigned int i; + unsigned int can_change_writemask= 1; + unsigned int writemask = rc_variable_writemask_sum(variable); + struct rc_list * readers = rc_variable_readers_union(variable); + int class_index; + + if (!variable->C->is_r500) { + struct rc_class c; + /* The assumption here is that if an instruction has type + * RC_INSTRUCTION_NORMAL then it is a TEX instruction. + * r300 and r400 can't swizzle the result of a TEX lookup. 
*/ + if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = RC_MASK_XYZW; + } - for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) { - if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) { - reg->Allocated = 1; - reg->File = RC_FILE_TEMPORARY; - reg->Index = hwreg; - goto success; + /* Check if it is possible to do swizzle packing for r300/r400 + * without creating non-native swizzles. */ + class_index = find_class(classes, writemask, 3); + if (class_index < 0) { + goto error; + } + c = classes[class_index]; + for (i = 0; i < c.WritemaskCount; i++) { + int j; + unsigned int conversion_swizzle = + rc_make_conversion_swizzle( + writemask, c.Writemasks[i]); + for (j = 0; j < variable->ReaderCount; j++) { + unsigned int old_swizzle; + unsigned int new_swizzle; + struct rc_reader r = variable->Readers[j]; + if (r.Inst->Type == RC_INSTRUCTION_PAIR ) { + old_swizzle = r.U.P.Arg->Swizzle; + } else { + old_swizzle = r.U.I.Src->Swizzle; + } + new_swizzle = rc_adjust_channels( + old_swizzle, conversion_swizzle); + if (!r300_swizzle_is_native_basic(new_swizzle)) { + can_change_writemask = 0; + break; + } + } + if (!can_change_writemask) { + break; } } + } - rc_error(s->C, "Ran out of hardware temporaries\n"); - return; + if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { + /* DDX/DDY seem to always fail when their writemasks are + * changed.*/ + if (is_derivative(variable->Inst->U.P.RGB.Opcode) + || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + } + } + for ( ; readers; readers = readers->Next) { + struct rc_reader * r = readers->Item; + if (r->Inst->Type == RC_INSTRUCTION_PAIR) { + if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { + can_change_writemask = 0; + break; + } + /* DDX/DDY also fail when their swizzles are changed. 
*/ + if (is_derivative(r->Inst->U.P.RGB.Opcode) + || is_derivative(r->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + break; + } + } + } - success:; + class_index = find_class(classes, writemask, + can_change_writemask ? 3 : 1); + if (class_index > -1) { + return classes[class_index].Class; + } else { +error: + rc_error(variable->C, + "Could not find class for index=%u mask=%u\n", + variable->Dst.Index, writemask); + return 0; } +} - /* Rewrite all instructions based on the translation table we built */ - for(struct rc_instruction * inst = s->C->Program.Instructions.Next; - inst != &s->C->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, &remap_register, s); +static unsigned int overlap_live_intervals_array( + struct live_intervals * a, + struct live_intervals * b) +{ + unsigned int a_chan, b_chan; + for (a_chan = 0; a_chan < 4; a_chan++) { + for (b_chan = 0; b_chan < 4; b_chan++) { + if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { + return 1; + } + } } + return 0; } -static void alloc_input(void * data, unsigned int input, unsigned int hwreg) +static unsigned int reg_get_index(int reg) { - struct regalloc_state * s = data; + return reg / RC_MASK_XYZW; +} - if (!s->Input[input].Used) - return; +static unsigned int reg_get_writemask(int reg) +{ + return (reg % RC_MASK_XYZW) + 1; +} - add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live); +static int get_reg_id(unsigned int index, unsigned int writemask) +{ + assert(writemask); + if (writemask == 0) { + return 0; + } + return (index * RC_MASK_XYZW) + (writemask - 1); +} - s->Input[input].Allocated = 1; - s->Input[input].File = RC_FILE_TEMPORARY; - s->Input[input].Index = hwreg; +#if VERBOSE +static void print_reg(int reg) +{ + unsigned int index = reg_get_index(reg); + unsigned int mask = reg_get_writemask(reg); + fprintf(stderr, "Temp[%u].%c%c%c%c", index, + mask & RC_MASK_X ? 'x' : '_', + mask & RC_MASK_Y ? 'y' : '_', + mask & RC_MASK_Z ? 
'z' : '_', + mask & RC_MASK_W ? 'w' : '_'); +} +#endif +static void add_register_conflicts( + struct ra_regs * regs, + unsigned int max_temp_regs) +{ + unsigned int index, a_mask, b_mask; + for (index = 0; index < max_temp_regs; index++) { + for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { + for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; + b_mask++) { + if (a_mask & b_mask) { + ra_add_reg_conflict(regs, + get_reg_id(index, a_mask), + get_reg_id(index, b_mask)); + } + } + } + } } -void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +static void do_advanced_regalloc(struct regalloc_state * s) { - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct regalloc_state s; + struct rc_class rc_class_list [] = { + {RC_REG_CLASS_SINGLE, 3, 0, 0, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z}}, + {RC_REG_CLASS_DOUBLE, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_Y | RC_MASK_Z}}, + {RC_REG_CLASS_TRIPLE, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ALPHA, 1, 0, 0, + {RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_X, 1, 0, 0, + {RC_MASK_X, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Y, 1, 0, 0, + {RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Z, 1, 0, 0, + {RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XY, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZ, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZ, 1, 0, 0, + {RC_MASK_X | 
RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XW, 1, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ZW, 1, 0, 0, + {RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XYW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}} + }; + + unsigned int i, j, index, input_node, node_count, node_index; + unsigned int * node_classes; + unsigned int * input_classes; + struct rc_instruction * inst; + struct rc_list * var_ptr; + struct rc_list * variables; + struct ra_regs * regs; + struct ra_graph * graph; + + /* Allocate the main ra data structure */ + regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW); + + /* Get list of program variables */ + variables = rc_get_variables(s->C); + node_count = rc_list_count(variables); + node_classes = memory_pool_malloc(&s->C->Pool, + node_count * sizeof(unsigned int)); + input_classes = memory_pool_malloc(&s->C->Pool, + s->NumInputs * sizeof(unsigned int)); + + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + unsigned int class_index; + /* Compute the live intervals */ + rc_variable_compute_live_intervals(var_ptr->Item); + + class_index = variable_get_class(var_ptr->Item, rc_class_list); + + /* If we haven't used this register class yet, mark it + * as used and allocate space for it. 
*/ + if (!rc_class_list[class_index].Used) { + rc_class_list[class_index].Used = 1; + rc_class_list[class_index].Id = ra_alloc_reg_class(regs); + } - compute_live_intervals(cc, &s); + node_classes[node_index] = rc_class_list[class_index].Id; + } - c->AllocateHwInputs(c, &alloc_input, &s); - do_regalloc(&s); -} + /* Assign registers to the classes */ + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + struct rc_class class = rc_class_list[i]; + if (!class.Used) { + continue; + } -/* This functions offsets the temporary register indices by the number - * of input registers, because input registers are actually temporaries and - * should not occupy the same space. - * - * This pass is supposed to be used to maintain correct allocation of inputs - * if the standard register allocation is disabled. */ -void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user) -{ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct regalloc_state s; - int temp_reg_offset; + for (index = 0; index < s->C->max_temp_regs; index++) { + for (j = 0; j < class.WritemaskCount; j++) { + int reg_id = get_reg_id(index, + class.Writemasks[j]); + ra_class_add_reg(regs, class.Id, reg_id); + } + } + } + + /* Add register conflicts */ + add_register_conflicts(regs, s->C->max_temp_regs); + + /* Calculate live intervals for input registers */ + for (inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > s->LoopEnd) { + s->LoopEnd = endloop->IP; + } + } + rc_for_all_reads_mask(inst, scan_read_callback, s); + } - compute_live_intervals(cc, &s); + /* Create classes for input registers */ + for (i = 0; i < s->NumInputs; i++) { + unsigned int chan, class_id, writemask = 0; + for (chan = 0; chan < 4; chan++) { + if (s->Input[i].Live[chan].Used) { + 
writemask |= (1 << chan); + } + } + s->Input[i].Writemask = writemask; + if (!writemask) { + continue; + } + + class_id = ra_alloc_reg_class(regs); + input_classes[i] = class_id; + ra_class_add_reg(regs, class_id, + get_reg_id(s->Input[i].Index, writemask)); + } - c->AllocateHwInputs(c, &alloc_input, &s); + ra_set_finalize(regs); - temp_reg_offset = 0; - for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index) - temp_reg_offset = s.Input[i].Index + 1; + graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); + + /* Build the interference graph */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next,node_index++) { + struct rc_list * a, * b; + unsigned int b_index; + + ra_set_node_class(graph, node_index, node_classes[node_index]); + + for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; + b; b = b->Next, b_index++) { + struct rc_variable * var_a = a->Item; + while (var_a) { + struct rc_variable * var_b = b->Item; + while (var_b) { + if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { + ra_add_node_interference(graph, + node_index, b_index); + } + var_b = var_b->Friend; + } + var_a = var_a->Friend; + } + } } - if (temp_reg_offset) { - for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (s.Temporary[i].Used) { - s.Temporary[i].Allocated = 1; - s.Temporary[i].File = RC_FILE_TEMPORARY; - s.Temporary[i].Index = i + temp_reg_offset; + /* Add input registers to the interference graph */ + for (i = 0, input_node = 0; i< s->NumInputs; i++) { + if (!s->Input[i].Writemask) { + continue; + } + ra_set_node_class(graph, node_count + input_node, + input_classes[i]); + for (var_ptr = variables, node_index = 0; + var_ptr; var_ptr = var_ptr->Next, node_index++) { + struct rc_variable * var = var_ptr->Item; + if (overlap_live_intervals_array(s->Input[i].Live, + var->Live)) { + ra_add_node_interference(graph, node_index, + node_count + input_node); } } + 
/* Manually allocate a register for this input */ + ra_set_node_reg(graph, node_count + input_node, get_reg_id( + s->Input[i].Index, s->Input[i].Writemask)); + input_node++; + } + + if (!ra_allocate_no_spills(graph)) { + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + } + + /* Rewrite the registers */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + int reg = ra_get_node_reg(graph, node_index); + unsigned int writemask = reg_get_writemask(reg); + unsigned int index = reg_get_index(reg); + struct rc_variable * var = var_ptr->Item; + + if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = rc_variable_writemask_sum(var); + } - /* Rewrite all registers. */ - for (struct rc_instruction *inst = cc->Program.Instructions.Next; - inst != &cc->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, &remap_register, &s); + if (var->Dst.File == RC_FILE_INPUT) { + continue; } + rc_variable_change_dst(var, index, writemask); + } + + ralloc_free(graph); + ralloc_free(regs); +} + +/** + * @param user This parameter should be a pointer to an integer value. If this + * integer value is zero, then a simple register allocator will be used that + * only allocates space for input registers (\sa do_regalloc_inputs_only). If + * user is non-zero, then the regular register allocator will be used + * (\sa do_advanced_regalloc). 
+ */ +void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = + (struct r300_fragment_program_compiler*)cc; + struct regalloc_state s; + int * do_full_regalloc = (int*)user; + + memset(&s, 0, sizeof(s)); + s.C = cc; + s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; + s.Input = memory_pool_malloc(&cc->Pool, + s.NumInputs * sizeof(struct register_info)); + memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); + + s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; + s.Temporary = memory_pool_malloc(&cc->Pool, + s.NumTemporaries * sizeof(struct register_info)); + memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); + + rc_recompute_ips(s.C); + + c->AllocateHwInputs(c, &alloc_input_simple, &s); + if (*do_full_regalloc) { + do_advanced_regalloc(&s); + } else { + s.Simple = 1; + do_regalloc_inputs_only(&s); + } + + /* Rewrite inputs and if we are doing the simple allocation, rewrite + * temporaries too. 
*/ + for (struct rc_instruction *inst = s.C->Program.Instructions.Next; + inst != &s.C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_register, &s); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 8e10813ff06..25cd52c9cd4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -709,7 +709,7 @@ static int convert_rgb_to_alpha( pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; pair_inst->Alpha.DestIndex = new_index; - pair_inst->Alpha.WriteMask = 1; + pair_inst->Alpha.WriteMask = RC_MASK_W; pair_inst->Alpha.Target = pair_inst->RGB.Target; pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; @@ -739,7 +739,7 @@ static int convert_rgb_to_alpha( for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; - rgb_to_alpha_remap(reader.Inst, reader.U.Arg, + rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg, RC_FILE_TEMPORARY, old_swz, new_index); } return 1; @@ -952,6 +952,7 @@ static void schedule_block(struct r300_fragment_program_compiler * c, instruction_ready(&s, s.Current); /* Get global readers for possible RGB->Alpha conversion. */ + s.Current->GlobalReaders.ExitOnAbort = 1; rc_get_readers(s.C, inst, &s.Current->GlobalReaders, is_rgb_to_alpha_possible_normal, is_rgb_to_alpha_possible, NULL); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index a07f6b63c6e..b899eccbf53 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -108,6 +108,9 @@ struct rc_sub_instruction { /** True if tex instruction should do shadow comparison */ unsigned int TexShadow:1; + + /**R500 Only. 
How to swizzle the result of a TEX lookup*/ + unsigned int TexSwizzle:12; /*@}*/ /** This holds information about the presubtract operation used by diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 45f79ece5ba..24577333450 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -129,6 +129,7 @@ typedef enum { #define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) #define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) #define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) +#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED) /** * \name Bitmasks for components of vectors. diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 68874795b8a..52315957520 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -223,3 +223,17 @@ struct rc_pair_instruction_source * rc_pair_get_src( return NULL; } } + +int rc_pair_get_src_index( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_source * src) +{ + int i; + for (i = 0; i < 3; i++) { + if (&pair_inst->RGB.Src[i] == src + || &pair_inst->Alpha.Src[i] == src) { + return i; + } + } + return -1; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index d1a435fc530..a957ea9f7a0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -114,6 +114,10 @@ void rc_pair_foreach_source_that_rgb_reads( struct rc_pair_instruction_source * rc_pair_get_src( struct rc_pair_instruction * pair_inst, struct rc_pair_instruction_arg * arg); + +int rc_pair_get_src_index( + struct rc_pair_instruction * 
pair_inst, + struct rc_pair_instruction_source * src); /*@}*/ @@ -127,6 +131,7 @@ void rc_pair_translate(struct radeon_compiler *cc, void *user); void rc_pair_schedule(struct radeon_compiler *cc, void *user); void rc_pair_regalloc(struct radeon_compiler *cc, void *user); void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user); +void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user); /*@}*/ #endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index cef448ee4e1..8d16b2cf9ec 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -142,6 +142,8 @@ int radeonTransformTEX( if (inst->U.I.Opcode != RC_OPCODE_TEX && inst->U.I.Opcode != RC_OPCODE_TXB && inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_TXD && + inst->U.I.Opcode != RC_OPCODE_TXL && inst->U.I.Opcode != RC_OPCODE_KIL) return 0; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 5bd19c0b9c6..cafa0579734 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -71,6 +71,7 @@ void rc_rename_regs(struct radeon_compiler *c, void *user) if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) continue; + reader_data.ExitOnAbort = 1; rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); if (reader_data.Abort || reader_data.ReaderCount == 0) @@ -85,7 +86,7 @@ void rc_rename_regs(struct radeon_compiler *c, void *user) reader_data.Writer->U.I.DstReg.Index = new_index; for(i = 0; i < reader_data.ReaderCount; i++) { - reader_data.Readers[i].U.Src->Index = new_index; + reader_data.Readers[i].U.I.Src->Index = new_index; } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c 
b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c new file mode 100644 index 00000000000..16fa5d28902 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c @@ -0,0 +1,484 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_variable.h" + +#include "memory_pool.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_opcodes.h" +#include "radeon_program.h" + +/** + * Rewrite the index and writemask for the destination register of var + * and its friends to new_index and new_writemask. This function also takes + * care of rewriting the swizzles for the sources of var. 
+ */ +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask) +{ + struct rc_variable * var_ptr; + struct rc_list * readers; + unsigned int old_mask = rc_variable_writemask_sum(var); + unsigned int conversion_swizzle = + rc_make_conversion_swizzle(old_mask, new_writemask); + + for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { + if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { + rc_normal_rewrite_writemask(var_ptr->Inst, + conversion_swizzle); + var_ptr->Inst->U.I.DstReg.Index = new_index; + } else { + struct rc_pair_sub_instruction * sub; + if (var_ptr->Dst.WriteMask == RC_MASK_W) { + assert(new_writemask & RC_MASK_W); + sub = &var_ptr->Inst->U.P.Alpha; + } else { + sub = &var_ptr->Inst->U.P.RGB; + rc_pair_rewrite_writemask(sub, + conversion_swizzle); + } + sub->DestIndex = new_index; + } + } + + readers = rc_variable_readers_union(var); + + for ( ; readers; readers = readers->Next) { + struct rc_reader * reader = readers->Item; + if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { + reader->U.I.Src->Index = new_index; + reader->U.I.Src->Swizzle = rc_rewrite_swizzle( + reader->U.I.Src->Swizzle, conversion_swizzle); + } else { + struct rc_pair_instruction * pair_inst = + &reader->Inst->U.P; + unsigned int src_type = rc_source_type_swz( + reader->U.P.Arg->Swizzle); + + int src_index = reader->U.P.Arg->Source; + if (src_index == RC_PAIR_PRESUB_SRC) { + src_index = rc_pair_get_src_index( + pair_inst, reader->U.P.Src); + } + /* Try to delete the old src, it is OK if this fails, + * because rc_pair_alloc_source might be able to + * find a source that can be reused. + */ + if (rc_pair_remove_src(reader->Inst, src_type, + src_index, old_mask)) { + /* Reuse the source index of the source that + * was just deleted and set its register + * index. We can't use rc_pair_alloc_source + * for this because it might return a source + * index that is already being used. 
*/ + if (src_type & RC_SOURCE_RGB) { + pair_inst->RGB.Src[src_index] + .Used = 1; + pair_inst->RGB.Src[src_index] + .Index = new_index; + pair_inst->RGB.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + if (src_type & RC_SOURCE_ALPHA) { + pair_inst->Alpha.Src[src_index] + .Used = 1; + pair_inst->Alpha.Src[src_index] + .Index = new_index; + pair_inst->Alpha.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + } else { + src_index = rc_pair_alloc_source( + &reader->Inst->U.P, + src_type & RC_SOURCE_RGB, + src_type & RC_SOURCE_ALPHA, + RC_FILE_TEMPORARY, + new_index); + if (src_index < 0) { + rc_error(var->C, "Rewrite of inst %u failed " + "Can't allocate source for " + "Inst %u src_type=%x " + "new_index=%u new_mask=%u\n", + var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask); + continue; + } + } + reader->U.P.Arg->Swizzle = rc_rewrite_swizzle( + reader->U.P.Arg->Swizzle, conversion_swizzle); + if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { + reader->U.P.Arg->Source = src_index; + } + } + } +} + +/** + * Compute the live intervals for var and its friends. + */ +void rc_variable_compute_live_intervals(struct rc_variable * var) +{ + while(var) { + unsigned int i; + unsigned int start = var->Inst->IP; + + for (i = 0; i < var->ReaderCount; i++) { + unsigned int chan; + unsigned int chan_start = start; + unsigned int chan_end = var->Readers[i].Inst->IP; + unsigned int mask = var->Readers[i].WriteMask; + struct rc_instruction * inst; + + /* Extend the live interval of T0 to the start of the + * loop for sequences like: + * BGNLOOP + * read T0 + * ... 
+ * write T0 + * ENDLOOP + */ + if (var->Readers[i].Inst->IP < start) { + struct rc_instruction * bgnloop = + rc_match_endloop(var->Readers[i].Inst); + chan_start = bgnloop->IP; + } + + /* Extend the live interval of T0 to the start of the + * loop in case there is a BRK instruction in the loop + * (we don't actually check for a BRK instruction we + * assume there is one somewhere in the loop, which + * there usually is) for sequences like: + * BGNLOOP + * ... + * conditional BRK + * ... + * write T0 + * ENDLOOP + * read T0 + *************************************************** + * Extend the live interval of T0 to the end of the + * loop for sequences like: + * write T0 + * BGNLOOP + * ... + * read T0 + * ENDLOOP + */ + for (inst = var->Inst; inst != var->Readers[i].Inst; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + struct rc_instruction * bgnloop = + rc_match_endloop(inst); + if (bgnloop->IP < chan_start) { + chan_start = bgnloop->IP; + } + } else if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > chan_end) { + chan_end = endloop->IP; + } + } + } + + for (chan = 0; chan < 4; chan++) { + if ((mask >> chan) & 0x1) { + if (!var->Live[chan].Used + || chan_start < var->Live[chan].Start) { + var->Live[chan].Start = + chan_start; + } + if (!var->Live[chan].Used + || chan_end > var->Live[chan].End) { + var->Live[chan].End = chan_end; + } + var->Live[chan].Used = 1; + } + } + } + var = var->Friend; + } +} + +/** + * @return 1 if a and b share a reader + * @return 0 if they do not + */ +static unsigned int readers_intersect( + struct rc_variable * a, + struct rc_variable * b) +{ + unsigned int a_index, b_index; + for (a_index = 0; a_index < a->ReaderCount; a_index++) { + struct rc_reader reader_a = a->Readers[a_index]; + for (b_index = 0; b_index < b->ReaderCount; b_index++) { + struct rc_reader reader_b = b->Readers[b_index]; + if (reader_a.Inst->Type 
== RC_INSTRUCTION_NORMAL + && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_a.U.I.Src == reader_b.U.I.Src) { + + return 1; + } + if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR + && reader_b.Inst->Type == RC_INSTRUCTION_PAIR + && reader_a.U.P.Src == reader_b.U.P.Src) { + + return 1; + } + } + } + return 0; +} + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend) +{ + assert(var->Dst.Index == friend->Dst.Index); + while(var->Friend) { + var = var->Friend; + } + var->Friend = friend; +} + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data) +{ + struct rc_variable * new = + memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); + memset(new, 0, sizeof(struct rc_variable)); + new->C = c; + new->Dst.File = DstFile; + new->Dst.Index = DstIndex; + new->Dst.WriteMask = DstWriteMask; + if (reader_data) { + new->Inst = reader_data->Writer; + new->ReaderCount = reader_data->ReaderCount; + new->Readers = reader_data->Readers; + } + return new; +} + +static void get_variable_helper( + struct rc_list ** aborted_list, + struct rc_list ** variable_list, + unsigned int aborted, + struct rc_variable * variable) +{ + if (aborted) { + rc_list_add(aborted_list, rc_list(&variable->C->Pool, variable)); + } else { + rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); + } +} + +static void get_variable_pair_helper( + struct rc_list ** aborted_list, + struct rc_list ** variable_list, + struct radeon_compiler * c, + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub_inst) +{ + struct rc_reader_data reader_data; + struct rc_variable * new_var; + rc_register_file file; + unsigned int writemask; + + if (sub_inst->Opcode == RC_OPCODE_NOP) { + return; + } + memset(&reader_data, 0, sizeof(struct rc_reader_data)); + rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL); + + 
if (reader_data.ReaderCount == 0) { + return; + } + + if (sub_inst->WriteMask) { + file = RC_FILE_TEMPORARY; + writemask = sub_inst->WriteMask; + } else if (sub_inst->OutputWriteMask) { + file = RC_FILE_OUTPUT; + writemask = sub_inst->OutputWriteMask; + } else { + writemask = 0; + file = RC_FILE_NONE; + } + new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, + &reader_data); + get_variable_helper(aborted_list, variable_list, reader_data.Abort, + new_var); +} + +/** + * Generate a list of variables used by the shader program. Each instruction + * that writes to a register is considered a variable. The struct rc_variable + * data structure includes a list of readers and is essentially a + * definition-use chain. Any two variables that share a reader are considered + * "friends" and they are linked together via the Friend attribute. + */ +struct rc_list * rc_get_variables(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + struct rc_list * aborted_list = NULL; + struct rc_list * variable_list = NULL; + struct rc_list * var_ptr; + struct rc_list * search_ptr; + + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + struct rc_reader_data reader_data; + struct rc_variable * new_var; + memset(&reader_data, 0, sizeof(reader_data)); + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + if (reader_data.ReaderCount == 0) { + continue; + } + new_var = rc_variable(c, inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask, &reader_data); + get_variable_helper(&aborted_list, &variable_list, + reader_data.Abort, new_var); + } else { + get_variable_pair_helper(&aborted_list, &variable_list, + c, inst, &inst->U.P.RGB); + get_variable_pair_helper(&aborted_list, &variable_list, + c, inst, &inst->U.P.Alpha); + } + } + + /* The aborted_list contains a list of variables that might share a + * reader with another variable. 
We need to search through this list + * and pair together variables that do share the same reader. + */ + while (aborted_list) { + struct rc_list * search_ptr_next; + var_ptr = aborted_list; + + search_ptr = var_ptr->Next; + while(search_ptr) { + search_ptr_next = search_ptr->Next; + if (readers_intersect(var_ptr->Item, search_ptr->Item)){ + rc_list_remove(&aborted_list, search_ptr); + rc_variable_add_friend(var_ptr->Item, + search_ptr->Item); + } + search_ptr = search_ptr_next; + } + rc_list_remove(&aborted_list, var_ptr); + rc_list_add(&variable_list, rc_list( + &((struct rc_variable*)(var_ptr->Item))->C->Pool, + var_ptr->Item)); + } + return variable_list; +} + +/** + * @return The bitwise or of the writemasks of a variable and all of its + * friends. + */ +unsigned int rc_variable_writemask_sum(struct rc_variable * var) +{ + unsigned int writemask = 0; + while(var) { + writemask |= var->Dst.WriteMask; + var = var->Friend; + } + return writemask; +} + +/* + * @return A list of readers for a variable and its friends. Readers + * that read from two different variable friends are only included once in + * this list. 
+ */ +struct rc_list * rc_variable_readers_union(struct rc_variable * var) +{ + struct rc_list * list = NULL; + while (var) { + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + struct rc_list * temp; + struct rc_reader * a = &var->Readers[i]; + unsigned int match = 0; + for (temp = list; temp; temp = temp->Next) { + struct rc_reader * b = temp->Item; + if (a->Inst->Type != b->Inst->Type) { + continue; + } + if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { + if (a->U.I.Src == b->U.I.Src) { + match = 1; + break; + } + } + if (a->Inst->Type == RC_INSTRUCTION_PAIR) { + if (a->U.P.Arg == b->U.P.Arg + && a->U.P.Src == b->U.P.Src) { + match = 1; + break; + } + } + } + if (match) { + continue; + } + rc_list_add(&list, rc_list(&var->C->Pool, a)); + } + var = var->Friend; + } + return list; +} + +void rc_variable_print(struct rc_variable * var) +{ + unsigned int i; + while (var) { + fprintf(stderr, "%u: TEMP[%u].%u: ", + var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); + for (i = 0; i < 4; i++) { + fprintf(stderr, "chan %u: start=%u end=%u ", i, + var->Live[i].Start, var->Live[i].End); + } + fprintf(stderr, "%u readers\n", var->ReaderCount); + if (var->Friend) { + fprintf(stderr, "Friend: \n\t"); + } + var = var->Friend; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h new file mode 100644 index 00000000000..b8fbcaa4029 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h @@ -0,0 +1,84 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_VARIABLE_H +#define RADEON_VARIABLE_H + +#include "radeon_compiler.h" + +struct radeon_compiler; +struct rc_list; +struct rc_reader_data; +struct rc_readers; + +struct live_intervals { + int Start; + int End; + int Used; +}; + +struct rc_variable { + struct radeon_compiler * C; + struct rc_dst_register Dst; + + struct rc_instruction * Inst; + unsigned int ReaderCount; + struct rc_reader * Readers; + struct live_intervals Live[4]; + + /* A friend is a variable that shares a reader with another variable. 
+ */ + struct rc_variable * Friend; +}; + +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask); + +void rc_variable_compute_live_intervals(struct rc_variable * var); + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend); + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data); + +struct rc_list * rc_get_variables(struct radeon_compiler * c); + +unsigned int rc_variable_writemask_sum(struct rc_variable * var); + +struct rc_list * rc_variable_readers_union(struct rc_variable * var); + +void rc_variable_print(struct rc_variable * var); + +#endif /* RADEON_VARIABLE_H */ diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 88b68e3d191..9145023826e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -406,6 +406,7 @@ #define PCI_CHIP_CEDAR_68E8 0x68E8 #define PCI_CHIP_CEDAR_68E9 0x68E9 #define PCI_CHIP_CEDAR_68F1 0x68F1 +#define PCI_CHIP_CEDAR_68F2 0x68F2 #define PCI_CHIP_CEDAR_68F8 0x68F8 #define PCI_CHIP_CEDAR_68F9 0x68F9 #define PCI_CHIP_CEDAR_68FE 0x68FE @@ -426,7 +427,9 @@ #define PCI_CHIP_JUNIPER_68B0 0x68B0 #define PCI_CHIP_JUNIPER_68B8 0x68B8 #define PCI_CHIP_JUNIPER_68B9 0x68B9 +#define PCI_CHIP_JUNIPER_68BA 0x68BA #define PCI_CHIP_JUNIPER_68BE 0x68BE +#define PCI_CHIP_JUNIPER_68BF 0x68BF #define PCI_CHIP_CYPRESS_6880 0x6880 #define PCI_CHIP_CYPRESS_6888 0x6888 @@ -434,6 +437,7 @@ #define PCI_CHIP_CYPRESS_688A 0x688A #define PCI_CHIP_CYPRESS_6898 0x6898 #define PCI_CHIP_CYPRESS_6899 0x6899 +#define PCI_CHIP_CYPRESS_689B 0x689B #define PCI_CHIP_CYPRESS_689E 0x689E #define PCI_CHIP_HEMLOCK_689C 0x689C @@ -458,6 +462,7 @@ #define PCI_CHIP_BARTS_6729 0x6729 #define PCI_CHIP_BARTS_6738 0x6738 #define PCI_CHIP_BARTS_6739 0x6739 
+#define PCI_CHIP_BARTS_673E 0x673E #define PCI_CHIP_TURKS_6740 0x6740 #define PCI_CHIP_TURKS_6741 0x6741 diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 732efe8bd85..6449229e088 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1106,6 +1106,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_CEDAR_68E8: case PCI_CHIP_CEDAR_68E9: case PCI_CHIP_CEDAR_68F1: + case PCI_CHIP_CEDAR_68F2: case PCI_CHIP_CEDAR_68F8: case PCI_CHIP_CEDAR_68F9: case PCI_CHIP_CEDAR_68FE: @@ -1132,7 +1133,9 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_JUNIPER_68B0: case PCI_CHIP_JUNIPER_68B8: case PCI_CHIP_JUNIPER_68B9: + case PCI_CHIP_JUNIPER_68BA: case PCI_CHIP_JUNIPER_68BE: + case PCI_CHIP_JUNIPER_68BF: screen->chip_family = CHIP_FAMILY_JUNIPER; screen->chip_flags = RADEON_CHIPSET_TCL; break; @@ -1143,6 +1146,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_CYPRESS_688A: case PCI_CHIP_CYPRESS_6898: case PCI_CHIP_CYPRESS_6899: + case PCI_CHIP_CYPRESS_689B: case PCI_CHIP_CYPRESS_689E: screen->chip_family = CHIP_FAMILY_CYPRESS; screen->chip_flags = RADEON_CHIPSET_TCL; @@ -1176,6 +1180,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_BARTS_6729: case PCI_CHIP_BARTS_6738: case PCI_CHIP_BARTS_6739: + case PCI_CHIP_BARTS_673E: screen->chip_family = CHIP_FAMILY_BARTS; screen->chip_flags = RADEON_CHIPSET_TCL; break; diff --git a/src/mesa/drivers/windows/fx/fx.rc b/src/mesa/drivers/windows/fx/fx.rc deleted file mode 100644 index f920b8768dd..00000000000 --- a/src/mesa/drivers/windows/fx/fx.rc +++ /dev/null @@ -1,39 +0,0 @@ -#include <windows.h> - -#define PRODNAME "Mesa 6.x" -#define CONTACTSTR "http://www.mesa3d.org" -#define HWSTR "3dfx Voodoo Graphics, Voodoo Rush, Voodoo^2, Voodoo Banshee, Velocity 
100/200, Voodoo3, Voodoo4, Voodoo5" -#define COPYRIGHTSTR "Copyright \251 Brian E. Paul" - -#define VERSIONSTR "6.3.0.1" -#define MANVERSION 6 -#define MANREVISION 3 -#define BUILD_NUMBER 1 - -VS_VERSION_INFO VERSIONINFO - FILEVERSION MANVERSION, MANREVISION, 0, BUILD_NUMBER - PRODUCTVERSION MANVERSION, MANREVISION, 0, BUILD_NUMBER - FILEFLAGSMASK 0x0030003FL - - FILEOS VOS_DOS_WINDOWS32 - FILETYPE VFT_DRV - FILESUBTYPE VFT2_DRV_INSTALLABLE -BEGIN - BLOCK "StringFileInfo" - BEGIN - BLOCK "040904E4" - BEGIN - VALUE "FileDescription", PRODNAME - VALUE "FileVersion", VERSIONSTR - VALUE "LegalCopyright", COPYRIGHTSTR - VALUE "ProductName", PRODNAME - VALUE "Graphics Subsystem", HWSTR - VALUE "Contact", CONTACTSTR - END - END - BLOCK "VarFileInfo" - BEGIN - /* the following line should be extended for localized versions */ - VALUE "Translation", 0x409, 1252 - END -END diff --git a/src/mesa/drivers/windows/fx/fxopengl.def b/src/mesa/drivers/windows/fx/fxopengl.def deleted file mode 100644 index bc615e93ae6..00000000000 --- a/src/mesa/drivers/windows/fx/fxopengl.def +++ /dev/null @@ -1,953 +0,0 @@ -LIBRARY OpenGL32 -DESCRIPTION "Mesa 5.1" -EXPORTS - glAccum - glActiveStencilFaceEXT - glActiveTexture - glActiveTextureARB - glAlphaFunc - glAreProgramsResidentNV - glAreTexturesResident - glAreTexturesResidentEXT - glArrayElement - glArrayElementEXT - glBegin - glBeginQueryARB - glBindBufferARB - glBindProgramARB - glBindProgramNV - glBindTexture - glBindTextureEXT - glBitmap -;glBlendColor -;glBlendColorEXT - glBlendEquation - glBlendEquationEXT - glBlendFunc - glBlendFuncSeparate - glBlendFuncSeparateEXT - glBlendFuncSeparateINGR - glBufferDataARB - glBufferSubDataARB - glCallList - glCallLists - glClear - glClearAccum - glClearColor - glClearDepth - glClearIndex - glClearStencil - glClientActiveTexture - glClientActiveTextureARB - glClipPlane - glColorMask - glColorMaterial - glColorPointer - glColorPointerEXT - glColorSubTable - glColorSubTableEXT - glColorTable - 
glColorTableEXT - glColorTableParameterfv - glColorTableParameterfvSGI - glColorTableParameteriv - glColorTableParameterivSGI - glColorTableSGI - glColor3b - glColor3bv - glColor3d - glColor3dv - glColor3f - glColor3fv - glColor3i - glColor3iv - glColor3s - glColor3sv - glColor3ub - glColor3ubv - glColor3ui - glColor3uiv - glColor3us - glColor3usv - glColor4b - glColor4bv - glColor4d - glColor4dv - glColor4f - glColor4fv - glColor4i - glColor4iv - glColor4s - glColor4sv - glColor4ub - glColor4ubv - glColor4ui - glColor4uiv - glColor4us - glColor4usv - glCombinerInputNV - glCombinerOutputNV - glCombinerParameterfNV - glCombinerParameterfvNV - glCombinerParameteriNV - glCombinerParameterivNV - glCompressedTexImage1D - glCompressedTexImage1DARB - glCompressedTexImage2D - glCompressedTexImage2DARB - glCompressedTexImage3D - glCompressedTexImage3DARB - glCompressedTexSubImage1D - glCompressedTexSubImage1DARB - glCompressedTexSubImage2D - glCompressedTexSubImage2DARB - glCompressedTexSubImage3D - glCompressedTexSubImage3DARB - glConvolutionFilter1D - glConvolutionFilter1DEXT - glConvolutionFilter2D - glConvolutionFilter2DEXT - glConvolutionParameterf - glConvolutionParameterfEXT - glConvolutionParameterfv - glConvolutionParameterfvEXT - glConvolutionParameteri - glConvolutionParameteriEXT - glConvolutionParameteriv - glConvolutionParameterivEXT - glCopyColorSubTable - glCopyColorSubTableEXT - glCopyColorTable - glCopyColorTableSGI - glCopyConvolutionFilter1D - glCopyConvolutionFilter1DEXT - glCopyConvolutionFilter2D - glCopyConvolutionFilter2DEXT - glCopyPixels - glCopyTexImage1D - glCopyTexImage1DEXT - glCopyTexImage2D - glCopyTexImage2DEXT - glCopyTexSubImage1D - glCopyTexSubImage1DEXT - glCopyTexSubImage2D - glCopyTexSubImage2DEXT - glCopyTexSubImage3D - glCopyTexSubImage3DEXT - glCullFace - glDeleteBuffersARB - glDeleteFencesNV - glDeleteLists - glDeleteProgramsARB - glDeleteProgramsNV - glDeleteQueriesARB - glDeleteTextures - glDeleteTexturesEXT - glDepthBoundsEXT - 
glDepthFunc - glDepthMask - glDepthRange - glDetailTexFuncSGIS - glDisable - glDisableClientState - glDisableVertexAttribArrayARB - glDrawArrays - glDrawArraysEXT - glDrawBuffer - glDrawElements - glDrawPixels - glDrawRangeElements - glDrawRangeElementsEXT - glEdgeFlag - glEdgeFlagPointer - glEdgeFlagPointerEXT - glEdgeFlagv - glEnable - glEnableClientState - glEnableVertexAttribArrayARB - glEnd - glEndList - glEndQueryARB - glEvalCoord1d - glEvalCoord1dv - glEvalCoord1f - glEvalCoord1fv - glEvalCoord2d - glEvalCoord2dv - glEvalCoord2f - glEvalCoord2fv - glEvalMesh1 - glEvalMesh2 - glEvalPoint1 - glEvalPoint2 - glExecuteProgramNV - glFeedbackBuffer - glFinalCombinerInputNV - glFinish - glFinishFenceNV - glFlush - glFlushRasterSGIX - glFlushVertexArrayRangeNV - glFogCoordd - glFogCoorddEXT - glFogCoorddv - glFogCoorddvEXT - glFogCoordf - glFogCoordfEXT - glFogCoordfv - glFogCoordfvEXT - glFogCoordPointer - glFogCoordPointerEXT - glFogf - glFogfv - glFogi - glFogiv - glFragmentColorMaterialSGIX - glFragmentLightfSGIX - glFragmentLightfvSGIX - glFragmentLightiSGIX - glFragmentLightivSGIX - glFragmentLightModelfSGIX - glFragmentLightModelfvSGIX - glFragmentLightModeliSGIX - glFragmentLightModelivSGIX - glFragmentMaterialfSGIX - glFragmentMaterialfvSGIX - glFragmentMaterialiSGIX - glFragmentMaterialivSGIX - glFrameZoomSGIX - glFrontFace - glFrustum - glGenBuffersARB - glGenFencesNV - glGenLists - glGenProgramsARB - glGenProgramsNV - glGenQueriesARB - glGenTextures - glGenTexturesEXT - glGetBooleanv - glGetBufferParameterivARB - glGetBufferPointervARB - glGetBufferSubDataARB - glGetClipPlane - glGetColorTable - glGetColorTableEXT - glGetColorTableParameterfv - glGetColorTableParameterfvEXT - glGetColorTableParameterfvSGI - glGetColorTableParameteriv - glGetColorTableParameterivEXT - glGetColorTableParameterivSGI - glGetColorTableSGI - glGetCombinerInputParameterfvNV - glGetCombinerInputParameterivNV - glGetCombinerOutputParameterfvNV - glGetCombinerOutputParameterivNV - 
glGetCompressedTexImage - glGetCompressedTexImageARB - glGetConvolutionFilter - glGetConvolutionFilterEXT - glGetConvolutionParameterfv - glGetConvolutionParameterfvEXT - glGetConvolutionParameteriv - glGetConvolutionParameterivEXT - glGetDetailTexFuncSGIS - glGetDoublev - glGetError - glGetFenceivNV - glGetFinalCombinerInputParameterfvNV - glGetFinalCombinerInputParameterivNV - glGetFloatv - glGetFragmentLightfvSGIX - glGetFragmentLightivSGIX - glGetFragmentMaterialfvSGIX - glGetFragmentMaterialivSGIX - glGetHistogram - glGetHistogramEXT - glGetHistogramParameterfv - glGetHistogramParameterfvEXT - glGetHistogramParameteriv - glGetHistogramParameterivEXT - glGetInstrumentsSGIX - glGetIntegerv - glGetLightfv - glGetLightiv - glGetListParameterfvSGIX - glGetListParameterivSGIX - glGetMapdv - glGetMapfv - glGetMapiv - glGetMaterialfv - glGetMaterialiv - glGetMinmax - glGetMinmaxEXT - glGetMinmaxParameterfv - glGetMinmaxParameterfvEXT - glGetMinmaxParameteriv - glGetMinmaxParameterivEXT - glGetPixelMapfv - glGetPixelMapuiv - glGetPixelMapusv - glGetPixelTexGenParameterfvSGIS - glGetPixelTexGenParameterivSGIS - glGetPointerv - glGetPointervEXT - glGetPolygonStipple - glGetProgramEnvParameterdvARB - glGetProgramEnvParameterfvARB - glGetProgramivARB - glGetProgramivNV - glGetProgramLocalParameterdvARB - glGetProgramLocalParameterfvARB - glGetProgramNamedParameterdvNV - glGetProgramNamedParameterfvNV - glGetProgramParameterdvNV - glGetProgramParameterfvNV - glGetProgramStringARB - glGetProgramStringNV - glGetQueryivARB - glGetQueryObjectivARB - glGetQueryObjectuivARB - glGetSeparableFilter - glGetSeparableFilterEXT - glGetSharpenTexFuncSGIS - glGetString - glGetTexEnvfv - glGetTexEnviv - glGetTexFilterFuncSGIS - glGetTexGendv - glGetTexGenfv - glGetTexGeniv - glGetTexImage - glGetTexLevelParameterfv - glGetTexLevelParameteriv - glGetTexParameterfv - glGetTexParameteriv - glGetTrackMatrixivNV - glGetVertexAttribdvARB - glGetVertexAttribdvNV - glGetVertexAttribfvARB - 
glGetVertexAttribfvNV - glGetVertexAttribivARB - glGetVertexAttribivNV - glGetVertexAttribPointervARB - glGetVertexAttribPointervNV - glHint - glHintPGI - glHistogram - glHistogramEXT - glIndexd - glIndexdv - glIndexf - glIndexFuncEXT - glIndexfv - glIndexi - glIndexiv - glIndexMask - glIndexMaterialEXT - glIndexPointer - glIndexPointerEXT - glIndexs - glIndexsv - glIndexub - glIndexubv - glInitNames - glInstrumentsBufferSGIX - glInterleavedArrays - glIsBufferARB - glIsEnabled - glIsFenceNV - glIsList - glIsProgramARB - glIsProgramNV - glIsQueryARB - glIsTexture - glIsTextureEXT - glLightEnviSGIX - glLightf - glLightfv - glLighti - glLightiv - glLightModelf - glLightModelfv - glLightModeli - glLightModeliv - glLineStipple - glLineWidth - glListBase - glListParameterfSGIX - glListParameterfvSGIX - glListParameteriSGIX - glListParameterivSGIX - glLoadIdentity - glLoadMatrixd - glLoadMatrixf - glLoadName - glLoadProgramNV - glLoadTransposeMatrixd - glLoadTransposeMatrixdARB - glLoadTransposeMatrixf - glLoadTransposeMatrixfARB - glLockArraysEXT - glLogicOp - glMapBufferARB - glMapGrid1d - glMapGrid1f - glMapGrid2d - glMapGrid2f - glMap1d - glMap1f - glMap2d - glMap2f - glMaterialf - glMaterialfv - glMateriali - glMaterialiv - glMatrixMode - glMinmax - glMinmaxEXT - glMultiDrawArrays - glMultiDrawArraysEXT - glMultiDrawElements - glMultiDrawElementsEXT - glMultiModeDrawArraysIBM - glMultiModeDrawElementsIBM - glMultiTexCoord1d - glMultiTexCoord1dARB - glMultiTexCoord1dv - glMultiTexCoord1dvARB - glMultiTexCoord1f - glMultiTexCoord1fARB - glMultiTexCoord1fv - glMultiTexCoord1fvARB - glMultiTexCoord1i - glMultiTexCoord1iARB - glMultiTexCoord1iv - glMultiTexCoord1ivARB - glMultiTexCoord1s - glMultiTexCoord1sARB - glMultiTexCoord1sv - glMultiTexCoord1svARB - glMultiTexCoord2d - glMultiTexCoord2dARB - glMultiTexCoord2dv - glMultiTexCoord2dvARB - glMultiTexCoord2f - glMultiTexCoord2fARB - glMultiTexCoord2fv - glMultiTexCoord2fvARB - glMultiTexCoord2i - glMultiTexCoord2iARB - 
glMultiTexCoord2iv - glMultiTexCoord2ivARB - glMultiTexCoord2s - glMultiTexCoord2sARB - glMultiTexCoord2sv - glMultiTexCoord2svARB - glMultiTexCoord3d - glMultiTexCoord3dARB - glMultiTexCoord3dv - glMultiTexCoord3dvARB - glMultiTexCoord3f - glMultiTexCoord3fARB - glMultiTexCoord3fv - glMultiTexCoord3fvARB - glMultiTexCoord3i - glMultiTexCoord3iARB - glMultiTexCoord3iv - glMultiTexCoord3ivARB - glMultiTexCoord3s - glMultiTexCoord3sARB - glMultiTexCoord3sv - glMultiTexCoord3svARB - glMultiTexCoord4d - glMultiTexCoord4dARB - glMultiTexCoord4dv - glMultiTexCoord4dvARB - glMultiTexCoord4f - glMultiTexCoord4fARB - glMultiTexCoord4fv - glMultiTexCoord4fvARB - glMultiTexCoord4i - glMultiTexCoord4iARB - glMultiTexCoord4iv - glMultiTexCoord4ivARB - glMultiTexCoord4s - glMultiTexCoord4sARB - glMultiTexCoord4sv - glMultiTexCoord4svARB - glMultMatrixd - glMultMatrixf - glMultTransposeMatrixd - glMultTransposeMatrixdARB - glMultTransposeMatrixf - glMultTransposeMatrixfARB - glNewList - glNormalPointer - glNormalPointerEXT - glNormal3b - glNormal3bv - glNormal3d - glNormal3dv - glNormal3f - glNormal3fv - glNormal3i - glNormal3iv - glNormal3s - glNormal3sv - glOrtho - glPassThrough - glPixelMapfv - glPixelMapuiv - glPixelMapusv - glPixelStoref - glPixelStorei - glPixelTexGenParameterfSGIS - glPixelTexGenParameterfvSGIS - glPixelTexGenParameteriSGIS - glPixelTexGenParameterivSGIS - glPixelTexGenSGIX - glPixelTransferf - glPixelTransferi - glPixelZoom - glPointParameterf - glPointParameterfARB - glPointParameterfEXT - glPointParameterfSGIS - glPointParameterfv - glPointParameterfvARB - glPointParameterfvEXT - glPointParameterfvSGIS - glPointParameteri - glPointParameteriNV - glPointParameteriv - glPointParameterivNV - glPointSize - glPollInstrumentsSGIX - glPolygonMode - glPolygonOffset - glPolygonOffsetEXT - glPolygonStipple - glPopAttrib - glPopClientAttrib - glPopMatrix - glPopName - glPrioritizeTextures - glPrioritizeTexturesEXT - glProgramEnvParameter4dARB - 
glProgramEnvParameter4dvARB - glProgramEnvParameter4fARB - glProgramEnvParameter4fvARB - glProgramLocalParameter4dARB - glProgramLocalParameter4dvARB - glProgramLocalParameter4fARB - glProgramLocalParameter4fvARB - glProgramNamedParameter4dNV - glProgramNamedParameter4dvNV - glProgramNamedParameter4fNV - glProgramNamedParameter4fvNV - glProgramParameters4dvNV - glProgramParameters4fvNV - glProgramParameter4dNV - glProgramParameter4dvNV - glProgramParameter4fNV - glProgramParameter4fvNV - glProgramStringARB - glPushAttrib - glPushClientAttrib - glPushMatrix - glPushName - glRasterPos2d - glRasterPos2dv - glRasterPos2f - glRasterPos2fv - glRasterPos2i - glRasterPos2iv - glRasterPos2s - glRasterPos2sv - glRasterPos3d - glRasterPos3dv - glRasterPos3f - glRasterPos3fv - glRasterPos3i - glRasterPos3iv - glRasterPos3s - glRasterPos3sv - glRasterPos4d - glRasterPos4dv - glRasterPos4f - glRasterPos4fv - glRasterPos4i - glRasterPos4iv - glRasterPos4s - glRasterPos4sv - glReadBuffer - glReadInstrumentsSGIX - glReadPixels - glRectd - glRectdv - glRectf - glRectfv - glRecti - glRectiv - glRects - glRectsv - glReferencePlaneSGIX - glRenderMode - glRequestResidentProgramsNV - glResetHistogram - glResetHistogramEXT - glResetMinmax - glResetMinmaxEXT - glResizeBuffersMESA - glRotated - glRotatef - glSampleCoverage - glSampleCoverageARB - glSampleMaskEXT - glSampleMaskSGIS - glSamplePatternEXT - glSamplePatternSGIS - glScaled - glScalef - glScissor - glSecondaryColorPointer - glSecondaryColorPointerEXT - glSecondaryColor3b - glSecondaryColor3bEXT - glSecondaryColor3bv - glSecondaryColor3bvEXT - glSecondaryColor3d - glSecondaryColor3dEXT - glSecondaryColor3dv - glSecondaryColor3dvEXT - glSecondaryColor3f - glSecondaryColor3fEXT - glSecondaryColor3fv - glSecondaryColor3fvEXT - glSecondaryColor3i - glSecondaryColor3iEXT - glSecondaryColor3iv - glSecondaryColor3ivEXT - glSecondaryColor3s - glSecondaryColor3sEXT - glSecondaryColor3sv - glSecondaryColor3svEXT - glSecondaryColor3ub - 
glSecondaryColor3ubEXT - glSecondaryColor3ubv - glSecondaryColor3ubvEXT - glSecondaryColor3ui - glSecondaryColor3uiEXT - glSecondaryColor3uiv - glSecondaryColor3uivEXT - glSecondaryColor3us - glSecondaryColor3usEXT - glSecondaryColor3usv - glSecondaryColor3usvEXT - glSelectBuffer - glSeparableFilter2D - glSeparableFilter2DEXT - glSetFenceNV - glShadeModel - glSharpenTexFuncSGIS - glSpriteParameterfSGIX - glSpriteParameterfvSGIX - glSpriteParameteriSGIX - glSpriteParameterivSGIX - glStartInstrumentsSGIX - glStencilFunc - glStencilMask - glStencilOp - glStopInstrumentsSGIX - glTagSampleBufferSGIX - glTbufferMask3DFX - glTestFenceNV - glTexCoordPointer - glTexCoordPointerEXT - glTexCoord1d - glTexCoord1dv - glTexCoord1f - glTexCoord1fv - glTexCoord1i - glTexCoord1iv - glTexCoord1s - glTexCoord1sv - glTexCoord2d - glTexCoord2dv - glTexCoord2f - glTexCoord2fv - glTexCoord2i - glTexCoord2iv - glTexCoord2s - glTexCoord2sv - glTexCoord3d - glTexCoord3dv - glTexCoord3f - glTexCoord3fv - glTexCoord3i - glTexCoord3iv - glTexCoord3s - glTexCoord3sv - glTexCoord4d - glTexCoord4dv - glTexCoord4f - glTexCoord4fv - glTexCoord4i - glTexCoord4iv - glTexCoord4s - glTexCoord4sv - glTexEnvf - glTexEnvfv - glTexEnvi - glTexEnviv - glTexFilterFuncSGIS - glTexGend - glTexGendv - glTexGenf - glTexGenfv - glTexGeni - glTexGeniv - glTexImage1D - glTexImage2D - glTexImage3D - glTexImage3DEXT - glTexImage4DSGIS - glTexParameterf - glTexParameterfv - glTexParameteri - glTexParameteriv - glTexSubImage1D - glTexSubImage1DEXT - glTexSubImage2D - glTexSubImage2DEXT - glTexSubImage3D - glTexSubImage3DEXT - glTexSubImage4DSGIS - glTrackMatrixNV - glTranslated - glTranslatef - glUnlockArraysEXT - glUnmapBufferARB - glVertexArrayRangeNV - glVertexAttribPointerARB - glVertexAttribPointerNV - glVertexAttribs1dvNV - glVertexAttribs1fvNV - glVertexAttribs1svNV - glVertexAttribs2dvNV - glVertexAttribs2fvNV - glVertexAttribs2svNV - glVertexAttribs3dvNV - glVertexAttribs3fvNV - glVertexAttribs3svNV - 
glVertexAttribs4dvNV - glVertexAttribs4fvNV - glVertexAttribs4svNV - glVertexAttribs4ubvNV - glVertexAttrib1dARB - glVertexAttrib1dNV - glVertexAttrib1dvARB - glVertexAttrib1dvNV - glVertexAttrib1fARB - glVertexAttrib1fNV - glVertexAttrib1fvARB - glVertexAttrib1fvNV - glVertexAttrib1sARB - glVertexAttrib1sNV - glVertexAttrib1svARB - glVertexAttrib1svNV - glVertexAttrib2dARB - glVertexAttrib2dNV - glVertexAttrib2dvARB - glVertexAttrib2dvNV - glVertexAttrib2fARB - glVertexAttrib2fNV - glVertexAttrib2fvARB - glVertexAttrib2fvNV - glVertexAttrib2sARB - glVertexAttrib2sNV - glVertexAttrib2svARB - glVertexAttrib2svNV - glVertexAttrib3dARB - glVertexAttrib3dNV - glVertexAttrib3dvARB - glVertexAttrib3dvNV - glVertexAttrib3fARB - glVertexAttrib3fNV - glVertexAttrib3fvARB - glVertexAttrib3fvNV - glVertexAttrib3sARB - glVertexAttrib3sNV - glVertexAttrib3svARB - glVertexAttrib3svNV - glVertexAttrib4bvARB - glVertexAttrib4dARB - glVertexAttrib4dNV - glVertexAttrib4dvARB - glVertexAttrib4dvNV - glVertexAttrib4fARB - glVertexAttrib4fNV - glVertexAttrib4fvARB - glVertexAttrib4fvNV - glVertexAttrib4ivARB - glVertexAttrib4NbvARB - glVertexAttrib4NivARB - glVertexAttrib4NsvARB - glVertexAttrib4NubARB - glVertexAttrib4NubvARB - glVertexAttrib4NuivARB - glVertexAttrib4NusvARB - glVertexAttrib4sARB - glVertexAttrib4sNV - glVertexAttrib4svARB - glVertexAttrib4svNV - glVertexAttrib4ubNV - glVertexAttrib4ubvARB - glVertexAttrib4ubvNV - glVertexAttrib4uivARB - glVertexAttrib4usvARB - glVertexPointer - glVertexPointerEXT - glVertexWeightfEXT - glVertexWeightfvEXT - glVertexWeightPointerEXT - glVertex2d - glVertex2dv - glVertex2f - glVertex2fv - glVertex2i - glVertex2iv - glVertex2s - glVertex2sv - glVertex3d - glVertex3dv - glVertex3f - glVertex3fv - glVertex3i - glVertex3iv - glVertex3s - glVertex3sv - glVertex4d - glVertex4dv - glVertex4f - glVertex4fv - glVertex4i - glVertex4iv - glVertex4s - glVertex4sv - glViewport - glWindowPos2d - glWindowPos2dARB - glWindowPos2dMESA - glWindowPos2dv 
- glWindowPos2dvARB - glWindowPos2dvMESA - glWindowPos2f - glWindowPos2fARB - glWindowPos2fMESA - glWindowPos2fv - glWindowPos2fvARB - glWindowPos2fvMESA - glWindowPos2i - glWindowPos2iARB - glWindowPos2iMESA - glWindowPos2iv - glWindowPos2ivARB - glWindowPos2ivMESA - glWindowPos2s - glWindowPos2sARB - glWindowPos2sMESA - glWindowPos2sv - glWindowPos2svARB - glWindowPos2svMESA - glWindowPos3d - glWindowPos3dARB - glWindowPos3dMESA - glWindowPos3dv - glWindowPos3dvARB - glWindowPos3dvMESA - glWindowPos3f - glWindowPos3fARB - glWindowPos3fMESA - glWindowPos3fv - glWindowPos3fvARB - glWindowPos3fvMESA - glWindowPos3i - glWindowPos3iARB - glWindowPos3iMESA - glWindowPos3iv - glWindowPos3ivARB - glWindowPos3ivMESA - glWindowPos3s - glWindowPos3sARB - glWindowPos3sMESA - glWindowPos3sv - glWindowPos3svARB - glWindowPos3svMESA - glWindowPos4dMESA - glWindowPos4dvMESA - glWindowPos4fMESA - glWindowPos4fvMESA - glWindowPos4iMESA - glWindowPos4ivMESA - glWindowPos4sMESA - glWindowPos4svMESA - fxCloseHardware -;fxGetScreenGeometry - fxMesaCreateBestContext - fxMesaCreateContext - fxMesaDestroyContext - fxMesaGetCurrentContext - fxMesaMakeCurrent - fxMesaSelectCurrentBoard -;fxMesaSetNearFar - fxMesaSwapBuffers - fxMesaUpdateScreenSize - wglChoosePixelFormat - wglCopyContext - wglCreateContext - wglCreateLayerContext - wglDeleteContext - wglDescribeLayerPlane - wglDescribePixelFormat - wglGetCurrentContext - wglGetCurrentDC - wglGetDefaultProcAddress - wglGetLayerPaletteEntries - wglGetPixelFormat - wglGetProcAddress - wglMakeCurrent - wglRealizeLayerPalette - wglSetLayerPaletteEntries - wglSetPixelFormat - wglShareLists - wglSwapBuffers - wglSwapLayerBuffers - wglUseFontBitmapsA - wglUseFontBitmapsW - wglUseFontOutlinesA - wglUseFontOutlinesW - ChoosePixelFormat - DescribePixelFormat - GetPixelFormat - SetPixelFormat - SwapBuffers - DrvCopyContext - DrvCreateContext - DrvCreateLayerContext - DrvDeleteContext - DrvDescribeLayerPlane - DrvDescribePixelFormat - 
DrvGetLayerPaletteEntries - DrvGetProcAddress - DrvReleaseContext - DrvRealizeLayerPalette - DrvSetContext - DrvSetLayerPaletteEntries - DrvSetPixelFormat - DrvShareLists - DrvSwapBuffers - DrvSwapLayerBuffers - DrvValidateVersion diff --git a/src/mesa/drivers/windows/fx/fxwgl.c b/src/mesa/drivers/windows/fx/fxwgl.c deleted file mode 100644 index ce76ecd1568..00000000000 --- a/src/mesa/drivers/windows/fx/fxwgl.c +++ /dev/null @@ -1,1307 +0,0 @@ -/* - * Mesa 3-D graphics library - * Version: 4.0 - * - * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -/* Authors: - * David Bucciarelli - * Brian Paul - * Keith Whitwell - * Hiroshi Morii - * Daniel Borca - */ - -/* fxwgl.c - Microsoft wgl functions emulation for - * 3Dfx VooDoo/Mesa interface - */ - - -#ifdef _WIN32 - -#ifdef __cplusplus -extern "C" { -#endif - -#include <windows.h> -#define GL_GLEXT_PROTOTYPES -#include "GL/gl.h" -#include "GL/glext.h" - -#ifdef __cplusplus -} -#endif - -#include "GL/fxmesa.h" -#include "glheader.h" -#include "glapi.h" -#include "imports.h" -#include "../../glide/fxdrv.h" - -#define MAX_MESA_ATTRS 20 - -#if (_MSC_VER >= 1200) -#pragma warning( push ) -#pragma warning( disable : 4273 ) -#endif - -struct __extensions__ { - PROC proc; - char *name; -}; - -struct __pixelformat__ { - PIXELFORMATDESCRIPTOR pfd; - GLint mesaAttr[MAX_MESA_ATTRS]; -}; - -WINGDIAPI void GLAPIENTRY gl3DfxSetPaletteEXT(GLuint *); -static GLushort gammaTable[3 * 256]; - -struct __pixelformat__ pix[] = { - /* 16bit RGB565 single buffer with depth */ - { - {sizeof(PIXELFORMATDESCRIPTOR), 1, - PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL, - PFD_TYPE_RGBA, - 16, - 5, 0, 6, 5, 5, 11, 0, 0, - 0, 0, 0, 0, 0, - 16, - 0, - 0, - PFD_MAIN_PLANE, - 0, 0, 0, 0} - , - {FXMESA_COLORDEPTH, 16, - FXMESA_ALPHA_SIZE, 0, - FXMESA_DEPTH_SIZE, 16, - FXMESA_STENCIL_SIZE, 0, - FXMESA_ACCUM_SIZE, 0, - FXMESA_NONE} - } - , - /* 16bit RGB565 double buffer with depth */ - { - {sizeof(PIXELFORMATDESCRIPTOR), 1, - PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | - PFD_DOUBLEBUFFER | PFD_SWAP_COPY, - PFD_TYPE_RGBA, - 16, - 5, 0, 6, 5, 5, 11, 0, 0, - 0, 0, 0, 0, 0, - 16, - 0, - 0, - PFD_MAIN_PLANE, - 0, 0, 0, 0} - , - {FXMESA_COLORDEPTH, 16, - FXMESA_DOUBLEBUFFER, - FXMESA_ALPHA_SIZE, 0, - FXMESA_DEPTH_SIZE, 16, - FXMESA_STENCIL_SIZE, 0, - FXMESA_ACCUM_SIZE, 0, - FXMESA_NONE} - } - , - /* 16bit ARGB1555 single buffer with depth */ - { - {sizeof(PIXELFORMATDESCRIPTOR), 1, - PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL, - PFD_TYPE_RGBA, - 16, - 5, 0, 5, 5, 5, 10, 1, 15, - 0, 0, 0, 0, 0, - 16, - 0, - 
0, - PFD_MAIN_PLANE, - 0, 0, 0, 0} - , - {FXMESA_COLORDEPTH, 15, - FXMESA_ALPHA_SIZE, 1, - FXMESA_DEPTH_SIZE, 16, - FXMESA_STENCIL_SIZE, 0, - FXMESA_ACCUM_SIZE, 0, - FXMESA_NONE} - } - , - /* 16bit ARGB1555 double buffer with depth */ - { - {sizeof(PIXELFORMATDESCRIPTOR), 1, - PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | - PFD_DOUBLEBUFFER | PFD_SWAP_COPY, - PFD_TYPE_RGBA, - 16, - 5, 0, 5, 5, 5, 10, 1, 15, - 0, 0, 0, 0, 0, - 16, - 0, - 0, - PFD_MAIN_PLANE, - 0, 0, 0, 0} - , - {FXMESA_COLORDEPTH, 15, - FXMESA_DOUBLEBUFFER, - FXMESA_ALPHA_SIZE, 1, - FXMESA_DEPTH_SIZE, 16, - FXMESA_STENCIL_SIZE, 0, - FXMESA_ACCUM_SIZE, 0, - FXMESA_NONE} - } - , - /* 32bit ARGB8888 single buffer with depth */ - { - {sizeof(PIXELFORMATDESCRIPTOR), 1, - PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL, - PFD_TYPE_RGBA, - 32, - 8, 0, 8, 8, 8, 16, 8, 24, - 0, 0, 0, 0, 0, - 24, - 8, - 0, - PFD_MAIN_PLANE, - 0, 0, 0, 0} - , - {FXMESA_COLORDEPTH, 32, - FXMESA_ALPHA_SIZE, 8, - FXMESA_DEPTH_SIZE, 24, - FXMESA_STENCIL_SIZE, 8, - FXMESA_ACCUM_SIZE, 0, - FXMESA_NONE} - } - , - /* 32bit ARGB8888 double buffer with depth */ - { - {sizeof(PIXELFORMATDESCRIPTOR), 1, - PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | - PFD_DOUBLEBUFFER | PFD_SWAP_COPY, - PFD_TYPE_RGBA, - 32, - 8, 0, 8, 8, 8, 16, 8, 24, - 0, 0, 0, 0, 0, - 24, - 8, - 0, - PFD_MAIN_PLANE, - 0, 0, 0, 0} - , - {FXMESA_COLORDEPTH, 32, - FXMESA_DOUBLEBUFFER, - FXMESA_ALPHA_SIZE, 8, - FXMESA_DEPTH_SIZE, 24, - FXMESA_STENCIL_SIZE, 8, - FXMESA_ACCUM_SIZE, 0, - FXMESA_NONE} - } -}; - -static fxMesaContext ctx = NULL; -static WNDPROC hWNDOldProc; -static int curPFD = 0; -static HDC hDC; -static HWND hWND; - -static GLboolean haveDualHead; - -/* For the in-window-rendering hack */ - -#ifndef GR_CONTROL_RESIZE -/* Apparently GR_CONTROL_RESIZE can be ignored. OK? 
*/ -#define GR_CONTROL_RESIZE -1 -#endif - -static GLboolean gdiWindowHack; -static void *dibSurfacePtr; -static BITMAPINFO *dibBMI; -static HBITMAP dibHBM; -static HWND dibWnd; - -static int -env_check (const char *var, int val) -{ - const char *env = getenv(var); - return (env && (env[0] == val)); -} - -static LRESULT APIENTRY -__wglMonitor (HWND hwnd, UINT message, UINT wParam, LONG lParam) -{ - long ret; /* Now gives the resized window at the end to hWNDOldProc */ - - if (ctx && hwnd == hWND) { - switch (message) { - case WM_PAINT: - case WM_MOVE: - break; - case WM_DISPLAYCHANGE: - case WM_SIZE: -#if 0 - if (wParam != SIZE_MINIMIZED) { - static int moving = 0; - if (!moving) { - if (!FX_grSstControl(GR_CONTROL_RESIZE)) { - moving = 1; - SetWindowPos(hwnd, 0, 0, 0, 300, 300, SWP_NOMOVE | SWP_NOZORDER); - moving = 0; - if (!FX_grSstControl(GR_CONTROL_RESIZE)) { - /*MessageBox(0,_T("Error changing windowsize"),_T("fxMESA"),MB_OK);*/ - PostMessage(hWND, WM_CLOSE, 0, 0); - } - } - /* Do the clipping in the glide library */ - grClipWindow(0, 0, FX_grSstScreenWidth(), FX_grSstScreenHeight()); - /* And let the new size set in the context */ - fxMesaUpdateScreenSize(ctx); - } - } -#endif - break; - case WM_ACTIVATE: - break; - case WM_SHOWWINDOW: - break; - case WM_SYSKEYDOWN: - case WM_SYSCHAR: - break; - } - } - - /* Finally call the hWNDOldProc, which handles the resize with the - * now changed window sizes */ - ret = CallWindowProc(hWNDOldProc, hwnd, message, wParam, lParam); - - return ret; -} - -static void -wgl_error (long error) -{ -#define WGL_INVALID_PIXELFORMAT ERROR_INVALID_PIXEL_FORMAT - SetLastError(0xC0000000 /* error severity */ - |0x00070000 /* error facility (who we are) */ - |error); -} - -GLAPI BOOL GLAPIENTRY -wglCopyContext (HGLRC hglrcSrc, HGLRC hglrcDst, UINT mask) -{ - return FALSE; -} - -GLAPI HGLRC GLAPIENTRY -wglCreateContext (HDC hdc) -{ - HWND hWnd; - WNDPROC oldProc; - int error; - - if (ctx) { - SetLastError(0); - return NULL; - } - - if 
(!(hWnd = WindowFromDC(hdc))) { - SetLastError(0); - return NULL; - } - - if (curPFD == 0) { - wgl_error(WGL_INVALID_PIXELFORMAT); - return NULL; - } - - if ((oldProc = (WNDPROC)GetWindowLong(hWnd, GWL_WNDPROC)) != __wglMonitor) { - hWNDOldProc = oldProc; - SetWindowLong(hWnd, GWL_WNDPROC, (LONG)__wglMonitor); - } - - /* always log when debugging, or if user demands */ - if (TDFX_DEBUG || env_check("MESA_FX_INFO", 'r')) { - freopen("MESA.LOG", "w", stderr); - } - - { - RECT cliRect; - ShowWindow(hWnd, SW_SHOWNORMAL); - SetForegroundWindow(hWnd); - Sleep(100); /* a hack for win95 */ - if (env_check("MESA_GLX_FX", 'w') && !(GetWindowLong(hWnd, GWL_STYLE) & WS_POPUP)) { - /* XXX todo - windowed modes */ - error = !(ctx = fxMesaCreateContext((GLuint) hWnd, GR_RESOLUTION_NONE, GR_REFRESH_NONE, pix[curPFD - 1].mesaAttr)); - } else { - GetClientRect(hWnd, &cliRect); - error = !(ctx = fxMesaCreateBestContext((GLuint) hWnd, cliRect.right, cliRect.bottom, pix[curPFD - 1].mesaAttr)); - } - } - - /*if (getenv("SST_DUALHEAD")) - haveDualHead = - ((atoi(getenv("SST_DUALHEAD")) == 1) ? GL_TRUE : GL_FALSE); - else - haveDualHead = GL_FALSE;*/ - - if (error) { - SetLastError(0); - return NULL; - } - - hDC = hdc; - hWND = hWnd; - - /* Required by the OpenGL Optimizer 1.1 (is it a Optimizer bug ?) 
*/ - wglMakeCurrent(hdc, (HGLRC)1); - - return (HGLRC)1; -} - -GLAPI HGLRC GLAPIENTRY -wglCreateLayerContext (HDC hdc, int iLayerPlane) -{ - SetLastError(0); - return NULL; -} - -GLAPI BOOL GLAPIENTRY -wglDeleteContext (HGLRC hglrc) -{ - if (ctx && hglrc == (HGLRC)1) { - - fxMesaDestroyContext(ctx); - - SetWindowLong(WindowFromDC(hDC), GWL_WNDPROC, (LONG) hWNDOldProc); - - ctx = NULL; - hDC = 0; - return TRUE; - } - - SetLastError(0); - - return FALSE; -} - -GLAPI HGLRC GLAPIENTRY -wglGetCurrentContext (VOID) -{ - if (ctx) - return (HGLRC)1; - - SetLastError(0); - return NULL; -} - -GLAPI HDC GLAPIENTRY -wglGetCurrentDC (VOID) -{ - if (ctx) - return hDC; - - SetLastError(0); - return NULL; -} - -GLAPI BOOL GLAPIENTRY -wglSwapIntervalEXT (int interval) -{ - if (ctx == NULL) { - return FALSE; - } - if (interval < 0) { - interval = 0; - } else if (interval > 3) { - interval = 3; - } - ctx->swapInterval = interval; - return TRUE; -} - -GLAPI int GLAPIENTRY -wglGetSwapIntervalEXT (void) -{ - return (ctx == NULL) ? 
-1 : ctx->swapInterval; -} - -GLAPI BOOL GLAPIENTRY -wglGetDeviceGammaRamp3DFX (HDC hdc, LPVOID arrays) -{ - /* gammaTable should be per-context */ - memcpy(arrays, gammaTable, 3 * 256 * sizeof(GLushort)); - return TRUE; -} - -GLAPI BOOL GLAPIENTRY -wglSetDeviceGammaRamp3DFX (HDC hdc, LPVOID arrays) -{ - GLint i, tableSize, inc, index; - GLushort *red, *green, *blue; - FxU32 gammaTableR[256], gammaTableG[256], gammaTableB[256]; - - /* gammaTable should be per-context */ - memcpy(gammaTable, arrays, 3 * 256 * sizeof(GLushort)); - - tableSize = FX_grGetInteger(GR_GAMMA_TABLE_ENTRIES); - inc = 256 / tableSize; - red = (GLushort *)arrays; - green = (GLushort *)arrays + 256; - blue = (GLushort *)arrays + 512; - for (i = 0, index = 0; i < tableSize; i++, index += inc) { - gammaTableR[i] = red[index] >> 8; - gammaTableG[i] = green[index] >> 8; - gammaTableB[i] = blue[index] >> 8; - } - - grLoadGammaTable(tableSize, gammaTableR, gammaTableG, gammaTableB); - - return TRUE; -} - -typedef void *HPBUFFERARB; - -/* WGL_ARB_pixel_format */ -GLAPI BOOL GLAPIENTRY -wglGetPixelFormatAttribivARB (HDC hdc, - int iPixelFormat, - int iLayerPlane, - UINT nAttributes, - const int *piAttributes, - int *piValues) -{ - SetLastError(0); - return FALSE; -} - -GLAPI BOOL GLAPIENTRY -wglGetPixelFormatAttribfvARB (HDC hdc, - int iPixelFormat, - int iLayerPlane, - UINT nAttributes, - const int *piAttributes, - FLOAT *pfValues) -{ - SetLastError(0); - return FALSE; -} - -GLAPI BOOL GLAPIENTRY -wglChoosePixelFormatARB (HDC hdc, - const int *piAttribIList, - const FLOAT *pfAttribFList, - UINT nMaxFormats, - int *piFormats, - UINT *nNumFormats) -{ - SetLastError(0); - return FALSE; -} - -/* WGL_ARB_render_texture */ -GLAPI BOOL GLAPIENTRY -wglBindTexImageARB (HPBUFFERARB hPbuffer, int iBuffer) -{ - SetLastError(0); - return FALSE; -} - -GLAPI BOOL GLAPIENTRY -wglReleaseTexImageARB (HPBUFFERARB hPbuffer, int iBuffer) -{ - SetLastError(0); - return FALSE; -} - -GLAPI BOOL GLAPIENTRY 
-wglSetPbufferAttribARB (HPBUFFERARB hPbuffer, - const int *piAttribList) -{ - SetLastError(0); - return FALSE; -} - -/* WGL_ARB_pbuffer */ -GLAPI HPBUFFERARB GLAPIENTRY -wglCreatePbufferARB (HDC hDC, - int iPixelFormat, - int iWidth, - int iHeight, - const int *piAttribList) -{ - SetLastError(0); - return NULL; -} - -GLAPI HDC GLAPIENTRY -wglGetPbufferDCARB (HPBUFFERARB hPbuffer) -{ - SetLastError(0); - return NULL; -} - -GLAPI int GLAPIENTRY -wglReleasePbufferDCARB (HPBUFFERARB hPbuffer, HDC hDC) -{ - SetLastError(0); - return -1; -} - -GLAPI BOOL GLAPIENTRY -wglDestroyPbufferARB (HPBUFFERARB hPbuffer) -{ - SetLastError(0); - return FALSE; -} - -GLAPI BOOL GLAPIENTRY -wglQueryPbufferARB (HPBUFFERARB hPbuffer, - int iAttribute, - int *piValue) -{ - SetLastError(0); - return FALSE; -} - -GLAPI const char * GLAPIENTRY -wglGetExtensionsStringEXT (void) -{ - return "WGL_3DFX_gamma_control " - "WGL_EXT_swap_control " - "WGL_EXT_extensions_string WGL_ARB_extensions_string" - /*WGL_ARB_pixel_format WGL_ARB_render_texture WGL_ARB_pbuffer*/; -} - -GLAPI const char * GLAPIENTRY -wglGetExtensionsStringARB (HDC hdc) -{ - return wglGetExtensionsStringEXT(); -} - -static struct { - const char *name; - PROC func; -} wgl_ext[] = { - {"wglGetExtensionsStringARB", (PROC)wglGetExtensionsStringARB}, - {"wglGetExtensionsStringEXT", (PROC)wglGetExtensionsStringEXT}, - {"wglSwapIntervalEXT", (PROC)wglSwapIntervalEXT}, - {"wglGetSwapIntervalEXT", (PROC)wglGetSwapIntervalEXT}, - {"wglGetDeviceGammaRamp3DFX", (PROC)wglGetDeviceGammaRamp3DFX}, - {"wglSetDeviceGammaRamp3DFX", (PROC)wglSetDeviceGammaRamp3DFX}, - /* WGL_ARB_pixel_format */ - {"wglGetPixelFormatAttribivARB", (PROC)wglGetPixelFormatAttribivARB}, - {"wglGetPixelFormatAttribfvARB", (PROC)wglGetPixelFormatAttribfvARB}, - {"wglChoosePixelFormatARB", (PROC)wglChoosePixelFormatARB}, - /* WGL_ARB_render_texture */ - {"wglBindTexImageARB", (PROC)wglBindTexImageARB}, - {"wglReleaseTexImageARB", (PROC)wglReleaseTexImageARB}, - 
{"wglSetPbufferAttribARB", (PROC)wglSetPbufferAttribARB}, - /* WGL_ARB_pbuffer */ - {"wglCreatePbufferARB", (PROC)wglCreatePbufferARB}, - {"wglGetPbufferDCARB", (PROC)wglGetPbufferDCARB}, - {"wglReleasePbufferDCARB", (PROC)wglReleasePbufferDCARB}, - {"wglDestroyPbufferARB", (PROC)wglDestroyPbufferARB}, - {"wglQueryPbufferARB", (PROC)wglQueryPbufferARB}, - {NULL, NULL} -}; - -GLAPI PROC GLAPIENTRY -wglGetProcAddress (LPCSTR lpszProc) -{ - int i; - PROC p = (PROC)_glapi_get_proc_address((const char *)lpszProc); - - /* we can't BlendColor. work around buggy applications */ - if (p && strcmp(lpszProc, "glBlendColor") - && strcmp(lpszProc, "glBlendColorEXT")) - return p; - - for (i = 0; wgl_ext[i].name; i++) { - if (!strcmp(lpszProc, wgl_ext[i].name)) { - return wgl_ext[i].func; - } - } - - SetLastError(0); - return NULL; -} - -GLAPI PROC GLAPIENTRY -wglGetDefaultProcAddress (LPCSTR lpszProc) -{ - SetLastError(0); - return NULL; -} - -GLAPI BOOL GLAPIENTRY -wglMakeCurrent (HDC hdc, HGLRC hglrc) -{ - if ((hdc == NULL) && (hglrc == NULL)) - return TRUE; - - if (!ctx || hglrc != (HGLRC)1 || WindowFromDC(hdc) != hWND) { - SetLastError(0); - return FALSE; - } - - hDC = hdc; - - fxMesaMakeCurrent(ctx); - - return TRUE; -} - -GLAPI BOOL GLAPIENTRY -wglShareLists (HGLRC hglrc1, HGLRC hglrc2) -{ - if (!ctx || hglrc1 != (HGLRC)1 || hglrc1 != hglrc2) { - SetLastError(0); - return FALSE; - } - - return TRUE; -} - -static BOOL -wglUseFontBitmaps_FX (HDC fontDevice, DWORD firstChar, DWORD numChars, - DWORD listBase) -{ - TEXTMETRIC metric; - BITMAPINFO *dibInfo; - HDC bitDevice; - COLORREF tempColor; - int i; - - GetTextMetrics(fontDevice, &metric); - - dibInfo = (BITMAPINFO *)calloc(sizeof(BITMAPINFO) + sizeof(RGBQUAD), 1); - dibInfo->bmiHeader.biSize = sizeof(BITMAPINFOHEADER); - dibInfo->bmiHeader.biPlanes = 1; - dibInfo->bmiHeader.biBitCount = 1; - dibInfo->bmiHeader.biCompression = BI_RGB; - - bitDevice = CreateCompatibleDC(fontDevice); - - /* Swap fore and back colors so the 
bitmap has the right polarity */ - tempColor = GetBkColor(bitDevice); - SetBkColor(bitDevice, GetTextColor(bitDevice)); - SetTextColor(bitDevice, tempColor); - - /* Place chars based on base line */ - SetTextAlign(bitDevice, TA_BASELINE); - - for (i = 0; i < (int)numChars; i++) { - SIZE size; - char curChar; - int charWidth, charHeight, bmapWidth, bmapHeight, numBytes, res; - HBITMAP bitObject; - HGDIOBJ origBmap; - unsigned char *bmap; - - curChar = (char)(i + firstChar); /* [koolsmoky] explicit cast */ - - /* Find how high/wide this character is */ - GetTextExtentPoint32(bitDevice, &curChar, 1, &size); - - /* Create the output bitmap */ - charWidth = size.cx; - charHeight = size.cy; - bmapWidth = ((charWidth + 31) / 32) * 32; /* Round up to the next multiple of 32 bits */ - bmapHeight = charHeight; - bitObject = CreateCompatibleBitmap(bitDevice, bmapWidth, bmapHeight); - /*VERIFY(bitObject);*/ - - /* Assign the output bitmap to the device */ - origBmap = SelectObject(bitDevice, bitObject); - - PatBlt(bitDevice, 0, 0, bmapWidth, bmapHeight, BLACKNESS); - - /* Use our source font on the device */ - SelectObject(bitDevice, GetCurrentObject(fontDevice, OBJ_FONT)); - - /* Draw the character */ - TextOut(bitDevice, 0, metric.tmAscent, &curChar, 1); - - /* Unselect our bmap object */ - SelectObject(bitDevice, origBmap); - - /* Convert the display dependant representation to a 1 bit deep DIB */ - numBytes = (bmapWidth * bmapHeight) / 8; - bmap = MALLOC(numBytes); - dibInfo->bmiHeader.biWidth = bmapWidth; - dibInfo->bmiHeader.biHeight = bmapHeight; - res = GetDIBits(bitDevice, bitObject, 0, bmapHeight, bmap, - dibInfo, DIB_RGB_COLORS); - - /* Create the GL object */ - glNewList(i + listBase, GL_COMPILE); - glBitmap(bmapWidth, bmapHeight, 0.0, metric.tmDescent, - charWidth, 0.0, bmap); - glEndList(); - /* CheckGL(); */ - - /* Destroy the bmap object */ - DeleteObject(bitObject); - - /* Deallocate the bitmap data */ - FREE(bmap); - } - - /* Destroy the DC */ - 
DeleteDC(bitDevice); - - FREE(dibInfo); - - return TRUE; -} - -GLAPI BOOL GLAPIENTRY -wglUseFontBitmapsW (HDC hdc, DWORD first, DWORD count, DWORD listBase) -{ - return FALSE; -} - -GLAPI BOOL GLAPIENTRY -wglUseFontOutlinesA (HDC hdc, DWORD first, DWORD count, - DWORD listBase, FLOAT deviation, - FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf) -{ - SetLastError(0); - return FALSE; -} - -GLAPI BOOL GLAPIENTRY -wglUseFontOutlinesW (HDC hdc, DWORD first, DWORD count, - DWORD listBase, FLOAT deviation, - FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf) -{ - SetLastError(0); - return FALSE; -} - - -GLAPI BOOL GLAPIENTRY -wglSwapLayerBuffers (HDC hdc, UINT fuPlanes) -{ - if (ctx && WindowFromDC(hdc) == hWND) { - fxMesaSwapBuffers(); - - return TRUE; - } - - SetLastError(0); - return FALSE; -} - -static int -pfd_tablen (void) -{ - /* we should take an envvar for `fxMesaSelectCurrentBoard' */ - return (fxMesaSelectCurrentBoard(0) < GR_SSTTYPE_Voodoo4) - ? 2 /* only 16bit entries */ - : sizeof(pix) / sizeof(pix[0]); /* full table */ -} - -GLAPI int GLAPIENTRY -wglChoosePixelFormat (HDC hdc, const PIXELFORMATDESCRIPTOR *ppfd) -{ - int i, best = -1, qt_valid_pix; - PIXELFORMATDESCRIPTOR pfd = *ppfd; - - qt_valid_pix = pfd_tablen(); - -#if 1 || QUAKE2 || GORE - /* QUAKE2: 24+32 */ - /* GORE : 24+16 */ - if ((pfd.cColorBits == 24) || (pfd.cColorBits == 32)) { - /* the first 2 entries are 16bit */ - pfd.cColorBits = (qt_valid_pix > 2) ? 
32 : 16; - } - if (pfd.cColorBits == 32) { - pfd.cDepthBits = 24; - } else if (pfd.cColorBits == 16) { - pfd.cDepthBits = 16; - } -#endif - - if (pfd.nSize != sizeof(PIXELFORMATDESCRIPTOR) || pfd.nVersion != 1) { - SetLastError(0); - return 0; - } - - for (i = 0; i < qt_valid_pix; i++) { - if (pfd.cColorBits > 0 && pix[i].pfd.cColorBits != pfd.cColorBits) - continue; - - if ((pfd.dwFlags & PFD_DRAW_TO_WINDOW) - && !(pix[i].pfd.dwFlags & PFD_DRAW_TO_WINDOW)) continue; - if ((pfd.dwFlags & PFD_DRAW_TO_BITMAP) - && !(pix[i].pfd.dwFlags & PFD_DRAW_TO_BITMAP)) continue; - if ((pfd.dwFlags & PFD_SUPPORT_GDI) - && !(pix[i].pfd.dwFlags & PFD_SUPPORT_GDI)) continue; - if ((pfd.dwFlags & PFD_SUPPORT_OPENGL) - && !(pix[i].pfd.dwFlags & PFD_SUPPORT_OPENGL)) continue; - if (!(pfd.dwFlags & PFD_DOUBLEBUFFER_DONTCARE) - && ((pfd.dwFlags & PFD_DOUBLEBUFFER) != - (pix[i].pfd.dwFlags & PFD_DOUBLEBUFFER))) continue; -#if 1 /* Doom3 fails here! */ - if (!(pfd.dwFlags & PFD_STEREO_DONTCARE) - && ((pfd.dwFlags & PFD_STEREO) != - (pix[i].pfd.dwFlags & PFD_STEREO))) continue; -#endif - - if (pfd.cDepthBits > 0 && pix[i].pfd.cDepthBits == 0) - continue; /* need depth buffer */ - - if (pfd.cAlphaBits > 0 && pix[i].pfd.cAlphaBits == 0) - continue; /* need alpha buffer */ - -#if 0 /* regression bug? 
*/ - if (pfd.cStencilBits > 0 && pix[i].pfd.cStencilBits == 0) - continue; /* need stencil buffer */ -#endif - - if (pfd.iPixelType == pix[i].pfd.iPixelType) { - best = i + 1; - break; - } - } - - if (best == -1) { - FILE *err = fopen("MESA.LOG", "w"); - if (err != NULL) { - fprintf(err, "wglChoosePixelFormat failed\n"); - fprintf(err, "\tnSize = %d\n", ppfd->nSize); - fprintf(err, "\tnVersion = %d\n", ppfd->nVersion); - fprintf(err, "\tdwFlags = %lu\n", ppfd->dwFlags); - fprintf(err, "\tiPixelType = %d\n", ppfd->iPixelType); - fprintf(err, "\tcColorBits = %d\n", ppfd->cColorBits); - fprintf(err, "\tcRedBits = %d\n", ppfd->cRedBits); - fprintf(err, "\tcRedShift = %d\n", ppfd->cRedShift); - fprintf(err, "\tcGreenBits = %d\n", ppfd->cGreenBits); - fprintf(err, "\tcGreenShift = %d\n", ppfd->cGreenShift); - fprintf(err, "\tcBlueBits = %d\n", ppfd->cBlueBits); - fprintf(err, "\tcBlueShift = %d\n", ppfd->cBlueShift); - fprintf(err, "\tcAlphaBits = %d\n", ppfd->cAlphaBits); - fprintf(err, "\tcAlphaShift = %d\n", ppfd->cAlphaShift); - fprintf(err, "\tcAccumBits = %d\n", ppfd->cAccumBits); - fprintf(err, "\tcAccumRedBits = %d\n", ppfd->cAccumRedBits); - fprintf(err, "\tcAccumGreenBits = %d\n", ppfd->cAccumGreenBits); - fprintf(err, "\tcAccumBlueBits = %d\n", ppfd->cAccumBlueBits); - fprintf(err, "\tcAccumAlphaBits = %d\n", ppfd->cAccumAlphaBits); - fprintf(err, "\tcDepthBits = %d\n", ppfd->cDepthBits); - fprintf(err, "\tcStencilBits = %d\n", ppfd->cStencilBits); - fprintf(err, "\tcAuxBuffers = %d\n", ppfd->cAuxBuffers); - fprintf(err, "\tiLayerType = %d\n", ppfd->iLayerType); - fprintf(err, "\tbReserved = %d\n", ppfd->bReserved); - fprintf(err, "\tdwLayerMask = %lu\n", ppfd->dwLayerMask); - fprintf(err, "\tdwVisibleMask = %lu\n", ppfd->dwVisibleMask); - fprintf(err, "\tdwDamageMask = %lu\n", ppfd->dwDamageMask); - fclose(err); - } - - SetLastError(0); - return 0; - } - - return best; -} - -GLAPI int GLAPIENTRY -ChoosePixelFormat (HDC hdc, const PIXELFORMATDESCRIPTOR *ppfd) 
-{ - - return wglChoosePixelFormat(hdc, ppfd); -} - -GLAPI int GLAPIENTRY -wglDescribePixelFormat (HDC hdc, int iPixelFormat, UINT nBytes, - LPPIXELFORMATDESCRIPTOR ppfd) -{ - int qt_valid_pix; - - qt_valid_pix = pfd_tablen(); - - if (iPixelFormat < 1 || iPixelFormat > qt_valid_pix || - ((nBytes != sizeof(PIXELFORMATDESCRIPTOR)) && (nBytes != 0))) { - SetLastError(0); - return qt_valid_pix; - } - - if (nBytes != 0) - *ppfd = pix[iPixelFormat - 1].pfd; - - return qt_valid_pix; -} - -GLAPI int GLAPIENTRY -DescribePixelFormat (HDC hdc, int iPixelFormat, UINT nBytes, - LPPIXELFORMATDESCRIPTOR ppfd) -{ - return wglDescribePixelFormat(hdc, iPixelFormat, nBytes, ppfd); -} - -GLAPI int GLAPIENTRY -wglGetPixelFormat (HDC hdc) -{ - if (curPFD == 0) { - SetLastError(0); - return 0; - } - - return curPFD; -} - -GLAPI int GLAPIENTRY -GetPixelFormat (HDC hdc) -{ - return wglGetPixelFormat(hdc); -} - -GLAPI BOOL GLAPIENTRY -wglSetPixelFormat (HDC hdc, int iPixelFormat, const PIXELFORMATDESCRIPTOR *ppfd) -{ - int qt_valid_pix; - - qt_valid_pix = pfd_tablen(); - - if (iPixelFormat < 1 || iPixelFormat > qt_valid_pix) { - if (ppfd == NULL) { - PIXELFORMATDESCRIPTOR my_pfd; - if (!wglDescribePixelFormat(hdc, iPixelFormat, sizeof(PIXELFORMATDESCRIPTOR), &my_pfd)) { - SetLastError(0); - return FALSE; - } - } else if (ppfd->nSize != sizeof(PIXELFORMATDESCRIPTOR)) { - SetLastError(0); - return FALSE; - } - } - curPFD = iPixelFormat; - - return TRUE; -} - -GLAPI BOOL GLAPIENTRY -wglSwapBuffers (HDC hdc) -{ - if (!ctx) { - SetLastError(0); - return FALSE; - } - - fxMesaSwapBuffers(); - - return TRUE; -} - -GLAPI BOOL GLAPIENTRY -SetPixelFormat (HDC hdc, int iPixelFormat, const PIXELFORMATDESCRIPTOR *ppfd) -{ - return wglSetPixelFormat(hdc, iPixelFormat, ppfd); -} - -GLAPI BOOL GLAPIENTRY -SwapBuffers(HDC hdc) -{ - return wglSwapBuffers(hdc); -} - -static FIXED -FixedFromDouble (double d) -{ - struct { - FIXED f; - long l; - } pun; - pun.l = (long)(d * 65536L); - return pun.f; -} - -/* -** 
This was yanked from windows/gdi/wgl.c -*/ -GLAPI BOOL GLAPIENTRY -wglUseFontBitmapsA (HDC hdc, DWORD first, DWORD count, DWORD listBase) -{ - int i; - GLuint font_list; - DWORD size; - GLYPHMETRICS gm; - HANDLE hBits; - LPSTR lpBits; - MAT2 mat; - int success = TRUE; - - font_list = listBase; - - mat.eM11 = FixedFromDouble(1); - mat.eM12 = FixedFromDouble(0); - mat.eM21 = FixedFromDouble(0); - mat.eM22 = FixedFromDouble(-1); - - memset(&gm, 0, sizeof(gm)); - - /* - ** If we can't get the glyph outline, it may be because this is a fixed - ** font. Try processing it that way. - */ - if (GetGlyphOutline(hdc, first, GGO_BITMAP, &gm, 0, NULL, &mat) == GDI_ERROR) { - return wglUseFontBitmaps_FX(hdc, first, count, listBase); - } - - /* - ** Otherwise process all desired characters. - */ - for (i = 0; i < count; i++) { - DWORD err; - - glNewList(font_list + i, GL_COMPILE); - - /* allocate space for the bitmap/outline */ - size = GetGlyphOutline(hdc, first + i, GGO_BITMAP, &gm, 0, NULL, &mat); - if (size == GDI_ERROR) { - glEndList(); - err = GetLastError(); - success = FALSE; - continue; - } - - hBits = GlobalAlloc(GHND, size + 1); - lpBits = GlobalLock(hBits); - - err = GetGlyphOutline(hdc, /* handle to device context */ - first + i, /* character to query */ - GGO_BITMAP, /* format of data to return */ - &gm, /* pointer to structure for metrics */ - size, /* size of buffer for data */ - lpBits, /* pointer to buffer for data */ - &mat /* pointer to transformation */ - /* matrix structure */ - ); - - if (err == GDI_ERROR) { - GlobalUnlock(hBits); - GlobalFree(hBits); - - glEndList(); - err = GetLastError(); - success = FALSE; - continue; - } - - glBitmap(gm.gmBlackBoxX, gm.gmBlackBoxY, - -gm.gmptGlyphOrigin.x, - gm.gmptGlyphOrigin.y, - gm.gmCellIncX, gm.gmCellIncY, - (const GLubyte *)lpBits); - - GlobalUnlock(hBits); - GlobalFree(hBits); - - glEndList(); - } - - return success; -} - -GLAPI BOOL GLAPIENTRY -wglDescribeLayerPlane (HDC hdc, int iPixelFormat, int iLayerPlane, 
- UINT nBytes, LPLAYERPLANEDESCRIPTOR ppfd) -{ - SetLastError(0); - return FALSE; -} - -GLAPI int GLAPIENTRY -wglGetLayerPaletteEntries (HDC hdc, int iLayerPlane, int iStart, - int cEntries, COLORREF *pcr) -{ - SetLastError(0); - return FALSE; -} - -GLAPI BOOL GLAPIENTRY -wglRealizeLayerPalette (HDC hdc, int iLayerPlane, BOOL bRealize) -{ - SetLastError(0); - return FALSE; -} - -GLAPI int GLAPIENTRY -wglSetLayerPaletteEntries (HDC hdc, int iLayerPlane, int iStart, - int cEntries, CONST COLORREF *pcr) -{ - SetLastError(0); - return FALSE; -} - - -/*************************************************************************** - * [dBorca] simplistic ICD implementation, based on ICD code by Gregor Anich - */ - -typedef struct _icdTable { - DWORD size; - PROC table[336]; -} ICDTABLE, *PICDTABLE; - -#ifdef USE_MGL_NAMESPACE -#define GL_FUNC(func) mgl##func -#else -#define GL_FUNC(func) gl##func -#endif - -static ICDTABLE icdTable = { 336, { -#define ICD_ENTRY(func) (PROC)GL_FUNC(func), -#include "../icd/icdlist.h" -#undef ICD_ENTRY -} }; - - -GLAPI BOOL GLAPIENTRY -DrvCopyContext (HGLRC hglrcSrc, HGLRC hglrcDst, UINT mask) -{ - return wglCopyContext(hglrcSrc, hglrcDst, mask); -} - - -GLAPI HGLRC GLAPIENTRY -DrvCreateContext (HDC hdc) -{ - return wglCreateContext(hdc); -} - - -GLAPI BOOL GLAPIENTRY -DrvDeleteContext (HGLRC hglrc) -{ - return wglDeleteContext(hglrc); -} - - -GLAPI HGLRC GLAPIENTRY -DrvCreateLayerContext (HDC hdc, int iLayerPlane) -{ - return wglCreateContext(hdc); -} - - -GLAPI PICDTABLE GLAPIENTRY -DrvSetContext (HDC hdc, HGLRC hglrc, void *callback) -{ - return wglMakeCurrent(hdc, hglrc) ? 
&icdTable : NULL; -} - - -GLAPI BOOL GLAPIENTRY -DrvReleaseContext (HGLRC hglrc) -{ - return TRUE; -} - - -GLAPI BOOL GLAPIENTRY -DrvShareLists (HGLRC hglrc1, HGLRC hglrc2) -{ - return wglShareLists(hglrc1, hglrc2); -} - - -GLAPI BOOL GLAPIENTRY -DrvDescribeLayerPlane (HDC hdc, int iPixelFormat, - int iLayerPlane, UINT nBytes, - LPLAYERPLANEDESCRIPTOR plpd) -{ - return wglDescribeLayerPlane(hdc, iPixelFormat, iLayerPlane, nBytes, plpd); -} - - -GLAPI int GLAPIENTRY -DrvSetLayerPaletteEntries (HDC hdc, int iLayerPlane, - int iStart, int cEntries, CONST COLORREF *pcr) -{ - return wglSetLayerPaletteEntries(hdc, iLayerPlane, iStart, cEntries, pcr); -} - - -GLAPI int GLAPIENTRY -DrvGetLayerPaletteEntries (HDC hdc, int iLayerPlane, - int iStart, int cEntries, COLORREF *pcr) -{ - return wglGetLayerPaletteEntries(hdc, iLayerPlane, iStart, cEntries, pcr); -} - - -GLAPI BOOL GLAPIENTRY -DrvRealizeLayerPalette (HDC hdc, int iLayerPlane, BOOL bRealize) -{ - return wglRealizeLayerPalette(hdc, iLayerPlane, bRealize); -} - - -GLAPI BOOL GLAPIENTRY -DrvSwapLayerBuffers (HDC hdc, UINT fuPlanes) -{ - return wglSwapLayerBuffers(hdc, fuPlanes); -} - -GLAPI int GLAPIENTRY -DrvDescribePixelFormat (HDC hdc, int iPixelFormat, UINT nBytes, - LPPIXELFORMATDESCRIPTOR ppfd) -{ - return wglDescribePixelFormat(hdc, iPixelFormat, nBytes, ppfd); -} - - -GLAPI PROC GLAPIENTRY -DrvGetProcAddress (LPCSTR lpszProc) -{ - return wglGetProcAddress(lpszProc); -} - - -GLAPI BOOL GLAPIENTRY -DrvSetPixelFormat (HDC hdc, int iPixelFormat) -{ - return wglSetPixelFormat(hdc, iPixelFormat, NULL); -} - - -GLAPI BOOL GLAPIENTRY -DrvSwapBuffers (HDC hdc) -{ - return wglSwapBuffers(hdc); -} - - -GLAPI BOOL GLAPIENTRY -DrvValidateVersion (DWORD version) -{ - (void)version; - return TRUE; -} - - -#if (_MSC_VER >= 1200) -#pragma warning( pop ) -#endif - -#endif /* FX */ diff --git a/src/mesa/drivers/windows/gdi/InitCritSections.cpp b/src/mesa/drivers/windows/gdi/InitCritSections.cpp deleted file mode 100644 index 
69f03b8e47c..00000000000 --- a/src/mesa/drivers/windows/gdi/InitCritSections.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "glapi.h" -#include "glThread.h" - -#ifdef WIN32 - -extern "C" _glthread_Mutex OneTimeLock; -extern "C" _glthread_Mutex GenTexturesLock; - -extern "C" void FreeAllTSD(void); - -class _CriticalSectionInit -{ -public: - static _CriticalSectionInit m_inst; - - _CriticalSectionInit() - { - _glthread_INIT_MUTEX(OneTimeLock); - _glthread_INIT_MUTEX(GenTexturesLock); - } - - ~_CriticalSectionInit() - { - _glthread_DESTROY_MUTEX(OneTimeLock); - _glthread_DESTROY_MUTEX(GenTexturesLock); - FreeAllTSD(); - } -}; - -_CriticalSectionInit _CriticalSectionInit::m_inst; - - -#endif /* WIN32 */ diff --git a/src/mesa/drivers/windows/gdi/wgl.c b/src/mesa/drivers/windows/gdi/wgl.c index 1dafe6e2952..bf4ca9c908f 100644 --- a/src/mesa/drivers/windows/gdi/wgl.c +++ b/src/mesa/drivers/windows/gdi/wgl.c @@ -390,7 +390,7 @@ static FIXED FixedFromDouble(double d) static BOOL wglUseFontBitmaps_FX(HDC fontDevice, DWORD firstChar, DWORD numChars, DWORD listBase) { -#define VERIFY(a) a +#define VERIFY(a) (void)(a) TEXTMETRIC metric; BITMAPINFO *dibInfo; diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c index e3a37eb1ace..35a150d0687 100644 --- a/src/mesa/drivers/windows/gdi/wmesa.c +++ b/src/mesa/drivers/windows/gdi/wmesa.c @@ -5,7 +5,7 @@ #include "wmesadef.h" #include "colors.h" -#include <GL/wmesa.h> +#include "GL/wmesa.h" #include <winuser.h> #include "main/context.h" #include "main/extensions.h" @@ -30,7 +30,7 @@ static WMesaFramebuffer FirstFramebuffer = NULL; * Create a new WMesaFramebuffer object which will correspond to the * given HDC (Window handle). 
*/ -WMesaFramebuffer +static WMesaFramebuffer wmesa_new_framebuffer(HDC hdc, struct gl_config *visual) { WMesaFramebuffer pwfb @@ -48,7 +48,7 @@ wmesa_new_framebuffer(HDC hdc, struct gl_config *visual) /** * Given an hdc, free the corresponding WMesaFramebuffer */ -void +static void wmesa_free_framebuffer(HDC hdc) { WMesaFramebuffer pwfb, prev; @@ -71,7 +71,7 @@ wmesa_free_framebuffer(HDC hdc) /** * Given an hdc, return the corresponding WMesaFramebuffer */ -WMesaFramebuffer +static WMesaFramebuffer wmesa_lookup_framebuffer(HDC hdc) { WMesaFramebuffer pwfb; @@ -147,9 +147,8 @@ static void wmSetPixelFormat(WMesaFramebuffer pwfb, HDC hDC) * We write into this memory with the span routines and then blit it * to the window on a buffer swap. */ -BOOL wmCreateBackingStore(WMesaFramebuffer pwfb, long lxSize, long lySize) +static BOOL wmCreateBackingStore(WMesaFramebuffer pwfb, long lxSize, long lySize) { - HDC hdc = pwfb->hDC; LPBITMAPINFO pbmi = &(pwfb->bmi); HDC hic; @@ -227,7 +226,6 @@ wmesa_get_buffer_size(struct gl_framebuffer *buffer, GLuint *width, GLuint *heig static void wmesa_flush(struct gl_context *ctx) { - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->WinSysDrawBuffer); if (ctx->Visual.doubleBufferMode == 1) { @@ -254,9 +252,7 @@ static void wmesa_flush(struct gl_context *ctx) static void clear_color(struct gl_context *ctx, const GLfloat color[4]) { WMesaContext pwc = wmesa_context(ctx); - WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); GLubyte col[3]; - UINT bytesPerPixel = pwfb->cColorBits / 8; CLAMPED_FLOAT_TO_UBYTE(col[0], color[0]); CLAMPED_FLOAT_TO_UBYTE(col[1], color[1]); @@ -448,21 +444,15 @@ static void clear(struct gl_context *ctx, GLbitfield mask) **/ /* Write a horizontal span of RGBA color pixels with a boolean mask. 
*/ -static void write_rgba_span_front(const struct gl_context *ctx, - struct gl_renderbuffer *rb, - GLuint n, GLint x, GLint y, - const GLubyte rgba[][4], - const GLubyte mask[] ) +static void write_rgba_span_front(struct gl_context *ctx, + struct gl_renderbuffer *rb, + GLuint n, GLint x, GLint y, + const void *values, + const GLubyte *mask) { + const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values; WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_lookup_framebuffer(pwc->hDC); - CONST BITMAPINFO bmi= - { - { - sizeof(BITMAPINFOHEADER), - n, 1, 1, 32, BI_RGB, 0, 1, 1, 0, 0 - } - }; HBITMAP bmp=0; HDC mdc=0; typedef union @@ -535,12 +525,13 @@ static void write_rgba_span_front(const struct gl_context *ctx, } /* Write a horizontal span of RGB color pixels with a boolean mask. */ -static void write_rgb_span_front(const struct gl_context *ctx, - struct gl_renderbuffer *rb, - GLuint n, GLint x, GLint y, - const GLubyte rgb[][3], - const GLubyte mask[] ) +static void write_rgb_span_front(struct gl_context *ctx, + struct gl_renderbuffer *rb, + GLuint n, GLint x, GLint y, + const void *values, + const GLubyte *mask) { + const GLubyte (*rgb)[3] = (const GLubyte (*)[3])values; WMesaContext pwc = wmesa_context(ctx); GLuint i; @@ -564,12 +555,13 @@ static void write_rgb_span_front(const struct gl_context *ctx, * Write a horizontal span of pixels with a boolean mask. The current color * is used for all pixels. 
*/ -static void write_mono_rgba_span_front(const struct gl_context *ctx, - struct gl_renderbuffer *rb, - GLuint n, GLint x, GLint y, - const GLchan color[4], - const GLubyte mask[]) +static void write_mono_rgba_span_front(struct gl_context *ctx, + struct gl_renderbuffer *rb, + GLuint n, GLint x, GLint y, + const void *value, + const GLubyte *mask) { + const GLchan *color = (const GLchan *)value; GLuint i; WMesaContext pwc = wmesa_context(ctx); COLORREF colorref; @@ -589,13 +581,14 @@ static void write_mono_rgba_span_front(const struct gl_context *ctx, } /* Write an array of RGBA pixels with a boolean mask. */ -static void write_rgba_pixels_front(const struct gl_context *ctx, - struct gl_renderbuffer *rb, - GLuint n, - const GLint x[], const GLint y[], - const GLubyte rgba[][4], - const GLubyte mask[] ) +static void write_rgba_pixels_front(struct gl_context *ctx, + struct gl_renderbuffer *rb, + GLuint n, + const GLint x[], const GLint y[], + const void *values, + const GLubyte *mask) { + const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values; GLuint i; WMesaContext pwc = wmesa_context(ctx); (void) ctx; @@ -612,13 +605,14 @@ static void write_rgba_pixels_front(const struct gl_context *ctx, * Write an array of pixels with a boolean mask. The current color * is used for all pixels. */ -static void write_mono_rgba_pixels_front(const struct gl_context *ctx, - struct gl_renderbuffer *rb, - GLuint n, - const GLint x[], const GLint y[], - const GLchan color[4], - const GLubyte mask[] ) +static void write_mono_rgba_pixels_front(struct gl_context *ctx, + struct gl_renderbuffer *rb, + GLuint n, + const GLint x[], const GLint y[], + const void *value, + const GLubyte *mask) { + const GLchan *color = (const GLchan *)value; GLuint i; WMesaContext pwc = wmesa_context(ctx); COLORREF colorref; @@ -630,11 +624,12 @@ static void write_mono_rgba_pixels_front(const struct gl_context *ctx, } /* Read a horizontal span of color pixels. 
*/ -static void read_rgba_span_front(const struct gl_context *ctx, - struct gl_renderbuffer *rb, - GLuint n, GLint x, GLint y, - GLubyte rgba[][4] ) +static void read_rgba_span_front(struct gl_context *ctx, + struct gl_renderbuffer *rb, + GLuint n, GLint x, GLint y, + void *values) { + GLubyte (*rgba)[4] = (GLubyte (*)[4])values; WMesaContext pwc = wmesa_context(ctx); GLuint i; COLORREF Color; @@ -650,11 +645,12 @@ static void read_rgba_span_front(const struct gl_context *ctx, /* Read an array of color pixels. */ -static void read_rgba_pixels_front(const struct gl_context *ctx, - struct gl_renderbuffer *rb, - GLuint n, const GLint x[], const GLint y[], - GLubyte rgba[][4]) +static void read_rgba_pixels_front(struct gl_context *ctx, + struct gl_renderbuffer *rb, + GLuint n, const GLint x[], const GLint y[], + void *values) { + GLubyte (*rgba)[4] = (GLubyte (*)[4])values; WMesaContext pwc = wmesa_context(ctx); GLuint i; COLORREF Color; @@ -679,13 +675,13 @@ LPDWORD lpdw = ((LPDWORD)((pwc)->pbPixels + (pwc)->ScanWidth * (y)) + (x)); \ /* Write a horizontal span of RGBA color pixels with a boolean mask. */ -static void write_rgba_span_32(const struct gl_context *ctx, +static void write_rgba_span_32(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - const GLubyte rgba[][4], - const GLubyte mask[] ) + const void *values, + const GLubyte *mask) { - WMesaContext pwc = wmesa_context(ctx); + const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values; WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); GLuint i; LPDWORD lpdw; @@ -709,13 +705,13 @@ static void write_rgba_span_32(const struct gl_context *ctx, /* Write a horizontal span of RGB color pixels with a boolean mask. 
*/ -static void write_rgb_span_32(const struct gl_context *ctx, +static void write_rgb_span_32(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - const GLubyte rgb[][3], - const GLubyte mask[] ) + const void *values, + const GLubyte *mask) { - WMesaContext pwc = wmesa_context(ctx); + const GLubyte (*rgb)[3] = (const GLubyte (*)[3])values; WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); GLuint i; LPDWORD lpdw; @@ -741,16 +737,16 @@ static void write_rgb_span_32(const struct gl_context *ctx, * Write a horizontal span of pixels with a boolean mask. The current color * is used for all pixels. */ -static void write_mono_rgba_span_32(const struct gl_context *ctx, +static void write_mono_rgba_span_32(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - const GLchan color[4], - const GLubyte mask[]) + const void *value, + const GLubyte *mask) { + const GLchan *color = (const GLchan *)value; LPDWORD lpdw; DWORD pixel; GLuint i; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); lpdw = ((LPDWORD)(pwfb->pbPixels + pwfb->ScanWidth * y)) + x; y=FLIP(y); @@ -767,14 +763,14 @@ static void write_mono_rgba_span_32(const struct gl_context *ctx, } /* Write an array of RGBA pixels with a boolean mask. */ -static void write_rgba_pixels_32(const struct gl_context *ctx, +static void write_rgba_pixels_32(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, const GLint x[], const GLint y[], - const GLubyte rgba[][4], - const GLubyte mask[]) + const void *values, + const GLubyte *mask) { + const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values; GLuint i; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); for (i=0; i<n; i++) if (mask[i]) @@ -786,15 +782,15 @@ static void write_rgba_pixels_32(const struct gl_context *ctx, * Write an array of pixels with a boolean mask. The current color * is used for all pixels. 
*/ -static void write_mono_rgba_pixels_32(const struct gl_context *ctx, +static void write_mono_rgba_pixels_32(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, const GLint x[], const GLint y[], - const GLchan color[4], - const GLubyte mask[]) + const void *value, + const GLubyte *mask) { + const GLchan *color = (const GLchan *)value; GLuint i; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); for (i=0; i<n; i++) if (mask[i]) @@ -803,15 +799,15 @@ static void write_mono_rgba_pixels_32(const struct gl_context *ctx, } /* Read a horizontal span of color pixels. */ -static void read_rgba_span_32(const struct gl_context *ctx, +static void read_rgba_span_32(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - GLubyte rgba[][4] ) + void *values) { + GLubyte (*rgba)[4] = (GLubyte (*)[4])values; GLuint i; DWORD pixel; LPDWORD lpdw; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); y = FLIP(y); @@ -827,15 +823,15 @@ static void read_rgba_span_32(const struct gl_context *ctx, /* Read an array of color pixels. */ -static void read_rgba_pixels_32(const struct gl_context *ctx, +static void read_rgba_pixels_32(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, const GLint x[], const GLint y[], - GLubyte rgba[][4]) + void *values) { + GLubyte (*rgba)[4] = (GLubyte (*)[4])values; GLuint i; DWORD pixel; LPDWORD lpdw; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); for (i=0; i<n; i++) { @@ -861,13 +857,13 @@ lpb[1] = (g); \ lpb[2] = (r); } /* Write a horizontal span of RGBA color pixels with a boolean mask. 
*/ -static void write_rgba_span_24(const struct gl_context *ctx, +static void write_rgba_span_24(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - const GLubyte rgba[][4], - const GLubyte mask[] ) + const void *values, + const GLubyte *mask) { - WMesaContext pwc = wmesa_context(ctx); + const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values; WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); GLuint i; LPBYTE lpb; @@ -895,13 +891,13 @@ static void write_rgba_span_24(const struct gl_context *ctx, /* Write a horizontal span of RGB color pixels with a boolean mask. */ -static void write_rgb_span_24(const struct gl_context *ctx, +static void write_rgb_span_24(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - const GLubyte rgb[][3], - const GLubyte mask[] ) + const void *values, + const GLubyte *mask) { - WMesaContext pwc = wmesa_context(ctx); + const GLubyte (*rgb)[3] = (const GLubyte (*)[3])values; WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); GLuint i; LPBYTE lpb; @@ -931,15 +927,15 @@ static void write_rgb_span_24(const struct gl_context *ctx, * Write a horizontal span of pixels with a boolean mask. The current color * is used for all pixels. */ -static void write_mono_rgba_span_24(const struct gl_context *ctx, +static void write_mono_rgba_span_24(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - const GLchan color[4], - const GLubyte mask[]) + const void *value, + const GLubyte *mask) { + const GLchan *color = (const GLchan *)value; LPBYTE lpb; GLuint i; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); lpb = ((LPBYTE)(pwfb->pbPixels + pwfb->ScanWidth * y)) + (3 * x); y=FLIP(y); @@ -960,14 +956,14 @@ static void write_mono_rgba_span_24(const struct gl_context *ctx, } /* Write an array of RGBA pixels with a boolean mask. 
*/ -static void write_rgba_pixels_24(const struct gl_context *ctx, +static void write_rgba_pixels_24(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, const GLint x[], const GLint y[], - const GLubyte rgba[][4], - const GLubyte mask[]) + const void *values, + const GLubyte *mask) { + const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values; GLuint i; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); for (i=0; i<n; i++) if (mask[i]) @@ -979,15 +975,15 @@ static void write_rgba_pixels_24(const struct gl_context *ctx, * Write an array of pixels with a boolean mask. The current color * is used for all pixels. */ -static void write_mono_rgba_pixels_24(const struct gl_context *ctx, +static void write_mono_rgba_pixels_24(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, const GLint x[], const GLint y[], - const GLchan color[4], - const GLubyte mask[]) + const void *value, + const GLubyte *mask) { + const GLchan *color = (const GLchan *)value; GLuint i; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); for (i=0; i<n; i++) if (mask[i]) @@ -996,14 +992,14 @@ static void write_mono_rgba_pixels_24(const struct gl_context *ctx, } /* Read a horizontal span of color pixels. */ -static void read_rgba_span_24(const struct gl_context *ctx, +static void read_rgba_span_24(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - GLubyte rgba[][4] ) + void *values) { + GLubyte (*rgba)[4] = (GLubyte (*)[4])values; GLuint i; LPBYTE lpb; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); y = FLIP(y); @@ -1018,14 +1014,14 @@ static void read_rgba_span_24(const struct gl_context *ctx, /* Read an array of color pixels. 
*/ -static void read_rgba_pixels_24(const struct gl_context *ctx, +static void read_rgba_pixels_24(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, const GLint x[], const GLint y[], - GLubyte rgba[][4]) + void *values) { + GLubyte (*rgba)[4] = (GLubyte (*)[4])values; GLuint i; LPBYTE lpb; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); for (i=0; i<n; i++) { @@ -1050,13 +1046,13 @@ LPWORD lpw = ((LPWORD)((pwc)->pbPixels + (pwc)->ScanWidth * (y)) + (x)); \ /* Write a horizontal span of RGBA color pixels with a boolean mask. */ -static void write_rgba_span_16(const struct gl_context *ctx, +static void write_rgba_span_16(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - const GLubyte rgba[][4], - const GLubyte mask[] ) + const void *values, + const GLubyte *mask) { - WMesaContext pwc = wmesa_context(ctx); + const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values; WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); GLuint i; LPWORD lpw; @@ -1080,13 +1076,13 @@ static void write_rgba_span_16(const struct gl_context *ctx, /* Write a horizontal span of RGB color pixels with a boolean mask. */ -static void write_rgb_span_16(const struct gl_context *ctx, +static void write_rgb_span_16(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - const GLubyte rgb[][3], - const GLubyte mask[] ) + const void *values, + const GLubyte *mask) { - WMesaContext pwc = wmesa_context(ctx); + const GLubyte (*rgb)[3] = (const GLubyte (*)[3])values; WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); GLuint i; LPWORD lpw; @@ -1112,16 +1108,16 @@ static void write_rgb_span_16(const struct gl_context *ctx, * Write a horizontal span of pixels with a boolean mask. The current color * is used for all pixels. 
*/ -static void write_mono_rgba_span_16(const struct gl_context *ctx, +static void write_mono_rgba_span_16(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - const GLchan color[4], - const GLubyte mask[]) + const void *value, + const GLubyte *mask) { + const GLchan *color = (const GLchan *)value; LPWORD lpw; WORD pixel; GLuint i; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); (void) ctx; lpw = ((LPWORD)(pwfb->pbPixels + pwfb->ScanWidth * y)) + x; @@ -1139,14 +1135,14 @@ static void write_mono_rgba_span_16(const struct gl_context *ctx, } /* Write an array of RGBA pixels with a boolean mask. */ -static void write_rgba_pixels_16(const struct gl_context *ctx, +static void write_rgba_pixels_16(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, const GLint x[], const GLint y[], - const GLubyte rgba[][4], - const GLubyte mask[]) + const void *values, + const GLubyte *mask) { + const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values; GLuint i; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); (void) ctx; for (i=0; i<n; i++) @@ -1159,15 +1155,15 @@ static void write_rgba_pixels_16(const struct gl_context *ctx, * Write an array of pixels with a boolean mask. The current color * is used for all pixels. */ -static void write_mono_rgba_pixels_16(const struct gl_context *ctx, +static void write_mono_rgba_pixels_16(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, const GLint x[], const GLint y[], - const GLchan color[4], - const GLubyte mask[]) + const void *value, + const GLubyte *mask) { + const GLchan *color = (const GLchan *)value; GLuint i; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); (void) ctx; for (i=0; i<n; i++) @@ -1177,14 +1173,14 @@ static void write_mono_rgba_pixels_16(const struct gl_context *ctx, } /* Read a horizontal span of color pixels. 
*/ -static void read_rgba_span_16(const struct gl_context *ctx, +static void read_rgba_span_16(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, GLint x, GLint y, - GLubyte rgba[][4] ) + void *values) { + GLubyte (*rgba)[4] = (GLubyte (*)[4])values; GLuint i, pixel; LPWORD lpw; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); y = FLIP(y); @@ -1201,14 +1197,14 @@ static void read_rgba_span_16(const struct gl_context *ctx, /* Read an array of color pixels. */ -static void read_rgba_pixels_16(const struct gl_context *ctx, +static void read_rgba_pixels_16(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint n, const GLint x[], const GLint y[], - GLubyte rgba[][4]) + void *values) { + GLubyte (*rgba)[4] = (GLubyte (*)[4])values; GLuint i, pixel; LPWORD lpw; - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer); for (i=0; i<n; i++) { @@ -1261,8 +1257,9 @@ wmesa_renderbuffer_storage(struct gl_context *ctx, * Plug in the Get/PutRow/Values functions for a renderbuffer depending * on if we're drawing to the front or back color buffer. 
*/ -void wmesa_set_renderbuffer_funcs(struct gl_renderbuffer *rb, int pixelformat, - int cColorBits, int double_buffer) +static void +wmesa_set_renderbuffer_funcs(struct gl_renderbuffer *rb, int pixelformat, + int cColorBits, int double_buffer) { if (double_buffer) { /* back buffer */ @@ -1324,7 +1321,6 @@ static void wmesa_resize_buffers(struct gl_context *ctx, struct gl_framebuffer *buffer, GLuint width, GLuint height) { - WMesaContext pwc = wmesa_context(ctx); WMesaFramebuffer pwfb = wmesa_framebuffer(buffer); if (pwfb->Base.Width != width || pwfb->Base.Height != height) { @@ -1353,7 +1349,6 @@ static void wmesa_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height) { - WMesaContext pwc = wmesa_context(ctx); GLuint new_width, new_height; wmesa_get_buffer_size(ctx->WinSysDrawBuffer, &new_width, &new_height); @@ -1553,7 +1548,7 @@ void WMesaDestroyContext( WMesaContext pwc ) /** * Create a new color renderbuffer. */ -struct gl_renderbuffer * +static struct gl_renderbuffer * wmesa_new_renderbuffer(void) { struct gl_renderbuffer *rb = CALLOC_STRUCT(gl_renderbuffer); diff --git a/src/mesa/drivers/windows/gdi/wmesadef.h b/src/mesa/drivers/windows/gdi/wmesadef.h index 32289ebc700..9fda8839014 100644 --- a/src/mesa/drivers/windows/gdi/wmesadef.h +++ b/src/mesa/drivers/windows/gdi/wmesadef.h @@ -1,8 +1,8 @@ #ifndef WMESADEF_H #define WMESADEF_H -#ifdef __MINGW32__ + #include <windows.h> -#endif + #include "main/context.h" diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 3031b7b3273..81f48f9d95a 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -445,11 +445,11 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx, if (swrast->NewState) _swrast_validate_derived( ctx ); - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { /* unpack from PBO */ GLubyte *buf; if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, - format, type, pixels)) { + format, type, 
INT_MAX, pixels)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels(invalid PBO access)"); return; @@ -507,7 +507,7 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx, XPutImage(dpy, xrb->pixmap, gc, &ximage, 0, 0, dstX, dstY, w, h); } - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, unpack->BufferObj); } @@ -580,11 +580,11 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx, if (swrast->NewState) _swrast_validate_derived( ctx ); - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { /* unpack from PBO */ GLubyte *buf; if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, - format, type, pixels)) { + format, type, INT_MAX, pixels)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels(invalid PBO access)"); return; |