diff options
author | Christian König <[email protected]> | 2011-07-04 15:04:41 +0200 |
---|---|---|
committer | Christian König <[email protected]> | 2011-07-04 15:04:41 +0200 |
commit | c3b2230b71cb3a00a7f4c0987197d397bada650b (patch) | |
tree | 018f5df0f8b5976ddb56ef4f13e9466587838998 /src/mesa | |
parent | 003401f95c9b59471c22368b7da16fe7a951e490 (diff) | |
parent | 424b1210d951c206e7c2fb8f2778acbd384eb247 (diff) |
Merge remote-tracking branch 'origin/master' into pipe-video
Conflicts:
configure.ac
src/gallium/drivers/r600/r600_state_inlines.h
src/gallium/tests/trivial/Makefile
src/gallium/winsys/g3dvl/dri/XF86dri.c
src/gallium/winsys/g3dvl/dri/driclient.c
src/gallium/winsys/g3dvl/dri/driclient.h
src/gallium/winsys/g3dvl/dri/xf86dri.h
src/gallium/winsys/g3dvl/dri/xf86dristr.h
src/gallium/winsys/r600/drm/r600_bo.c
Diffstat (limited to 'src/mesa')
119 files changed, 2583 insertions, 1566 deletions
diff --git a/src/mesa/SConscript b/src/mesa/SConscript index fdb4d5a5814..24e2155c387 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -121,7 +121,6 @@ main_sources = [ 'main/texobj.c', 'main/texpal.c', 'main/texparam.c', - 'main/texrender.c', 'main/texstate.c', 'main/texstore.c', 'main/texturebarrier.c', @@ -174,6 +173,7 @@ swrast_sources = [ 'swrast/s_stencil.c', 'swrast/s_texcombine.c', 'swrast/s_texfilter.c', + 'swrast/s_texrender.c', 'swrast/s_triangle.c', 'swrast/s_zoom.c', ] diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 0dbc7c3e853..8ab129dd73d 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -40,7 +40,6 @@ #include "main/texstore.h" #include "main/bufferobj.h" #include "main/fbobject.h" -#include "main/texrender.h" #include "main/samplerobj.h" #include "main/syncobj.h" #include "main/texturebarrier.h" @@ -183,8 +182,8 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->NewFramebuffer = _mesa_new_framebuffer; driver->NewRenderbuffer = _mesa_new_soft_renderbuffer; - driver->RenderTexture = _mesa_render_texture; - driver->FinishRenderTexture = _mesa_finish_render_texture; + driver->RenderTexture = _swrast_render_texture; + driver->FinishRenderTexture = _swrast_finish_render_texture; driver->FramebufferRenderbuffer = _mesa_framebuffer_renderbuffer; driver->ValidateFramebuffer = _mesa_validate_framebuffer; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 979926a7e8f..0e58aeca3f5 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -2691,12 +2691,26 @@ copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, GLenum format, type; GLint bpp; void *buf; + struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer; texObj = _mesa_get_current_tex_object(ctx, target); texImage = _mesa_get_tex_image(ctx, texObj, target, level); /* Choose 
format/type for temporary image buffer */ format = _mesa_base_tex_format(ctx, internalFormat); + + if (format == GL_LUMINANCE && + _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) { + /* The glReadPixels() path will convert RGB to luminance by + * summing R+G+B. glCopyTexImage() is supposed to behave as + * glCopyPixels, which doesn't do that change, and instead + * leaves it up to glTexImage which converts RGB to luminance by + * just taking the R channel. To avoid glReadPixels() trashing + * our data, use RGBA for our temporary image. + */ + format = GL_RGBA; + } + type = get_temp_image_type(ctx, format); bpp = _mesa_bytes_per_pixel(format, type); if (bpp <= 0) { diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index 3298dbb69f5..d4af5e51026 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -40,7 +40,7 @@ static GLuint -translate_texture_format(GLuint mesa_format, GLuint internal_format) +translate_texture_format(GLuint mesa_format) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -156,8 +156,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) */ i830->state.tex_offset[unit] = dst_x * intelObj->mt->cpp + dst_y * pitch; - format = translate_texture_format(firstImage->TexFormat, - firstImage->InternalFormat); + format = translate_texture_format(firstImage->TexFormat); state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | (LOAD_TEXTURE_MAP0 << unit) | 4); diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 19f08077599..584df82b50c 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -31,8 +31,10 @@ #include "intel_regions.h" #include "intel_tris.h" #include "intel_fbo.h" +#include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" +#include "swrast_setup/swrast_setup.h" #define FILE_DEBUG_FLAG DEBUG_STATE @@ 
-609,6 +611,8 @@ i830_set_draw_region(struct intel_context *intel, struct gl_context *ctx = &intel->ctx; struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct gl_renderbuffer *drb; + struct intel_renderbuffer *idrb = NULL; GLuint value; struct i830_hw_state *state = &i830->state; uint32_t draw_x, draw_y; @@ -649,6 +653,13 @@ i830_set_draw_region(struct intel_context *intel, } state->Buffer[I830_DESTREG_DV1] = value; + drb = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; + if (!drb) + drb = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; + + if (drb) + idrb = intel_renderbuffer(drb); + /* We set up the drawing rectangle to be offset into the color * region's location in the miptree. If it doesn't match with * depth's offsets, we can't render to it. @@ -660,16 +671,15 @@ i830_set_draw_region(struct intel_context *intel, * can't do in general due to tiling) */ FALLBACK(intel, I830_FALLBACK_DRAW_OFFSET, - (depth_region && color_regions[0]) && - (depth_region->draw_x != color_regions[0]->draw_x || - depth_region->draw_y != color_regions[0]->draw_y)); - - if (color_regions[0]) { - draw_x = color_regions[0]->draw_x; - draw_y = color_regions[0]->draw_y; - } else if (depth_region) { - draw_x = depth_region->draw_x; - draw_y = depth_region->draw_y; + idrb && irb && (idrb->draw_x != irb->draw_x || + idrb->draw_y != irb->draw_y)); + + if (irb) { + draw_x = irb->draw_x; + draw_y = irb->draw_y; + } else if (idrb) { + draw_x = idrb->draw_x; + draw_y = idrb->draw_y; } else { draw_x = 0; draw_y = 0; @@ -707,6 +717,12 @@ i830_assert_not_dirty( struct intel_context *intel ) static void i830_invalidate_state(struct intel_context *intel, GLuint new_state) { + struct gl_context *ctx = &intel->ctx; + + _swsetup_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _tnl_invalidate_vertex_state(ctx, new_state); + if (new_state & _NEW_LIGHT) 
i830_update_provoking_vertex(&intel->ctx); } diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 5aa2ea18048..bcf42d59969 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -39,8 +39,7 @@ static GLuint -translate_texture_format(gl_format mesa_format, GLuint internal_format, - GLenum DepthMode) +translate_texture_format(gl_format mesa_format, GLenum DepthMode) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -165,7 +164,6 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */ format = translate_texture_format(firstImage->TexFormat, - firstImage->InternalFormat, sampler->DepthMode); pitch = intelObj->mt->region->pitch * intelObj->mt->cpp; diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 820feba04ba..9721a1c0e4d 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -33,8 +33,10 @@ #include "main/macros.h" #include "main/colormac.h" +#include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" +#include "swrast_setup/swrast_setup.h" #include "intel_batchbuffer.h" #include "intel_regions.h" @@ -561,6 +563,8 @@ i915_set_draw_region(struct intel_context *intel, struct gl_context *ctx = &intel->ctx; struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct gl_renderbuffer *drb; + struct intel_renderbuffer *idrb = NULL; GLuint value; struct i915_hw_state *state = &i915->state; uint32_t draw_x, draw_y, draw_offset; @@ -609,6 +613,13 @@ i915_set_draw_region(struct intel_context *intel, } state->Buffer[I915_DESTREG_DV1] = value; + drb = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; + if (!drb) + drb = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; + + if (drb) + idrb = 
intel_renderbuffer(drb); + /* We set up the drawing rectangle to be offset into the color * region's location in the miptree. If it doesn't match with * depth's offsets, we can't render to it. @@ -620,16 +631,15 @@ i915_set_draw_region(struct intel_context *intel, * can't do in general due to tiling) */ FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET, - (depth_region && color_regions[0]) && - (depth_region->draw_x != color_regions[0]->draw_x || - depth_region->draw_y != color_regions[0]->draw_y)); - - if (color_regions[0]) { - draw_x = color_regions[0]->draw_x; - draw_y = color_regions[0]->draw_y; - } else if (depth_region) { - draw_x = depth_region->draw_x; - draw_y = depth_region->draw_y; + idrb && irb && (idrb->draw_x != irb->draw_x || + idrb->draw_y != irb->draw_y)); + + if (irb) { + draw_x = irb->draw_x; + draw_y = irb->draw_y; + } else if (idrb) { + draw_x = idrb->draw_x; + draw_y = idrb->draw_y; } else { draw_x = 0; draw_y = 0; @@ -695,6 +705,16 @@ i915_is_hiz_depth_format(struct intel_context *intel, return false; } +static void +i915_invalidate_state(struct intel_context *intel, GLuint new_state) +{ + struct gl_context *ctx = &intel->ctx; + + _swsetup_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _tnl_invalidate_vertex_state(ctx, new_state); +} + void i915InitVtbl(struct i915_context *i915) { @@ -709,6 +729,7 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.update_texture_state = i915UpdateTextureState; i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; i915->intel.vtbl.finish_batch = intel_finish_vb; + i915->intel.vtbl.invalidate_state = i915_invalidate_state; i915->intel.vtbl.render_target_supported = i915_render_target_supported; i915->intel.vtbl.is_hiz_depth_format = i915_is_hiz_depth_format; } diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index c7d428ba48d..d82206bae52 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ 
-146,15 +146,12 @@ static void compile_clip_prog( struct brw_context *brw, printf("\n"); } - /* Upload - */ - drm_intel_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_upload_cache(&brw->cache, - BRW_CLIP_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), - &brw->clip.prog_data); + brw_upload_cache(&brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->clip.prog_offset, &brw->clip.prog_data); ralloc_free(mem_ctx); } @@ -271,12 +268,11 @@ static void upload_clip_prog(struct brw_context *brw) } } - drm_intel_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, - &key, sizeof(key), - &brw->clip.prog_data); - if (brw->clip.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_CLIP_PROG, + &key, sizeof(key), + &brw->clip.prog_offset, &brw->clip.prog_data)) { compile_clip_prog( brw, &key ); + } } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 6015c8cbe9f..b9efbb74c87 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -43,11 +43,15 @@ brw_prepare_clip_unit(struct brw_context *brw) clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); memset(clip, 0, sizeof(*clip)); - /* CACHE_NEW_CLIP_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */ clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1); - /* reloc */ - clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; + clip->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->clip.state_offset + + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_offset + + (clip->thread0.grf_reg_count << 1)) >> 6; clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; clip->thread1.single_program_flow = 1; @@ -110,14 +114,6 @@ 
brw_prepare_clip_unit(struct brw_context *brw) clip->viewport_ymin = -1; clip->viewport_ymax = 1; - /* Emit clip program relocation */ - assert(brw->clip.prog_bo); - drm_intel_bo_emit_reloc(intel->batch.bo, - (brw->clip.state_offset + - offsetof(struct brw_clip_unit_state, thread0)), - brw->clip.prog_bo, clip->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT; } @@ -125,6 +121,7 @@ const struct brw_tracked_state brw_clip_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index d6a99ab06e2..636821839a1 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -240,6 +240,8 @@ GLboolean brwCreateContext( int api, brw->emit_state_always = 0; + intel->batch.need_workaround_flush = true; + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 621b6f8990b..a8e2b802803 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -142,7 +142,9 @@ enum brw_state_id { BRW_STATE_NR_VS_SURFACES, BRW_STATE_INDEX_BUFFER, BRW_STATE_VS_CONSTBUF, - BRW_STATE_WM_CONSTBUF + BRW_STATE_WM_CONSTBUF, + BRW_STATE_PROGRAM_CACHE, + BRW_STATE_STATE_BASE_ADDRESS, }; #define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) @@ -172,6 +174,8 @@ enum brw_state_id { #define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER) #define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) #define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF) +#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) +#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) struct brw_state_flags { /** State 
update flags signalled by mesa internals */ @@ -363,9 +367,11 @@ struct brw_cache_item { /** 32-bit hash of the key data */ GLuint hash; GLuint key_size; /* for variable-sized keys */ + GLuint aux_size; const void *key; - drm_intel_bo *bo; + uint32_t offset; + uint32_t size; struct brw_cache_item *next; }; @@ -376,14 +382,11 @@ struct brw_cache { struct brw_context *brw; struct brw_cache_item **items; + drm_intel_bo *bo; GLuint size, n_items; - char *name[BRW_MAX_CACHE]; - - /* Record of the last BOs chosen for each cache_id. Used to set - * brw->state.dirty.cache when a new cache item is chosen. - */ - drm_intel_bo *last_bo[BRW_MAX_CACHE]; + uint32_t next_offset; + bool bo_used_by_gpu; }; @@ -634,8 +637,9 @@ struct brw_context struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ - drm_intel_bo *prog_bo; drm_intel_bo *const_bo; + /** Offset in the program cache to the VS program */ + uint32_t prog_offset; uint32_t state_offset; /** Binding table of pointers to surf_bo entries */ @@ -651,14 +655,16 @@ struct brw_context struct brw_gs_prog_data *prog_data; GLboolean prog_active; + /** Offset in the program cache to the CLIP program pre-gen6 */ + uint32_t prog_offset; uint32_t state_offset; - drm_intel_bo *prog_bo; } gs; struct { struct brw_clip_prog_data *prog_data; - drm_intel_bo *prog_bo; + /** Offset in the program cache to the CLIP program pre-gen6 */ + uint32_t prog_offset; /* Offset in the batch to the CLIP state on pre-gen6. 
*/ uint32_t state_offset; @@ -673,7 +679,8 @@ struct brw_context struct { struct brw_sf_prog_data *prog_data; - drm_intel_bo *prog_bo; + /** Offset in the program cache to the CLIP program pre-gen6 */ + uint32_t prog_offset; uint32_t state_offset; uint32_t vp_offset; } sf; @@ -700,12 +707,14 @@ struct brw_context GLuint sampler_count; uint32_t sampler_offset; + /** Offset in the program cache to the WM program */ + uint32_t prog_offset; + /** Binding table of pointers to surf_bo entries */ uint32_t bind_bo_offset; uint32_t surf_offset[BRW_WM_MAX_SURF]; uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */ - drm_intel_bo *prog_bo; drm_intel_bo *const_bo; /* pull constant buffer. */ /** * This is offset in the batch to the push constants on gen6. @@ -717,9 +726,6 @@ struct brw_context struct { - /* gen4 */ - drm_intel_bo *prog_bo; - uint32_t state_offset; uint32_t blend_state_offset; uint32_t depth_stencil_state_offset; @@ -874,6 +880,26 @@ brw_register_blocks(int reg_count) return ALIGN(reg_count, 16) / 16 - 1; } +static inline uint32_t +brw_program_reloc(struct brw_context *brw, uint32_t state_offset, + uint32_t prog_offset) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5) { + /* Using state base address. 
*/ + return prog_offset; + } + + drm_intel_bo_emit_reloc(intel->batch.bo, + state_offset, + brw->cache.bo, + prog_offset, + I915_GEM_DOMAIN_INSTRUCTION, 0); + + return brw->cache.bo->offset + prog_offset; +} + GLboolean brw_do_cubemap_normalize(struct exec_list *instructions); #endif diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 6144f0a2bce..bdb5b672899 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -177,6 +177,8 @@ static void brw_emit_prim(struct brw_context *brw, OUT_BATCH(base_vertex_location); ADVANCE_BATCH(); + intel->batch.need_workaround_flush = true; + if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel); } @@ -434,6 +436,7 @@ void brw_draw_prims( struct gl_context *ctx, */ if (!retval) { _swsetup_Wakeup(ctx); + _tnl_wakeup(ctx); _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); } diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 3cc33720486..32a1d297479 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -207,6 +207,10 @@ static GLuint get_surface_type( GLenum type, GLuint size, case GL_UNSIGNED_INT: return uint_types_scale[size]; case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; + /* This produces GL_FIXED inputs as values between INT32_MIN and + * INT32_MAX, which will be scaled down by 1/65536 by the VS. 
+ */ + case GL_FIXED: return int_types_scale[size]; default: assert(0); return 0; } } @@ -225,6 +229,7 @@ static GLuint get_size( GLenum type ) case GL_UNSIGNED_INT: return sizeof(GLuint); case GL_UNSIGNED_SHORT: return sizeof(GLushort); case GL_UNSIGNED_BYTE: return sizeof(GLubyte); + case GL_FIXED: return sizeof(GLuint); default: assert(0); return 0; } } @@ -273,6 +278,7 @@ static void brw_prepare_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); + /* CACHE_NEW_VS_PROG */ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; const unsigned char *ptr = NULL; GLuint interleaved = 0, total_size = 0; @@ -494,6 +500,8 @@ static void brw_prepare_vertices(struct brw_context *brw) break; d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset; + if (d < 0) + break; if (i == 0) delta = d / brw->vb.current_buffers[i].stride; if (delta * brw->vb.current_buffers[i].stride != d) @@ -641,7 +649,7 @@ const struct brw_tracked_state brw_vertices = { .dirty = { .mesa = 0, .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, - .cache = 0, + .cache = CACHE_NEW_VS_PROG, }, .prepare = brw_prepare_vertices, .emit = brw_emit_vertices, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 09033aecd7c..b5ea943387d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -100,7 +100,7 @@ fs_visitor::fail(const char *format, ...) 
this->fail_msg = msg; if (INTEL_DEBUG & DEBUG_WM) { - fprintf(stderr, msg); + fprintf(stderr, "%s", msg); } } @@ -1533,6 +1533,8 @@ fs_visitor::run() this->result = reg_undef; ir->accept(this); } + if (failed) + return false; emit_fb_writes(); @@ -1684,6 +1686,9 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) key.clamp_fragment_color = true; for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (fp->Base.ShadowSamplers & (1 << i)) + key.compare_funcs[i] = GL_LESS; + /* FINISHME: depth compares might use (0,0,0,W) for example */ key.tex_swizzles[i] = SWIZZLE_XYZW; } @@ -1697,14 +1702,12 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) key.program_string_id = bfp->id; - drm_intel_bo *old_prog_bo = brw->wm.prog_bo; + uint32_t old_prog_offset = brw->wm.prog_offset; struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; - brw->wm.prog_bo = NULL; bool success = do_wm_prog(brw, prog, bfp, &key); - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = old_prog_bo; + brw->wm.prog_offset = old_prog_offset; brw->wm.prog_data = old_prog_data; return success; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7570dda1024..2bf850e5dea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -441,6 +441,8 @@ public: void visit(ir_function *ir); void visit(ir_function_signature *ir); + void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); + fs_inst *emit(fs_inst inst); fs_inst *emit(int opcode) diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 7f3f52854d2..46677a6f2ef 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -191,6 +191,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_log: case ir_unop_exp2: case ir_unop_log2: + case 
ir_unop_i2u: + case ir_unop_u2i: case ir_unop_f2i: case ir_unop_i2f: case ir_unop_f2b: diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 6b7c434949c..1d89b8f1d11 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -273,7 +273,8 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) } break; case FS_OPCODE_TXD: - assert(!"TXD isn't supported on gen5+ yet."); + /* There is no sample_d_c message; comparisons are done manually */ + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; } } else { @@ -311,7 +312,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) } break; case FS_OPCODE_TXD: - assert(!"TXD isn't supported on gen4 yet."); + /* There is no sample_d_c message; comparisons are done manually */ + assert(inst->mlen == 7 || inst->mlen == 10); + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index f88b1316775..b4689d2c293 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -101,7 +101,6 @@ fs_visitor::assign_regs() * for reg_width == 2. */ int reg_width = c->dispatch_width / 8; - int last_grf = 0; int hw_reg_mapping[this->virtual_grf_next + 1]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; @@ -263,6 +262,7 @@ fs_visitor::assign_regs() * regs in the register classes back down to real hardware reg * numbers. 
*/ + this->grf_used = first_assigned_grf; hw_reg_mapping[0] = 0; /* unused */ for (int i = 1; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); @@ -278,8 +278,9 @@ fs_visitor::assign_regs() assert(hw_reg >= 0); hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width; - last_grf = MAX2(last_grf, - hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); + this->grf_used = MAX2(this->grf_used, + hw_reg_mapping[i] + this->virtual_grf_sizes[i] * + reg_width); } foreach_iter(exec_list_iterator, iter, this->instructions) { @@ -290,8 +291,6 @@ fs_visitor::assign_regs() assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - this->grf_used = last_grf + reg_width; - ralloc_free(g); ralloc_free(regs); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index b4857871c78..9091014976b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -349,6 +349,14 @@ fs_visitor::visit(ir_expression *ir) emit_math(FS_OPCODE_RSQ, this->result, op[0]); break; + case ir_unop_i2u: + op[0].type = BRW_REGISTER_TYPE_UD; + this->result = op[0]; + break; + case ir_unop_u2i: + op[0].type = BRW_REGISTER_TYPE_D; + this->result = op[0]; + break; case ir_unop_i2f: case ir_unop_b2f: case ir_unop_b2i: @@ -549,7 +557,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* g0 header. */ mlen = 1; - if (ir->shadow_comparitor) { + if (ir->shadow_comparitor && ir->op != ir_txd) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); @@ -595,7 +603,42 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* gen4's SIMD8 sampler always has the slots for u,v,r present. 
*/ mlen += 3; } else if (ir->op == ir_txd) { - assert(!"TXD isn't supported on gen4 yet."); + ir->lod_info.grad.dPdx->accept(this); + fs_reg dPdx = this->result; + + ir->lod_info.grad.dPdy->accept(this); + fs_reg dPdy = this->result; + + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); + coordinate.reg_offset++; + } + /* the slots for u and v are always present, but r is optional */ + mlen += MAX2(ir->coordinate->type->vector_elements, 2); + + /* P = u, v, r + * dPdx = dudx, dvdx, drdx + * dPdy = dudy, dvdy, drdy + * + * 2-arg: dudx dvdx dudy dvdy + * dPdx.x dPdx.y dPdy.x dPdy.y + * m4 m5 m6 m7 + * + * 3-arg: dudx dvdx drdx dudy dvdy drdy + * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z + * m5 m6 m7 m8 m9 m10 + */ + for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); + dPdx.reg_offset++; + mlen++; + } + + for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); + dPdy.reg_offset++; + mlen++; + } } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. 
@@ -709,7 +752,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, } mlen += ir->coordinate->type->vector_elements * reg_width; - if (ir->shadow_comparitor) { + if (ir->shadow_comparitor && ir->op != ir_txd) { mlen = MAX2(mlen, header_present + 4 * reg_width); this->result = reg_undef; @@ -742,7 +785,37 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXL, dst); break; - case ir_txd: + case ir_txd: { + ir->lod_info.grad.dPdx->accept(this); + fs_reg dPdx = this->result; + + ir->lod_info.grad.dPdy->accept(this); + fs_reg dPdy = this->result; + + mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */ + + /** + * P = u, v, r + * dPdx = dudx, dvdx, drdx + * dPdy = dudy, dvdy, drdy + * + * Load up these values: + * - dudx dudy dvdx dvdy drdx drdy + * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z + */ + for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); + dPdx.reg_offset++; + mlen += reg_width; + + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); + dPdy.reg_offset++; + mlen += reg_width; + } + + inst = emit(FS_OPCODE_TXD, dst); + break; + } case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -776,7 +849,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, base_mrf--; } - if (ir->shadow_comparitor) { + if (ir->shadow_comparitor && ir->op != ir_txd) { ir->shadow_comparitor->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -796,20 +869,52 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; break; - case ir_txd: + case ir_txd: { + if (c->dispatch_width == 16) + fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); + + ir->lod_info.grad.dPdx->accept(this); + fs_reg dPdx = 
this->result; + + ir->lod_info.grad.dPdy->accept(this); + fs_reg dPdy = this->result; + + /* Load dPdx and the coordinate together: + * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z + */ + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + coordinate.reg_offset++; + mlen += reg_width; + + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); + dPdx.reg_offset++; + mlen += reg_width; + + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); + dPdy.reg_offset++; + mlen += reg_width; + } + break; + } case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; } - /* Set up the coordinate */ - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), - coordinate); - if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) - inst->saturate = true; - coordinate.reg_offset++; - mlen += reg_width; + /* Set up the coordinate (except for TXD where it was done earlier) */ + if (ir->op != ir_txd) { + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + coordinate.reg_offset++; + mlen += reg_width; + } } /* Generate the SEND */ @@ -835,9 +940,24 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, void fs_visitor::visit(ir_texture *ir) { - int sampler; fs_inst *inst = NULL; + int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base); + sampler = fp->Base.SamplerUnits[sampler]; + + /* Our hardware doesn't have a sample_d_c message, so shadow compares + * for textureGrad/TXD need to be emulated with instructions. 
+ */ + bool hw_compare_supported = ir->op != ir_txd; + if (ir->shadow_comparitor && !hw_compare_supported) { + assert(c->key.compare_funcs[sampler] != GL_NONE); + /* No need to even sample for GL_ALWAYS or GL_NEVER...bail early */ + if (c->key.compare_funcs[sampler] == GL_ALWAYS) + return swizzle_result(ir, fs_reg(1.0f), sampler); + else if (c->key.compare_funcs[sampler] == GL_NEVER) + return swizzle_result(ir, fs_reg(0.0f), sampler); + } + this->result = reg_undef; ir->coordinate->accept(this); fs_reg coordinate = this->result; @@ -876,11 +996,6 @@ fs_visitor::visit(ir_texture *ir) /* Should be lowered by do_lower_texture_projection */ assert(!ir->projector); - sampler = _mesa_get_sampler_uniform_value(ir->sampler, - prog, - &fp->Base); - sampler = fp->Base.SamplerUnits[sampler]; - /* The 965 requires the EU to do the normalization of GL rectangle * texture coordinates. We use the program parameter state * tracking to get the scaling factor. @@ -951,20 +1066,69 @@ fs_visitor::visit(ir_texture *ir) inst->sampler = sampler; - this->result = dst; + if (ir->shadow_comparitor) { + if (hw_compare_supported) { + inst->shadow_compare = true; + } else { + ir->shadow_comparitor->accept(this); + fs_reg ref = this->result; + + fs_reg value = dst; + dst = fs_reg(this, glsl_type::vec4_type); + + /* FINISHME: This needs to be done pre-filtering. 
*/ + + uint32_t conditional = 0; + switch (c->key.compare_funcs[sampler]) { + /* GL_ALWAYS and GL_NEVER were handled at the top of the function */ + case GL_LESS: conditional = BRW_CONDITIONAL_L; break; + case GL_GREATER: conditional = BRW_CONDITIONAL_G; break; + case GL_LEQUAL: conditional = BRW_CONDITIONAL_LE; break; + case GL_GEQUAL: conditional = BRW_CONDITIONAL_GE; break; + case GL_EQUAL: conditional = BRW_CONDITIONAL_EQ; break; + case GL_NOTEQUAL: conditional = BRW_CONDITIONAL_NEQ; break; + default: assert(!"Should not get here: bad shadow compare function"); + } + + /* Use conditional moves to load 0 or 1 as the result */ + this->current_annotation = "manual shadow comparison"; + for (int i = 0; i < 4; i++) { + inst = emit(BRW_OPCODE_MOV, dst, fs_reg(0.0f)); + + inst = emit(BRW_OPCODE_CMP, reg_null_f, ref, value); + inst->conditional_mod = conditional; + + inst = emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f)); + inst->predicated = true; - if (ir->shadow_comparitor) - inst->shadow_compare = true; + dst.reg_offset++; + value.reg_offset++; + } + dst.reg_offset = 0; + } + } + + swizzle_result(ir, dst, sampler); +} + +/** + * Swizzle the result of a texture result. This is necessary for + * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. + */ +void +fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) +{ + this->result = orig_val; if (ir->type == glsl_type::float_type) { /* Ignore DEPTH_TEXTURE_MODE swizzling. 
*/ assert(ir->sampler->type->sampler_shadow); - } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { - fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); + } else if (c->key.tex_swizzles[sampler] != SWIZZLE_NOOP) { + fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); for (int i = 0; i < 4; i++) { - int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); - fs_reg l = swizzle_dst; + int swiz = GET_SWZ(c->key.tex_swizzles[sampler], i); + fs_reg l = swizzled_result; l.reg_offset += i; if (swiz == SWIZZLE_ZERO) { @@ -972,12 +1136,12 @@ fs_visitor::visit(ir_texture *ir) } else if (swiz == SWIZZLE_ONE) { emit(BRW_OPCODE_MOV, l, fs_reg(1.0f)); } else { - fs_reg r = dst; - r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); + fs_reg r = orig_val; + r.reg_offset += GET_SWZ(c->key.tex_swizzles[sampler], i); emit(BRW_OPCODE_MOV, l, r); } } - this->result = swizzle_dst; + this->result = swizzled_result; } } @@ -1466,7 +1630,7 @@ fs_visitor::emit_dummy_fs() fs_inst *write; write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); - write->base_mrf = 0; + write->base_mrf = 2; } /* The register location here is relative to the start of the URB @@ -1627,7 +1791,7 @@ fs_visitor::emit_fb_writes() { this->current_annotation = "FB write header"; GLboolean header_present = GL_TRUE; - int nr = 0; + int nr = 2; int reg_width = c->dispatch_width / 8; if (intel->gen >= 6 && @@ -1637,7 +1801,7 @@ fs_visitor::emit_fb_writes() } if (header_present) { - /* m0, m1 header */ + /* m2, m3 header */ nr += 2; } @@ -1706,7 +1870,7 @@ fs_visitor::emit_fb_writes() fs_inst *inst = emit(FS_OPCODE_FB_WRITE); inst->target = target; - inst->base_mrf = 0; + inst->base_mrf = 2; inst->mlen = nr; if (target == c->key.nr_color_regions - 1) inst->eot = true; @@ -1724,7 +1888,7 @@ fs_visitor::emit_fb_writes() } fs_inst *inst = emit(FS_OPCODE_FB_WRITE); - inst->base_mrf = 0; + inst->base_mrf = 2; inst->mlen = nr; inst->eot = true; inst->header_present = header_present; diff 
--git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 001cd62f8ca..3171e97d7af 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -121,14 +121,11 @@ static void compile_gs_prog( struct brw_context *brw, printf("\n"); } - /* Upload - */ - drm_intel_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_upload_cache(&brw->cache, BRW_GS_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), - &brw->gs.prog_data); + brw_upload_cache(&brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->gs.prog_offset, &brw->gs.prog_data); ralloc_free(mem_ctx); } @@ -189,15 +186,12 @@ static void prepare_gs_prog(struct brw_context *brw) brw->gs.prog_active = key.need_gs_prog; } - drm_intel_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = NULL; - if (brw->gs.prog_active) { - brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, - &key, sizeof(key), - &brw->gs.prog_data); - if (brw->gs.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_GS_PROG, + &key, sizeof(key), + &brw->gs.prog_offset, &brw->gs.prog_data)) { compile_gs_prog( brw, &key ); + } } } diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 542874b7706..bbfefcd816a 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -45,12 +45,17 @@ brw_prepare_gs_unit(struct brw_context *brw) memset(gs, 0, sizeof(*gs)); - /* CACHE_NEW_GS_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_GS_PROG */ if (brw->gs.prog_active) { gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1); - /* reloc */ - gs->thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; + + gs->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->gs.state_offset + + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_offset + + 
(gs->thread0.grf_reg_count << 1)) >> 6; gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs->thread1.single_program_flow = 1; @@ -69,13 +74,6 @@ brw_prepare_gs_unit(struct brw_context *brw) gs->thread4.max_threads = 1; else gs->thread4.max_threads = 0; - - /* Emit GS program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, - (brw->gs.state_offset + - offsetof(struct brw_gs_unit_state, thread0)), - brw->gs.prog_bo, gs->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); } if (intel->gen == 5) @@ -91,6 +89,7 @@ const struct brw_tracked_state brw_gs_unit = { .dirty = { .mesa = 0, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_GS_PROG diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index a6de28b3add..033c77cd321 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -87,10 +87,11 @@ static void upload_binding_table_pointers(struct brw_context *brw) const struct brw_tracked_state brw_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH - | BRW_NEW_VS_BINDING_TABLE - | BRW_NEW_GS_BINDING_TABLE - | BRW_NEW_PS_BINDING_TABLE, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS | + BRW_NEW_VS_BINDING_TABLE | + BRW_NEW_GS_BINDING_TABLE | + BRW_NEW_PS_BINDING_TABLE), .cache = 0, }, .emit = upload_binding_table_pointers, @@ -122,10 +123,11 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw) const struct brw_tracked_state gen6_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH - | BRW_NEW_VS_BINDING_TABLE - | BRW_NEW_GS_BINDING_TABLE - | BRW_NEW_PS_BINDING_TABLE, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS | + BRW_NEW_VS_BINDING_TABLE | + BRW_NEW_GS_BINDING_TABLE | + BRW_NEW_PS_BINDING_TABLE), .cache = 0, }, .emit = upload_gen6_binding_table_pointers, @@ -180,7 +182,9 @@ static void 
upload_psp_urb_cbs(struct brw_context *brw ) const struct brw_tracked_state brw_psp_urb_cbs = { .dirty = { .mesa = 0, - .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH, + .brw = (BRW_NEW_URB_FENCE | + BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS), .cache = (CACHE_NEW_VS_UNIT | CACHE_NEW_GS_UNIT | CACHE_NEW_GS_PROG | @@ -219,20 +223,20 @@ static void emit_depthbuffer(struct brw_context *brw) struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL; unsigned int len; - /* - * If depth and stencil buffers are identical, then don't use separate - * stencil. + /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both + * non-pipelined state that will need the PIPE_CONTROL workaround. */ - if (depth_irb && depth_irb == stencil_irb) { - stencil_irb = NULL; - } + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); /* - * If stencil buffer uses combined depth/stencil format, but no depth buffer - * is attached, then use stencil buffer as depth buffer. + * If either depth or stencil buffer has packed depth/stencil format, + * then don't use separate stencil. Emit only a depth buffer. */ - if (!depth_irb && stencil_irb - && stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) { + if (depth_irb && depth_irb->Base.Format == MESA_FORMAT_S8_Z24) { + stencil_irb = NULL; + } else if (!depth_irb && stencil_irb + && stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) { depth_irb = stencil_irb; stencil_irb = NULL; } @@ -328,7 +332,7 @@ static void emit_depthbuffer(struct brw_context *brw) return; } - offset = intel_region_tile_offsets(region, &tile_x, &tile_y); + offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y); assert(intel->gen < 6 || region->tiling == I915_TILING_Y); assert(!hiz_region || region->tiling == I915_TILING_Y); @@ -361,26 +365,48 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } - /* Emit hiz buffer. 
*/ if (hiz_region || stencil_irb) { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); - OUT_RELOC(hiz_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - ADVANCE_BATCH(); - } + /* + * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate + * stencil enable' and 'hiz enable' bits were set. Therefore we must + * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if + * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted; + * failure to do so causes hangs on gen5 and a stall on gen6. + */ - /* Emit stencil buffer. */ - if (hiz_region || stencil_irb) { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); - OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1); - OUT_RELOC(stencil_irb->region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - ADVANCE_BATCH(); + /* Emit hiz buffer. */ + if (hiz_region) { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); + OUT_RELOC(hiz_region->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* Emit stencil buffer. */ + if (stencil_irb) { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1); + OUT_RELOC(stencil_irb->region->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } } /* @@ -392,6 +418,9 @@ static void emit_depthbuffer(struct brw_context *brw) * when HiZ is enabled and the DEPTH_BUFFER_STATE changes. 
*/ if (intel->gen >= 6 || hiz_region) { + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); OUT_BATCH(0); @@ -424,6 +453,9 @@ static void upload_polygon_stipple(struct brw_context *brw) if (!ctx->Polygon.StippleFlag) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(33); OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2)); @@ -467,6 +499,9 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) if (!ctx->Polygon.StippleFlag) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2)); @@ -507,6 +542,9 @@ static void upload_aa_line_parameters(struct brw_context *brw) if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2)); /* use legacy aa line coverage computation */ OUT_BATCH(0); @@ -537,6 +575,9 @@ static void upload_line_stipple(struct brw_context *brw) if (!ctx->Line.StippleFlag) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2)); OUT_BATCH(ctx->Line.StipplePattern); @@ -564,6 +605,10 @@ static void upload_invarient_state( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; + /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. 
*/ + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + { /* 0x61040000 Pipeline Select */ /* PipelineSelect : 0 */ @@ -627,6 +672,7 @@ static void upload_invarient_state( struct brw_context *brw ) sip.header.length = 0; sip.bits0.pad = 0; sip.bits0.system_instruction_pointer = 0; + BRW_BATCH_STRUCT(brw, &sip); } @@ -666,7 +712,19 @@ static void upload_state_base_address( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; + /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of + * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be + * programmed prior to STATE_BASE_ADDRESS. + * + * However, given that the instruction SBA (general state base + * address) on this chipset is always set to 0 across X and GL, + * maybe this isn't required for us in particular. + */ + if (intel->gen >= 6) { + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); /* General state base address: stateless DP read/write requests */ @@ -690,7 +748,9 @@ static void upload_state_base_address( struct brw_context *brw ) I915_GEM_DOMAIN_INSTRUCTION), 0, 1); OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */ - OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */ + OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); /* Instruction base address: shader kernels (incl. 
SIP) */ + OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Dynamic state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ @@ -703,7 +763,8 @@ static void upload_state_base_address( struct brw_context *brw ) OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* Instruction base address */ + OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); /* Instruction base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ OUT_BATCH(1); /* Instruction access upper bound */ @@ -719,12 +780,37 @@ static void upload_state_base_address( struct brw_context *brw ) OUT_BATCH(1); /* Indirect object upper bound */ ADVANCE_BATCH(); } + + /* According to section 3.6.1 of VOL1 of the 965 PRM, + * STATE_BASE_ADDRESS updates require a reissue of: + * + * 3DSTATE_PIPELINE_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * MEDIA_STATE_POINTERS + * + * and this continues through Ironlake. The Sandy Bridge PRM, vol + * 1 part 1 says that the following packets must be reissued: + * + * 3DSTATE_CC_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * 3DSTATE_SAMPLER_STATE_POINTERS + * 3DSTATE_VIEWPORT_STATE_POINTERS + * MEDIA_STATE_POINTERS + * + * Those are always reissued following SBA updates anyway (new + * batch time), except in the case of the program cache BO + * changing. Having a separate state flag makes the sequence more + * obvious. 
+ */ + + brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS; } const struct brw_tracked_state brw_state_base_address = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE), .cache = 0, }, .emit = upload_state_base_address diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index c2227777cfb..fca30a74aaf 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -120,14 +120,11 @@ static void compile_sf_prog( struct brw_context *brw, printf("\n"); } - /* Upload - */ - drm_intel_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_upload_cache(&brw->cache, BRW_SF_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), - &brw->sf.prog_data); + brw_upload_cache(&brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->sf.prog_offset, &brw->sf.prog_data); ralloc_free(mem_ctx); } @@ -191,12 +188,11 @@ static void upload_sf_prog(struct brw_context *brw) key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0); } - drm_intel_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, - &key, sizeof(key), - &brw->sf.prog_data); - if (brw->sf.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_SF_PROG, + &key, sizeof(key), + &brw->sf.prog_offset, &brw->sf.prog_data)) { compile_sf_prog( brw, &key ); + } } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 78b22c4df3d..eb3d103099b 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -133,9 +133,14 @@ static void upload_sf_unit( struct brw_context *brw ) memset(sf, 0, sizeof(*sf)); - /* CACHE_NEW_SF_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_SF_PROG */ sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1; - 
sf->thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ + sf->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->sf.state_offset + + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_offset + + (sf->thread0.grf_reg_count << 1)) >> 6; sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -282,11 +287,6 @@ static void upload_sf_unit( struct brw_context *brw ) /* STATE_PREFETCH command description describes this state as being * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. */ - /* Emit SF program relocation */ - drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset + - offsetof(struct brw_sf_unit_state, thread0)), - brw->sf.prog_bo, sf->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); /* Emit SF viewport relocation */ drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset + @@ -308,6 +308,7 @@ const struct brw_tracked_state brw_sf_unit = { _NEW_SCISSOR | _NEW_BUFFERS), .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_URB_FENCE), .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 544ef7d47e6..b384651d8d0 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -145,21 +145,21 @@ void brw_clear_validated_bos(struct brw_context *brw); * brw_state_cache.c */ -drm_intel_bo *brw_upload_cache(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - const void *data, - GLuint data_sz, - const void *aux, - GLuint aux_sz, - void *aux_return); - -drm_intel_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - void *aux_return); +void brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + const void *data, + GLuint data_sz, + const void *aux, + GLuint aux_sz, + uint32_t *out_offset, void *out_aux); + 
+bool brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + uint32_t *inout_offset, void *out_aux); void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index f13a41fa7cc..3988625ea91 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -45,6 +45,7 @@ */ #include "main/imports.h" +#include "intel_batchbuffer.h" #include "brw_state.h" #define FILE_DEBUG_FLAG DEBUG_STATE @@ -67,23 +68,6 @@ hash_key(struct brw_cache_item *item) return hash; } - -/** - * Marks a new buffer as being chosen for the given cache id. - */ -static void -update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, - drm_intel_bo *bo) -{ - if (bo == cache->last_bo[cache_id]) - return; /* no change */ - - drm_intel_bo_unreference(cache->last_bo[cache_id]); - cache->last_bo[cache_id] = bo; - drm_intel_bo_reference(cache->last_bo[cache_id]); - cache->brw->state.dirty.cache |= 1 << cache_id; -} - static int brw_cache_item_equals(const struct brw_cache_item *a, const struct brw_cache_item *b) @@ -145,12 +129,13 @@ rehash(struct brw_cache *cache) /** * Returns the buffer object matching cache_id and key, or NULL. 
*/ -drm_intel_bo * +bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - void *aux_return) + uint32_t *inout_offset, void *out_aux) { + struct brw_context *brw = cache->brw; struct brw_cache_item *item; struct brw_cache_item lookup; GLuint hash; @@ -164,19 +149,116 @@ brw_search_cache(struct brw_cache *cache, item = search_cache(cache, hash, &lookup); if (item == NULL) - return NULL; + return false; - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); + *(void **)out_aux = ((char *)item->key + item->key_size); - update_cache_last(cache, cache_id, item->bo); + if (item->offset != *inout_offset) { + brw->state.dirty.cache |= (1 << cache_id); + *inout_offset = item->offset; + } - drm_intel_bo_reference(item->bo); - return item->bo; + return true; } +static void +brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) +{ + struct brw_context *brw = cache->brw; + struct intel_context *intel = &brw->intel; + drm_intel_bo *new_bo; + + new_bo = drm_intel_bo_alloc(intel->bufmgr, "program cache", new_size, 64); + + /* Copy any existing data that needs to be saved. */ + if (cache->next_offset != 0) { + drm_intel_bo_map(cache->bo, false); + drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual); + drm_intel_bo_unmap(cache->bo); + } + + drm_intel_bo_unreference(cache->bo); + cache->bo = new_bo; + cache->bo_used_by_gpu = false; + + /* Since we have a new BO in place, we need to signal the units + * that depend on it (state base address on gen5+, or unit state before). 
+ */ + brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE; +} + +/** + * Attempts to find an item in the cache with identical data and aux + * data to use + */ +static bool +brw_try_upload_using_copy(struct brw_cache *cache, + struct brw_cache_item *result_item, + const void *data, + const void *aux) +{ + int i; + struct brw_cache_item *item; + + for (i = 0; i < cache->size; i++) { + for (item = cache->items[i]; item; item = item->next) { + const void *item_aux = item->key + item->key_size; + int ret; + + if (item->cache_id != result_item->cache_id || + item->size != result_item->size || + item->aux_size != result_item->aux_size) { + continue; + } + + if (memcmp(item_aux, aux, item->aux_size) != 0) { + continue; + } + + drm_intel_bo_map(cache->bo, false); + ret = memcmp(cache->bo->virtual + item->offset, data, item->size); + drm_intel_bo_unmap(cache->bo); + if (ret) + continue; + + result_item->offset = item->offset; + + return true; + } + } + + return false; +} + +static void +brw_upload_item_data(struct brw_cache *cache, + struct brw_cache_item *item, + const void *data) +{ + /* Allocate space in the cache BO for our new program. */ + if (cache->next_offset + item->size > cache->bo->size) { + uint32_t new_size = cache->bo->size * 2; + + while (cache->next_offset + item->size > new_size) + new_size *= 2; + + brw_cache_new_bo(cache, new_size); + } + + /* If we would block on writing to an in-use program BO, just + * recreate it. 
+ */ + if (cache->bo_used_by_gpu) { + brw_cache_new_bo(cache, cache->bo->size); + } + + item->offset = cache->next_offset; + + /* Programs are always 64-byte aligned, so set up the next one now */ + cache->next_offset = ALIGN(item->offset + item->size, 64); +} -drm_intel_bo * +void brw_upload_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, @@ -185,23 +267,31 @@ brw_upload_cache(struct brw_cache *cache, GLuint data_size, const void *aux, GLuint aux_size, - void *aux_return) + uint32_t *out_offset, + void *out_aux) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); GLuint hash; void *tmp; - drm_intel_bo *bo; item->cache_id = cache_id; + item->size = data_size; item->key = key; item->key_size = key_size; + item->aux_size = aux_size; hash = hash_key(item); item->hash = hash; - /* Create the buffer object to contain the data */ - bo = drm_intel_bo_alloc(cache->brw->intel.bufmgr, - cache->name[cache_id], data_size, 1 << 6); - + /* If we can find a matching prog/prog_data combo in the cache + * already, then reuse the existing stuff. This will mean not + * flagging CACHE_NEW_* when transitioning between the two + * equivalent hash keys. This is notably useful for programs + * generating shaders at runtime, where multiple shaders may + * compile to the same thing in our backend. 
+ */ + if (!brw_try_upload_using_copy(cache, item, data, aux)) { + brw_upload_item_data(cache, item, data); + } /* Set up the memory containing the key and aux_data */ tmp = malloc(key_size + aux_size); @@ -211,9 +301,6 @@ brw_upload_cache(struct brw_cache *cache, item->key = tmp; - item->bo = bo; - drm_intel_bo_reference(bo); - if (cache->n_items > cache->size * 1.5) rehash(cache); @@ -222,34 +309,18 @@ brw_upload_cache(struct brw_cache *cache, cache->items[hash] = item; cache->n_items++; - if (aux_return) { - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - } - - DBG("upload %s: %d bytes to cache id %d\n", - cache->name[cache_id], - data_size, cache_id); - /* Copy data to the buffer */ - drm_intel_bo_subdata(bo, 0, data_size, data); - - update_cache_last(cache, cache_id, bo); + drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); - return bo; -} - -static void -brw_init_cache_id(struct brw_cache *cache, - const char *name, - enum brw_cache_id id) -{ - cache->name[id] = strdup(name); + *out_offset = item->offset; + *(void **)out_aux = (void *)((char *)item->key + item->key_size); + cache->brw->state.dirty.cache |= 1 << cache_id; } - void brw_init_caches(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct brw_cache *cache = &brw->cache; cache->brw = brw; @@ -259,36 +330,15 @@ brw_init_caches(struct brw_context *brw) cache->items = (struct brw_cache_item **) calloc(1, cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(cache, "CC_VP", BRW_CC_VP); - brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT); - brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG); - brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER); - brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT); - brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG); - brw_init_cache_id(cache, "SF_VP", BRW_SF_VP); - - brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT); - - brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT); - - brw_init_cache_id(cache, "VS_PROG", 
BRW_VS_PROG); - - brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT); - - brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG); - brw_init_cache_id(cache, "CLIP_VP", BRW_CLIP_VP); - - brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT); - - brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG); - brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE); - brw_init_cache_id(cache, "COLOR_CALC_STATE", BRW_COLOR_CALC_STATE); - brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE); + cache->bo = drm_intel_bo_alloc(intel->bufmgr, + "program cache", + 4096, 64); } static void brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { + struct intel_context *intel = &brw->intel; struct brw_cache_item *c, *next; GLuint i; @@ -297,7 +347,6 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { next = c->next; - drm_intel_bo_unreference(c->bo); free((void *)c->key); free(c); } @@ -306,9 +355,18 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; + /* Start putting programs into the start of the BO again, since + * we'll never find the old results. + */ + cache->next_offset = 0; + + /* We need to make sure that the programs get regenerated, since + * any offsets leftover in brw_context will no longer be valid. 
+ */ brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; + intel_batchbuffer_flush(intel); } void @@ -325,15 +383,10 @@ brw_state_cache_check_size(struct brw_context *brw) static void brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { - GLuint i; DBG("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); - for (i = 0; i < BRW_MAX_CACHE; i++) { - drm_intel_bo_unreference(cache->last_bo[i]); - free(cache->name[i]); - } free(cache->items); cache->items = NULL; cache->size = 0; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index ff06cb3a91e..7a3a88f04f5 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -459,21 +459,19 @@ static void dump_blend_state(struct brw_context *brw) } -static void brw_debug_prog(const char *name, drm_intel_bo *prog) +static void brw_debug_prog(struct brw_context *brw, + const char *name, uint32_t prog_offset) { unsigned int i; uint32_t *data; - if (prog == NULL) - return; - - drm_intel_bo_map(prog, GL_FALSE); + drm_intel_bo_map(brw->cache.bo, false); - data = prog->virtual; + data = brw->cache.bo->virtual + prog_offset; - for (i = 0; i < prog->size / 4 / 4; i++) { + for (i = 0; i < brw->cache.bo->size / 4 / 4; i++) { fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - name, (unsigned int)prog->offset + i * 4 * 4, + name, (unsigned int)brw->cache.bo->offset + i * 4 * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); /* Stop at the end of the program. It'd be nice to keep track of the actual * intended program size instead of guessing like this. 
@@ -485,7 +483,7 @@ static void brw_debug_prog(const char *name, drm_intel_bo *prog) break; } - drm_intel_bo_unmap(prog); + drm_intel_bo_unmap(brw->cache.bo); } @@ -518,17 +516,19 @@ void brw_debug_batch(struct intel_context *intel) if (intel->gen < 6) state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, sizeof(struct brw_vs_unit_state)); - brw_debug_prog("VS prog", brw->vs.prog_bo); + brw_debug_prog(brw, "VS prog", brw->vs.prog_offset); if (intel->gen < 6) state_struct_out("GS", intel->batch.bo, brw->gs.state_offset, sizeof(struct brw_gs_unit_state)); - brw_debug_prog("GS prog", brw->gs.prog_bo); + if (brw->gs.prog_active) { + brw_debug_prog(brw, "GS prog", brw->gs.prog_offset); + } if (intel->gen < 6) { state_struct_out("SF", intel->batch.bo, brw->sf.state_offset, sizeof(struct brw_sf_unit_state)); - brw_debug_prog("SF prog", brw->sf.prog_bo); + brw_debug_prog(brw, "SF prog", brw->sf.prog_offset); } if (intel->gen >= 7) dump_sf_clip_viewport_state(brw); @@ -540,7 +540,7 @@ void brw_debug_batch(struct intel_context *intel) if (intel->gen < 6) state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, sizeof(struct brw_wm_unit_state)); - brw_debug_prog("WM prog", brw->wm.prog_bo); + brw_debug_prog(brw, "WM prog", brw->wm.prog_offset); if (intel->gen >= 6) { dump_cc_viewport_state(brw); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 6a4c112dcf5..76ffa0daefe 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -47,11 +47,11 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_check_fallback, &brw_wm_input_sizes, - &brw_vs_prog, - &brw_gs_prog, - &brw_clip_prog, - &brw_sf_prog, - &brw_wm_prog, + &brw_vs_prog, /* must do before GS prog, state base address. 
*/ + &brw_gs_prog, /* must do before state base address */ + &brw_clip_prog, /* must do before state base address */ + &brw_sf_prog, /* must do before state base address */ + &brw_wm_prog, /* must do before state base address */ /* Once all the programs are done, we know how large urb entry * sizes need to be and can decide if we need to change the urb @@ -110,9 +110,9 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_check_fallback, &brw_wm_input_sizes, - &brw_vs_prog, - &brw_gs_prog, - &brw_wm_prog, + &brw_vs_prog, /* must do before state base address */ + &brw_gs_prog, /* must do before state base address */ + &brw_wm_prog, /* must do before state base address */ &gen6_clip_vp, &gen6_sf_vp, @@ -365,6 +365,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_PRIMITIVE), DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), + DEFINE_BIT(BRW_NEW_PROGRAM_CACHE), DEFINE_BIT(BRW_NEW_PSP), DEFINE_BIT(BRW_NEW_WM_SURFACES), DEFINE_BIT(BRW_NEW_INDICES), @@ -378,6 +379,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE), + DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 4a3a2bfada2..f462f32b19a 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -88,7 +88,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, GLuint align_w = 4; mt->total_height = 0; - intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + intel_get_texture_alignment_unit(mt->format, &align_w, &align_h); if (mt->compressed) { mt->total_width = ALIGN(width, align_w); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index d6a53995531..a9ad5311fe3 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ 
b/src/mesa/drivers/dri/i965/brw_vs.c @@ -105,12 +105,11 @@ static void do_vs_prog( struct brw_context *brw, /* constant_map */ aux_size += c.vp->program.Base.Parameters->NumParameters; - drm_intel_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_upload_cache(&brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, aux_size, - &brw->vs.prog_data); + brw_upload_cache(&brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, aux_size, + &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); } @@ -145,14 +144,19 @@ static void brw_upload_vs_prog(struct brw_context *brw) } } - /* Make an early check for the key. - */ - drm_intel_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, - &key, sizeof(key), - &brw->vs.prog_data); - if (brw->vs.prog_bo == NULL) + /* BRW_NEW_VERTICES */ + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (vp->program.Base.InputsRead & (1 << i) && + brw->vb.inputs[i].glarray->Type == GL_FIXED) { + key.gl_fixed_input_size[i] = brw->vb.inputs[i].glarray->Size; + } + } + + if (!brw_search_cache(&brw->cache, BRW_VS_PROG, + &key, sizeof(key), + &brw->vs.prog_offset, &brw->vs.prog_data)) { do_vs_prog(brw, vp, &key); + } brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + sizeof(*brw->vs.prog_data)); } @@ -164,7 +168,8 @@ const struct brw_tracked_state brw_vs_prog = { .dirty = { .mesa = (_NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT | _NEW_BUFFERS), - .brw = BRW_NEW_VERTEX_PROGRAM, + .brw = (BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_VERTICES), .cache = 0 }, .prepare = brw_upload_vs_prog diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 7ca84a54b01..432994a8534 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -41,6 +41,10 @@ struct brw_vs_prog_key { GLuint program_string_id; + /** + * Number of channels of the vertex attribute that need 
GL_FIXED rescaling + */ + uint8_t gl_fixed_input_size[VERT_ATTRIB_MAX]; GLuint nr_userclip:4; GLuint copy_edgeflag:1; GLuint point_coord_replace:8; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 7d5eb353eee..9d733344a26 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1635,7 +1635,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) else m = brw_message_reg(4); - brw_DP4(p, brw_writemask(m, (1 << (i & 7))),pos, c->userplane[i]); + brw_DP4(p, brw_writemask(m, (1 << (i & 3))),pos, c->userplane[i]); } } } else if ((c->prog_data.outputs_written & @@ -1878,6 +1878,26 @@ get_predicate(const struct prog_instruction *inst) } } +static void +brw_vs_rescale_gl_fixed(struct brw_vs_compile *c) +{ + struct brw_compile *p = &c->func; + int i; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (!(c->prog_data.inputs_read & (1 << i))) + continue; + + if (c->key.gl_fixed_input_size[i] != 0) { + struct brw_reg reg = c->regs[PROGRAM_INPUT][i]; + + brw_MUL(p, + brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1), + reg, brw_imm_f(1.0 / 65536.0)); + } + } +} + /* Emit the vertex program instructions here. 
*/ void brw_vs_emit(struct brw_vs_compile *c ) @@ -1937,6 +1957,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) */ brw_vs_alloc_regs(c); + brw_vs_rescale_gl_fixed(c); + if (c->needs_stack) brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 1eee5b7e5de..d5010a21e80 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -36,18 +36,6 @@ #include "brw_defines.h" #include "main/macros.h" -struct brw_vs_unit_key { - unsigned int total_grf; - unsigned int urb_entry_read_length; - unsigned int curb_entry_read_length; - - unsigned int curbe_offset; - - unsigned int nr_urb_entries, urb_size; - - unsigned int nr_surfaces; -}; - static void brw_prepare_vs_unit(struct brw_context *brw) { @@ -58,8 +46,14 @@ brw_prepare_vs_unit(struct brw_context *brw) vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset); memset(vs, 0, sizeof(*vs)); - /* CACHE_NEW_VS_PROG */ - vs->thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ + vs->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->vs.state_offset + + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_offset + + (vs->thread0.grf_reg_count << 1)) >> 6; + vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, @@ -152,13 +146,6 @@ brw_prepare_vs_unit(struct brw_context *brw) */ vs->vs6.vs_enable = 1; - /* Emit VS program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, (brw->vs.state_offset + - offsetof(struct brw_vs_unit_state, - thread0)), - brw->vs.prog_bo, vs->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - brw->state.dirty.cache |= CACHE_NEW_VS_UNIT; } @@ -166,6 +153,7 @@ const struct 
brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 69650e1df77..8612e743265 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -69,14 +69,8 @@ static void brw_destroy_context( struct intel_context *intel ) ralloc_free(brw->wm.compile_data); dri_bo_release(&brw->curbe.curbe_bo); - dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.const_bo); - dri_bo_release(&brw->gs.prog_bo); - dri_bo_release(&brw->clip.prog_bo); - dri_bo_release(&brw->sf.prog_bo); - dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.const_bo); - dri_bo_release(&brw->cc.prog_bo); free(brw->curbe.last_buf); free(brw->curbe.next_buf); @@ -122,13 +116,20 @@ static void brw_new_batch( struct intel_context *intel ) * This is probably not as severe as on 915, since almost all of our state * is just in referenced buffers. */ - brw->state.dirty.brw |= BRW_NEW_CONTEXT; + brw->state.dirty.brw |= BRW_NEW_CONTEXT | BRW_NEW_BATCH; - brw->state.dirty.mesa |= ~0; - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; + /* Assume that the last command before the start of our batch was a + * primitive, for safety. + */ + intel->batch.need_workaround_flush = true; brw->vb.nr_current_buffers = 0; + + /* Mark that the current program cache BO has been used by the GPU. + * It will be reallocated if we need to put new programs in for the + * next batch. 
+ */ + brw->cache.bo_used_by_gpu = true; } static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 1aebd12df49..b0dfdd536aa 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -273,12 +273,11 @@ bool do_wm_prog(struct brw_context *brw, */ program = brw_get_program(&c->func, &program_size); - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache(&brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - program, program_size, - &c->prog_data, sizeof(c->prog_data), - &brw->wm.prog_data); + brw_upload_cache(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + program, program_size, + &c->prog_data, sizeof(c->prog_data), + &brw->wm.prog_offset, &brw->wm.prog_data); return true; } @@ -389,6 +388,8 @@ static void brw_wm_populate_key( struct brw_context *brw, * all 4 channels. */ if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { + key->compare_funcs[i] = sampler->CompareFunc; + if (sampler->DepthMode == GL_ALPHA) { swizzles[0] = SWIZZLE_ZERO; swizzles[1] = SWIZZLE_ZERO; @@ -477,13 +478,9 @@ static void brw_prepare_wm_prog(struct brw_context *brw) brw_wm_populate_key(brw, &key); - /* Make an early check for the key. 
- */ - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, - &key, sizeof(key), - &brw->wm.prog_data); - if (brw->wm.prog_bo == NULL) { + if (!brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + &brw->wm.prog_offset, &brw->wm.prog_data)) { bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp, &key); assert(success); diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index e244b55a083..29082c19088 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -68,6 +68,18 @@ struct brw_wm_prog_key { GLuint clamp_fragment_color:1; GLuint line_aa:2; + /** + * Per-sampler comparison functions: + * + * If comparison mode is GL_COMPARE_R_TO_TEXTURE, then this is set to one + * of GL_NEVER, GL_LESS, GL_EQUAL, GL_LEQUAL, GL_GREATER, GL_NOTEQUAL, + * GL_GEQUAL, or GL_ALWAYS. Otherwise (comparison mode is GL_NONE), this + * field is irrelevant so it's left as GL_NONE (0). + * + * While this is a GLenum, all possible values fit in 16-bits. 
+ */ + uint16_t compare_funcs[BRW_MAX_TEX_UNIT]; + GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index ef98f8126dc..506e2bdff5b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -90,13 +90,25 @@ brw_prepare_wm_unit(struct brw_context *brw) brw->wm.prog_data->first_curbe_grf_16); } - /* CACHE_NEW_WM_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_WM_PROG */ wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks; wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16; - wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ - /* reloc */ - wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset + - brw->wm.prog_data->prog_offset_16) >> 6; + + wm->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_offset + + (wm->thread0.grf_reg_count << 1)) >> 6; + + wm->wm9.kernel_start_pointer_2 = + brw_program_reloc(brw, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, wm9), + brw->wm.prog_offset + + brw->wm.prog_data->prog_offset_16 + + (wm->wm9.grf_reg_count_2 << 1)) >> 6; + wm->thread1.depth_coef_urb_read_offset = 1; wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -214,23 +226,6 @@ brw_prepare_wm_unit(struct brw_context *brw) if (unlikely(INTEL_DEBUG & DEBUG_STATS) || intel->stats_wm) wm->wm4.stats_enable = 1; - /* Emit WM program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, - brw->wm.state_offset + - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo, wm->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - - if (brw->wm.prog_data->prog_offset_16) { - drm_intel_bo_emit_reloc(intel->batch.bo, - brw->wm.state_offset + - offsetof(struct 
brw_wm_unit_state, wm9), - brw->wm.prog_bo, - ((wm->wm9.grf_reg_count_2 << 1) + - brw->wm.prog_data->prog_offset_16), - I915_GEM_DOMAIN_INSTRUCTION, 0); - } - /* Emit scratch space relocation */ if (brw->wm.prog_data->total_scratch != 0) { drm_intel_bo_emit_reloc(intel->batch.bo, @@ -265,6 +260,7 @@ const struct brw_tracked_state brw_wm_unit = { _NEW_BUFFERS), .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_WM_SURFACES), diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index f560bc3fa66..89fea9cc952 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -73,7 +73,7 @@ translate_tex_target(GLenum target) uint32_t brw_format_for_mesa_format(gl_format mesa_format) { - uint32_t table[MESA_FORMAT_COUNT] = + static const uint32_t table[MESA_FORMAT_COUNT] = { [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM, [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM, @@ -477,7 +477,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, format << BRW_SURFACE_FORMAT_SHIFT); /* reloc */ - surf[1] = (intel_region_tile_offsets(region, &tile_x, &tile_y) + + surf[1] = (intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y) + region->buffer->offset); surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 2b16d6cdc01..294d5a5e644 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -183,7 +183,8 @@ static void upload_cc_state_pointers(struct brw_context *brw) const struct brw_tracked_state gen6_cc_state_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS), .cache = (CACHE_NEW_BLEND_STATE | CACHE_NEW_COLOR_CALC_STATE | CACHE_NEW_DEPTH_STENCIL_STATE) diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c 
b/src/mesa/drivers/dri/i965/gen6_gs_state.c index c1d0a739394..d29f0290727 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -45,7 +45,7 @@ upload_gs_state(struct brw_context *brw) ADVANCE_BATCH(); // GS should never be used on Gen6. Disable it. - assert(brw->gs.prog_bo == NULL); + assert(!brw->gs.prog_active); BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); /* prog_bo */ @@ -65,8 +65,7 @@ upload_gs_state(struct brw_context *brw) const struct brw_tracked_state gen6_gs_state = { .dirty = { .mesa = _NEW_TRANSFORM, - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_URB_FENCE | + .brw = (BRW_NEW_URB_FENCE | BRW_NEW_CONTEXT), .cache = CACHE_NEW_GS_PROG }, diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c index 4cdec699df6..89326872faa 100644 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -50,7 +50,8 @@ upload_sampler_state_pointers(struct brw_context *brw) const struct brw_tracked_state gen6_sampler_state = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS), .cache = CACHE_NEW_SAMPLER }, .emit = upload_sampler_state_pointers, diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index fad3ca0dd04..7492e508864 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -31,7 +31,7 @@ #include "intel_batchbuffer.h" static void -gen6_prepare_scissor_state(struct brw_context *brw) +gen6_upload_scissor_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; @@ -89,5 +89,5 @@ const struct brw_tracked_state gen6_scissor_state = { .brw = BRW_NEW_BATCH, .cache = 0, }, - .prepare = gen6_prepare_scissor_state, + .emit = gen6_upload_scissor_state, }; diff --git 
a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 84028e4e758..5bb731dc8fd 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -100,10 +100,11 @@ upload_sf_state(struct brw_context *brw) int i; /* _NEW_BUFFER */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; - int attr = 0; + int attr = 0, input_index = 0; int urb_start; int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); float point_size; + uint16_t attr_overrides[FRAG_ATTRIB_MAX]; /* _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) @@ -230,19 +231,43 @@ upload_sf_state(struct brw_context *brw) (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); } - if (ctx->Point.PointSprite) { - for (i = 0; i < 8; i++) { - if (ctx->Point.CoordReplace[i]) - dw16 |= (1 << i); - } - } - /* flat shading */ if (ctx->Light.ShadeModel == GL_FLAT) { dw17 |= ((brw->fragment_program->Base.InputsRead & (FRAG_BIT_COL0 | FRAG_BIT_COL1)) >> ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); } + /* Create the mapping from the FS inputs we produce to the VS outputs + * they source from. + */ + for (; attr < FRAG_ATTRIB_MAX; attr++) { + if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) + continue; + + /* _NEW_POINT */ + if (ctx->Point.PointSprite && + (attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7) && + ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) { + dw16 |= (1 << input_index); + } + + if (attr == FRAG_ATTRIB_PNTC) + dw16 |= (1 << input_index); + + /* The hardware can only do the overrides on 16 overrides at a + * time, and the other up to 16 have to be lined up so that the + * input index = the output index. We'll need to do some + * tweaking to make sure that's the case. 
+ */ + assert(input_index < 16 || attr == input_index); + + attr_overrides[input_index++] = get_attr_override(brw, attr, + two_side_color); + } + + for (; input_index < FRAG_ATTRIB_MAX; input_index++) + attr_overrides[input_index] = 0; + BEGIN_BATCH(20); OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); OUT_BATCH(dw1); @@ -253,24 +278,7 @@ upload_sf_state(struct brw_context *brw) OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */ OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */ for (i = 0; i < 8; i++) { - uint32_t attr_overrides = 0; - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color); - attr++; - break; - } - } - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; - attr++; - break; - } - } - OUT_BATCH(attr_overrides); + OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); } OUT_BATCH(dw16); /* point sprite texcoord bitmask */ OUT_BATCH(dw17); /* constant interp bitmask */ diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index 62645a6a30f..b4105111c8c 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -64,7 +64,7 @@ upload_urb(struct brw_context *brw) assert(brw->urb.nr_vs_entries % 4 == 0); assert(brw->urb.nr_gs_entries % 4 == 0); /* GS requirement */ - assert(!brw->gs.prog_bo || brw->urb.vs_size < 5); + assert(!brw->gs.prog_active || brw->urb.vs_size < 5); BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index 4116bdb96de..c6c55c926c7 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -122,7 +122,8 @@ static void upload_viewport_state_pointers(struct 
brw_context *brw) const struct brw_tracked_state gen6_viewport_state = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS), .cache = (CACHE_NEW_CLIP_VP | CACHE_NEW_SF_VP | CACHE_NEW_CC_VP) diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index b46368e36e2..022e23e12b0 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -110,7 +110,7 @@ const struct brw_tracked_state gen6_vs_constants = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_BATCH | BRW_NEW_VERTEX_PROGRAM), - .cache = 0, + .cache = CACHE_NEW_VS_PROG, }, .prepare = gen6_prepare_vs_push_constants, }; @@ -147,7 +147,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->vs.prog_offset); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); @@ -165,8 +165,7 @@ upload_vs_state(struct brw_context *brw) const struct brw_tracked_state gen6_vs_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_VS_SURFACES | + .brw = (BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM | diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 43e651db3ef..9ef6133e2b9 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -39,6 +39,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; + /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); @@ -48,6 +49,7 @@ 
gen6_prepare_wm_push_constants(struct brw_context *brw) /* XXX: Should this happen somewhere before to get our state flag set? */ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* CACHE_NEW_VS_PROG */ if (brw->wm.prog_data->nr_params != 0) { float *constants; unsigned int i; @@ -83,7 +85,7 @@ const struct brw_tracked_state gen6_wm_constants = { .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM), - .cache = 0, + .cache = CACHE_NEW_VS_PROG, }, .prepare = gen6_prepare_wm_push_constants, }; @@ -97,7 +99,7 @@ upload_wm_state(struct brw_context *brw) brw_fragment_program_const(brw->fragment_program); uint32_t dw2, dw4, dw5, dw6; - /* CACHE_NEW_WM_PROG */ + /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); @@ -157,7 +159,7 @@ upload_wm_state(struct brw_context *brw) if (ctx->Line.StippleFlag) dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; - /* _NEW_POLYGONSTIPPLE */ + /* _NEW_POLYGON */ if (ctx->Polygon.StippleFlag) dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE; @@ -183,7 +185,7 @@ upload_wm_state(struct brw_context *brw) BEGIN_BATCH(9); OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); if (brw->wm.prog_data->total_scratch) { OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, @@ -195,21 +197,19 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(dw5); OUT_BATCH(dw6); OUT_BATCH(0); /* kernel 1 pointer */ - if (brw->wm.prog_data->prog_offset_16) { - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->wm.prog_data->prog_offset_16); - } else { - OUT_BATCH(0); /* kernel 2 pointer */ - } + /* kernel 2 pointer */ + OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16); ADVANCE_BATCH(); } const struct brw_tracked_state gen6_wm_state = { .dirty = { - .mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS | - 
_NEW_PROGRAM_CONSTANTS | _NEW_POLYGON), - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_FRAGMENT_PROGRAM | + .mesa = (_NEW_LINE | + _NEW_COLOR | + _NEW_BUFFERS | + _NEW_PROGRAM_CONSTANTS | + _NEW_POLYGON), + .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c index 4e9461739d0..a44d31596b9 100644 --- a/src/mesa/drivers/dri/i965/gen7_disable.c +++ b/src/mesa/drivers/dri/i965/gen7_disable.c @@ -31,7 +31,7 @@ disable_stages(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - assert(brw->gs.prog_bo == NULL); + assert(!brw->gs.prog_active); /* Disable the Geometry Shader (GS) Unit */ BEGIN_BATCH(7); diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index dd0ccd27193..7544f961da9 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -113,7 +113,7 @@ static void emit_depthbuffer(struct brw_context *brw) struct intel_region *region = drb->region; uint32_t tile_x, tile_y, offset; - offset = intel_region_tile_offsets(region, &tile_x, &tile_y); + offset = intel_renderbuffer_tile_offsets(drb, &tile_x, &tile_y); assert(region->tiling == I915_TILING_Y); diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 3a614693dfc..2b650e9bc45 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -78,7 +78,7 @@ upload_urb(struct brw_context *brw) assert(brw->urb.nr_vs_entries % 8 == 0); assert(brw->urb.nr_gs_entries % 8 == 0); /* GS requirement */ - assert(!brw->gs.prog_bo); + assert(!brw->gs.prog_active); BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index ae7a1d6c35c..0fad3d2fb68 100644 --- 
a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -67,7 +67,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->vs.prog_offset); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 6a64eb8a2d3..17f75354f1d 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -36,6 +36,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; + /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); @@ -45,7 +46,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) /* XXX: Should this happen somewhere before to get our state flag set? 
*/ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); - /* BRW_NEW_FRAGMENT_PROGRAM */ + /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params != 0) { float *constants; unsigned int i; @@ -80,7 +81,7 @@ const struct brw_tracked_state gen7_wm_constants = { .dirty = { .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM), - .cache = 0, + .cache = CACHE_NEW_WM_PROG, }, .prepare = gen7_prepare_wm_constants, }; @@ -104,7 +105,7 @@ upload_wm_state(struct brw_context *brw) if (ctx->Line.StippleFlag) dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; - /* _NEW_POLYGONSTIPPLE */ + /* _NEW_POLYGON */ if (ctx->Polygon.StippleFlag) dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE; @@ -227,24 +228,21 @@ upload_ps_state(struct brw_context *brw) BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ - if (brw->wm.prog_data->prog_offset_16) { - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->wm.prog_data->prog_offset_16); - } else { - OUT_BATCH(0); /* kernel 2 pointer */ - } + OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16); ADVANCE_BATCH(); } const struct brw_tracked_state gen7_ps_state = { .dirty = { - .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_POLYGONSTIPPLE | + .mesa = (_NEW_LINE | + _NEW_POLYGON | + _NEW_POLYGONSTIPPLE | _NEW_PROGRAM_CONSTANTS), .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 00b562f2176..9994b67bfc5 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -273,7 +273,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, surf->ss0.surface_type = BRW_SURFACE_2D; /* reloc */ - 
surf->ss1.base_addr = intel_region_tile_offsets(region, &tile_x, &tile_y); + surf->ss1.base_addr = intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); surf->ss1.base_addr += region->buffer->offset; /* reloc */ assert(brw->has_surface_tile_offset); diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 77edc3a6bfe..735382902d1 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -53,6 +53,22 @@ static void clear_cache( struct intel_context *intel ) } void +intel_batchbuffer_init(struct intel_context *intel) +{ + intel_batchbuffer_reset(intel); + + if (intel->gen == 6) { + /* We can't just use brw_state_batch to get a chunk of space for + * the gen6 workaround because it involves actually writing to + * the buffer, and the kernel doesn't let us write to the batch. + */ + intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr, + "gen6 workaround", + 4096, 4096); + } +} + +void intel_batchbuffer_reset(struct intel_context *intel) { if (intel->batch.last_bo != NULL) { @@ -76,6 +92,7 @@ intel_batchbuffer_free(struct intel_context *intel) { drm_intel_bo_unreference(intel->batch.last_bo); drm_intel_bo_unreference(intel->batch.bo); + drm_intel_bo_unreference(intel->batch.workaround_bo); clear_cache(intel); } @@ -276,6 +293,43 @@ emit: item->header = intel->batch.emit; } +/** + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6. From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. 
+ * + * XXX: There is also a workaround that would appear to apply to this + * workaround, but it doesn't appear to be necessary so far: + * + * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + */ +void +intel_emit_post_sync_nonzero_flush(struct intel_context *intel) +{ + if (!intel->batch.need_workaround_flush) + return; + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_RELOC(intel->batch.workaround_bo, + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0); + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + intel->batch.need_workaround_flush = false; +} + /* Emit a pipelined flush to either flush render and texture cache for * reading from a FBO-drawn texture, or flush so that frontbuffer * render appears on the screen in DRI1. @@ -294,15 +348,17 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel) OUT_BATCH(0); ADVANCE_BATCH(); } else { - BEGIN_BATCH(8); - /* XXX workaround: issue any post sync != 0 before write - * cache flush = 1 - */ - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ + if (intel->gen == 6) { + /* Hardware workaround: SNB B-Spec says: + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache + * Flush Enable =1, a PIPE_CONTROL with any non-zero + * post-sync-op is required. 
+ */ + intel_emit_post_sync_nonzero_flush(intel); + } + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL); OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | PIPE_CONTROL_WRITE_FLUSH | diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index a0a5c9841c6..fb4134d889e 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -9,6 +9,7 @@ #define BATCH_RESERVED 16 +void intel_batchbuffer_init(struct intel_context *intel); void intel_batchbuffer_reset(struct intel_context *intel); void intel_batchbuffer_free(struct intel_context *intel); @@ -38,6 +39,7 @@ GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, uint32_t write_domain, uint32_t offset); void intel_batchbuffer_emit_mi_flush(struct intel_context *intel); +void intel_emit_post_sync_nonzero_flush(struct intel_context *intel); static INLINE uint32_t float_as_int(float f) { diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 5aac1f6fa24..30be1b9382f 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -280,10 +280,10 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) write_buffer = intel_region_buffer(intel, irb->region, all ? 
INTEL_WRITE_FULL : INTEL_WRITE_PART); - x1 = cx + irb->region->draw_x; - y1 = cy + irb->region->draw_y; - x2 = cx + cw + irb->region->draw_x; - y2 = cy + ch + irb->region->draw_y; + x1 = cx + irb->draw_x; + y1 = cy + irb->draw_y; + x2 = cx + cw + irb->draw_x; + y2 = cy + ch + irb->draw_y; pitch = irb->region->pitch; cpp = irb->region->cpp; diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 7eb50edc6b4..a52a07cd93a 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -28,7 +28,9 @@ #include "intel_context.h" #include "intel_buffers.h" #include "intel_fbo.h" + #include "main/framebuffer.h" +#include "main/renderbuffer.h" /** * Return pointer to current color drawing region, or NULL. @@ -100,6 +102,28 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb) return; } + /* + * If intel_context is using separate stencil, but the depth attachment + * (gl_framebuffer.Attachment[BUFFER_DEPTH]) has a packed depth/stencil + * format, then we must install the real depth buffer at fb->_DepthBuffer + * and set fb->_DepthBuffer->Wrapped before calling _mesa_update_framebuffer. + * Otherwise, _mesa_update_framebuffer will create and install a swras + * depth wrapper instead. + * + * Ditto for stencil. + */ + irbDepth = intel_get_renderbuffer(fb, BUFFER_DEPTH); + if (irbDepth && irbDepth->Base.Format == MESA_FORMAT_X8_Z24) { + _mesa_reference_renderbuffer(&fb->_DepthBuffer, &irbDepth->Base); + irbDepth->Base.Wrapped = fb->Attachment[BUFFER_DEPTH].Renderbuffer; + } + + irbStencil = intel_get_renderbuffer(fb, BUFFER_STENCIL); + if (irbStencil && irbStencil->Base.Format == MESA_FORMAT_S8) { + _mesa_reference_renderbuffer(&fb->_StencilBuffer, &irbStencil->Base); + irbStencil->Base.Wrapped = fb->Attachment[BUFFER_STENCIL].Renderbuffer; + } + /* Do this here, not core Mesa, since this function is called from * many places within the driver. 
*/ @@ -165,47 +189,33 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb) FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE); } - /*** - *** Get depth buffer region and check if we need a software fallback. - ***/ - if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) { - irbDepth = intel_renderbuffer(fb->_DepthBuffer->Wrapped); - if (irbDepth && irbDepth->region) { - assert(!fb_has_hiz || irbDepth->Base.Format != MESA_FORMAT_S8_Z24); - FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); - depthRegion = irbDepth->region; - } - else { - FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_TRUE); - depthRegion = NULL; - } - } - else { - /* not using depth buffer */ + /* Check for depth fallback. */ + if (irbDepth && irbDepth->region) { + assert(!fb_has_hiz || irbDepth->Base.Format != MESA_FORMAT_S8_Z24); + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); + depthRegion = irbDepth->region; + } else if (irbDepth && !irbDepth->region) { + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_TRUE); + depthRegion = NULL; + } else { /* !irbDepth */ + /* No fallback is needed because there is no depth buffer. */ FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); depthRegion = NULL; } - /*** - *** Stencil buffer - ***/ - if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) { - irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped); - if (irbStencil && irbStencil->region) { - if (!intel->has_separate_stencil) - assert(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); - if (fb_has_hiz || intel->must_use_separate_stencil) - assert(irbStencil->Base.Format == MESA_FORMAT_S8); - if (irbStencil->Base.Format == MESA_FORMAT_S8) - assert(intel->has_separate_stencil); - FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); - } - else { - FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_TRUE); - } - } - else { - /* XXX FBO: instead of FALSE, pass ctx->Stencil._Enabled ??? */ + /* Check for stencil fallback. 
*/ + if (irbStencil && irbStencil->region) { + if (!intel->has_separate_stencil) + assert(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); + if (fb_has_hiz || intel->must_use_separate_stencil) + assert(irbStencil->Base.Format == MESA_FORMAT_S8); + if (irbStencil->Base.Format == MESA_FORMAT_S8) + assert(intel->has_separate_stencil); + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); + } else if (irbStencil && !irbStencil->region) { + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_TRUE); + } else { /* !irbStencil */ + /* No fallback is needed because there is no stencil buffer. */ FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 0c2ba413ad7..70aee52bd14 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -504,10 +504,7 @@ intelInvalidateState(struct gl_context * ctx, GLuint new_state) struct intel_context *intel = intel_context(ctx); _swrast_InvalidateState(ctx, new_state); - _swsetup_InvalidateState(ctx, new_state); _vbo_InvalidateState(ctx, new_state); - _tnl_InvalidateState(ctx, new_state); - _tnl_invalidate_vertex_state(ctx, new_state); intel->NewGLState |= new_state; @@ -663,7 +660,7 @@ intelInitContext(struct intel_context *intel, ctx->TextureFormatSupported[MESA_FORMAT_AL1616] = GL_TRUE; /* Depth and stencil */ - ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = !intel->must_use_separate_stencil; + ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = GL_TRUE; ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = intel->has_separate_stencil; ctx->TextureFormatSupported[MESA_FORMAT_S8] = intel->has_separate_stencil; @@ -854,7 +851,7 @@ intelInitContext(struct intel_context *intel, if (INTEL_DEBUG & DEBUG_BUFMGR) dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE); - intel_batchbuffer_reset(intel); + intel_batchbuffer_init(intel); intel_fbo_init(intel); diff --git 
a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 80dee4ef38e..148fb0c2c9a 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -181,6 +181,9 @@ struct intel_context drm_intel_bo *bo; /** Last BO submitted to the hardware. Used for glFinish(). */ drm_intel_bo *last_bo; + /** BO for post-sync nonzero writes for gen6 workaround. */ + drm_intel_bo *workaround_bo; + bool need_workaround_flush; struct cached_batch_item *cached_items; diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 3fd987abd8c..64c996ca5cd 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -172,6 +172,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, { "GL_ARB_seamless_cube_map", NULL }, + { "GL_ARB_shader_texture_lod", NULL }, { "GL_ARB_shadow", NULL }, #ifdef TEXTURE_FLOAT_ENABLED { "GL_ARB_texture_float", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 83f622d437e..90c3909d1d8 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -35,7 +35,7 @@ #include "main/renderbuffer.h" #include "main/context.h" #include "main/teximage.h" -#include "main/texrender.h" +#include "swrast/swrast.h" #include "drivers/common/meta.h" #include "intel_context.h" @@ -82,6 +82,12 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb) if (intel && irb->hiz_region) { intel_region_release(&irb->hiz_region); } + if (intel && irb->wrapped_depth) { + _mesa_reference_renderbuffer(&irb->wrapped_depth, NULL); + } + if (intel && irb->wrapped_stencil) { + _mesa_reference_renderbuffer(&irb->wrapped_stencil, NULL); + } free(irb); } @@ -105,7 +111,7 @@ intel_get_pointer(struct gl_context * ctx, struct 
gl_renderbuffer *rb, * Called via glRenderbufferStorageEXT() to set the format and allocate * storage for a user-created renderbuffer. */ -static GLboolean +GLboolean intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, GLenum internalFormat, GLuint width, GLuint height) @@ -141,6 +147,8 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer break; } + rb->Width = width; + rb->Height = height; rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); rb->DataType = intel_mesa_format_to_rb_datatype(rb->Format); cpp = _mesa_get_format_bytes(rb->Format); @@ -190,32 +198,63 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer width, height / 2, GL_TRUE); + if (!irb->region) + return false; + + } else if (irb->Base.Format == MESA_FORMAT_S8_Z24 + && intel->must_use_separate_stencil) { + + bool ok = true; + struct gl_renderbuffer *depth_rb; + struct gl_renderbuffer *stencil_rb; + + depth_rb = intel_create_wrapped_renderbuffer(ctx, width, height, + MESA_FORMAT_X8_Z24); + stencil_rb = intel_create_wrapped_renderbuffer(ctx, width, height, + MESA_FORMAT_S8); + ok = depth_rb && stencil_rb; + ok = ok && intel_alloc_renderbuffer_storage(ctx, depth_rb, + depth_rb->InternalFormat, + width, height); + ok = ok && intel_alloc_renderbuffer_storage(ctx, stencil_rb, + stencil_rb->InternalFormat, + width, height); + + if (!ok) { + if (depth_rb) { + intel_delete_renderbuffer(depth_rb); + } + if (stencil_rb) { + intel_delete_renderbuffer(stencil_rb); + } + return false; + } + + depth_rb->Wrapped = rb; + stencil_rb->Wrapped = rb; + _mesa_reference_renderbuffer(&irb->wrapped_depth, depth_rb); + _mesa_reference_renderbuffer(&irb->wrapped_stencil, stencil_rb); + } else { irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp, width, height, GL_TRUE); - } - - if (!irb->region) - return GL_FALSE; /* out of memory? 
*/ - - ASSERT(irb->region->buffer); - - if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { - irb->hiz_region = intel_region_alloc(intel->intelScreen, - I915_TILING_Y, - irb->region->cpp, - irb->region->width, - irb->region->height, - GL_TRUE); - if (!irb->hiz_region) { - intel_region_release(&irb->region); - return GL_FALSE; + if (!irb->region) + return false; + + if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { + irb->hiz_region = intel_region_alloc(intel->intelScreen, + I915_TILING_Y, + irb->region->cpp, + irb->region->width, + irb->region->height, + GL_TRUE); + if (!irb->hiz_region) { + intel_region_release(&irb->region); + return false; + } } } - rb->Width = width; - rb->Height = height; - return GL_TRUE; } @@ -366,6 +405,37 @@ intel_create_renderbuffer(gl_format format) } +struct gl_renderbuffer* +intel_create_wrapped_renderbuffer(struct gl_context * ctx, + int width, int height, + gl_format format) +{ + /* + * The name here is irrelevant, as long as its nonzero, because the + * renderbuffer never gets entered into Mesa's renderbuffer hash table. + */ + GLuint name = ~0; + + struct intel_renderbuffer *irb = CALLOC_STRUCT(intel_renderbuffer); + if (!irb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); + return NULL; + } + + struct gl_renderbuffer *rb = &irb->Base; + _mesa_init_renderbuffer(rb, name); + rb->ClassID = INTEL_RB_CLASS; + rb->_BaseFormat = _mesa_get_format_base_format(format); + rb->Format = format; + rb->InternalFormat = rb->_BaseFormat; + rb->DataType = intel_mesa_format_to_rb_datatype(format); + rb->Width = width; + rb->Height = height; + + return rb; +} + + /** * Create a new renderbuffer object. * Typically called via glBindRenderbufferEXT(). 
@@ -427,6 +497,10 @@ intel_framebuffer_renderbuffer(struct gl_context * ctx, intel_draw_buffer(ctx, fb); } +static bool +intel_update_tex_wrapper_regions(struct intel_context *intel, + struct intel_renderbuffer *irb, + struct intel_texture_image *intel_image); static GLboolean intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, @@ -453,6 +527,49 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; + if (intel_image->stencil_rb) { + /* The tex image has packed depth/stencil format, but is using separate + * stencil. */ + + bool ok; + struct intel_renderbuffer *depth_irb = + intel_renderbuffer(intel_image->depth_rb); + + /* Update the hiz region if necessary. */ + ok = intel_update_tex_wrapper_regions(intel, depth_irb, intel_image); + if (!ok) { + return false; + } + + /* The tex image shares its embedded depth and stencil renderbuffers with + * the renderbuffer wrapper. */ + if (irb->wrapped_depth != intel_image->depth_rb) { + _mesa_reference_renderbuffer(&irb->wrapped_depth, + intel_image->depth_rb); + } + if (irb->wrapped_stencil != intel_image->stencil_rb) { + _mesa_reference_renderbuffer(&irb->wrapped_stencil, + intel_image->stencil_rb); + } + + return true; + + } else { + return intel_update_tex_wrapper_regions(intel, irb, intel_image); + } +} + +/** + * FIXME: The handling of the hiz region is broken for mipmapped depth textures + * FIXME: because intel_finalize_mipmap_tree is unaware of it. + */ +static bool +intel_update_tex_wrapper_regions(struct intel_context *intel, + struct intel_renderbuffer *irb, + struct intel_texture_image *intel_image) +{ + struct gl_renderbuffer *rb = &irb->Base; + /* Point the renderbuffer's region to the texture's region. 
*/ if (irb->region != intel_image->mt->region) { intel_region_release(&irb->region); @@ -460,14 +577,14 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, } /* Allocate the texture's hiz region if necessary. */ - if (intel->vtbl.is_hiz_depth_format(intel, texImage->TexFormat) + if (intel->vtbl.is_hiz_depth_format(intel, rb->Format) && !intel_image->mt->hiz_region) { intel_image->mt->hiz_region = intel_region_alloc(intel->intelScreen, I915_TILING_Y, - _mesa_get_format_bytes(texImage->TexFormat), - texImage->Width, - texImage->Height, + _mesa_get_format_bytes(rb->Format), + rb->Width, + rb->Height, GL_TRUE); if (!intel_image->mt->hiz_region) return GL_FALSE; @@ -512,9 +629,10 @@ intel_wrap_texture(struct gl_context * ctx, struct gl_texture_image *texImage) return irb; } -static void -intel_set_draw_offset_for_image(struct intel_texture_image *intel_image, - int zoffset) +void +intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, + struct intel_texture_image *intel_image, + int zoffset) { struct intel_mipmap_tree *mt = intel_image->mt; unsigned int dst_x, dst_y; @@ -526,9 +644,45 @@ intel_set_draw_offset_for_image(struct intel_texture_image *intel_image, zoffset, &dst_x, &dst_y); - mt->region->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; - mt->region->draw_x = dst_x; - mt->region->draw_y = dst_y; + irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; + irb->draw_x = dst_x; + irb->draw_y = dst_y; +} + +/** + * Rendering to tiled buffers requires that the base address of the + * buffer be aligned to a page boundary. We generally render to + * textures by pointing the surface at the mipmap image level, which + * may not be aligned to a tile boundary. + * + * This function returns an appropriately-aligned base offset + * according to the tiling restrictions, plus any required x/y offset + * from there. 
+ */ +uint32_t +intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, + uint32_t *tile_x, + uint32_t *tile_y) +{ + int cpp = irb->region->cpp; + uint32_t pitch = irb->region->pitch * cpp; + + if (irb->region->tiling == I915_TILING_NONE) { + *tile_x = 0; + *tile_y = 0; + return irb->draw_x * cpp + irb->draw_y * pitch; + } else if (irb->region->tiling == I915_TILING_X) { + *tile_x = irb->draw_x % (512 / cpp); + *tile_y = irb->draw_y % 8; + return ((irb->draw_y / 8) * (8 * pitch) + + (irb->draw_x - *tile_x) / (512 / cpp) * 4096); + } else { + assert(irb->region->tiling == I915_TILING_Y); + *tile_x = irb->draw_x % (128 / cpp); + *tile_y = irb->draw_y % 32; + return ((irb->draw_y / 32) * (32 * pitch) + + (irb->draw_x - *tile_x) / (128 / cpp) * 4096); + } } /** @@ -542,54 +696,50 @@ intel_render_texture(struct gl_context * ctx, struct gl_framebuffer *fb, struct gl_renderbuffer_attachment *att) { - struct gl_texture_image *newImage - = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; + struct gl_texture_image *image = _mesa_get_attachment_teximage(att); struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); - struct intel_texture_image *intel_image; + struct intel_texture_image *intel_image = intel_texture_image(image); (void) fb; - ASSERT(newImage); - - intel_image = intel_texture_image(newImage); if (!intel_image->mt) { /* Fallback on drawing to a texture that doesn't have a miptree * (has a border, width/height 0, etc.) 
*/ _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); - _mesa_render_texture(ctx, fb, att); + _swrast_render_texture(ctx, fb, att); return; } else if (!irb) { - irb = intel_wrap_texture(ctx, newImage); + irb = intel_wrap_texture(ctx, image); if (irb) { /* bind the wrapper to the attachment point */ _mesa_reference_renderbuffer(&att->Renderbuffer, &irb->Base); } else { /* fallback to software rendering */ - _mesa_render_texture(ctx, fb, att); + _swrast_render_texture(ctx, fb, att); return; } } - if (!intel_update_wrapper(ctx, irb, newImage)) { + if (!intel_update_wrapper(ctx, irb, image)) { _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); - _mesa_render_texture(ctx, fb, att); + _swrast_render_texture(ctx, fb, att); return; } DBG("Begin render texture tid %lx tex=%u w=%d h=%d refcount=%d\n", _glthread_GetID(), - att->Texture->Name, newImage->Width, newImage->Height, + att->Texture->Name, image->Width, image->Height, irb->Base.RefCount); - intel_set_draw_offset_for_image(intel_image, att->Zoffset); + intel_renderbuffer_set_draw_offset(irb, intel_image, att->Zoffset); intel_image->used_as_render_target = GL_TRUE; #ifndef I915 if (!brw_context(ctx)->has_surface_tile_offset && - (intel_image->mt->region->draw_offset & 4095) != 0) { + (irb->draw_offset & 4095) != 0) { /* Original gen4 hardware couldn't draw to a non-tile-aligned * destination in a miptree unless you actually setup your * renderbuffer as a miptree and used the fragile @@ -600,22 +750,15 @@ intel_render_texture(struct gl_context * ctx, struct intel_context *intel = intel_context(ctx); struct intel_mipmap_tree *old_mt = intel_image->mt; struct intel_mipmap_tree *new_mt; - int comp_byte = 0, texel_bytes; - - if (_mesa_is_format_compressed(intel_image->base.TexFormat)) - comp_byte = intel_compressed_num_bytes(intel_image->base.TexFormat); - texel_bytes = _mesa_get_format_bytes(intel_image->base.TexFormat); - - new_mt = intel_miptree_create(intel, newImage->TexObject->Target, - 
intel_image->base._BaseFormat, - intel_image->base.InternalFormat, + new_mt = intel_miptree_create(intel, image->TexObject->Target, + intel_image->base.TexFormat, intel_image->level, intel_image->level, intel_image->base.Width, intel_image->base.Height, intel_image->base.Depth, - texel_bytes, comp_byte, GL_TRUE); + GL_TRUE); intel_miptree_image_copy(intel, new_mt, @@ -625,7 +768,7 @@ intel_render_texture(struct gl_context * ctx, intel_miptree_release(intel, &intel_image->mt); intel_image->mt = new_mt; - intel_set_draw_offset_for_image(intel_image, att->Zoffset); + intel_renderbuffer_set_draw_offset(irb, intel_image, att->Zoffset); intel_region_release(&irb->region); intel_region_reference(&irb->region, intel_image->mt->region); @@ -693,16 +836,9 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) else depth_stencil_are_same = false; - bool fb_has_combined_depth_stencil_format = - (depthRb && depthRb->Base.Format == MESA_FORMAT_S8_Z24) || - (stencilRb && stencilRb->Base.Format == MESA_FORMAT_S8_Z24); - - bool fb_has_hiz = intel_framebuffer_has_hiz(fb); - - if ((intel->must_use_separate_stencil || fb_has_hiz) - && (depth_stencil_are_same || fb_has_combined_depth_stencil_format)) { - fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; - } else if (!intel->has_separate_stencil && depthRb && stencilRb && !depth_stencil_are_same) { + if (!intel->has_separate_stencil + && depthRb && stencilRb + && !depth_stencil_are_same) { fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index e9929b095ff..cbf29c86257 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -33,6 +33,7 @@ #include "intel_screen.h" struct intel_context; +struct intel_texture_image; /** * Intel renderbuffer, derived from gl_renderbuffer. @@ -44,6 +45,21 @@ struct intel_renderbuffer /** Only used by depth renderbuffers for which HiZ is enabled. 
*/ struct intel_region *hiz_region; + + /** + * \name Packed depth/stencil unwrappers + * + * If the intel_context is using separate stencil and this renderbuffer has + * a a packed depth/stencil format, then wrapped_depth and wrapped_stencil + * are the real renderbuffers. + */ + struct gl_renderbuffer *wrapped_depth; + struct gl_renderbuffer *wrapped_stencil; + + /** \} */ + + GLuint draw_offset; /**< Offset of drawing address within the region */ + GLuint draw_x, draw_y; /**< Offset of drawing within the region */ }; @@ -73,15 +89,47 @@ intel_renderbuffer(struct gl_renderbuffer *rb) /** - * Return a framebuffer's renderbuffer, named by a BUFFER_x index. + * \brief Return the framebuffer attachment specified by attIndex. + * + * If the framebuffer lacks the specified attachment, then return null. + * + * If the attached renderbuffer is a wrapper, then return wrapped + * renderbuffer. */ static INLINE struct intel_renderbuffer * -intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex) +intel_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex) { - if (attIndex >= 0) - return intel_renderbuffer(fb->Attachment[attIndex].Renderbuffer); - else + struct gl_renderbuffer *rb; + struct intel_renderbuffer *irb; + + /* XXX: Who passes -1 to intel_get_renderbuffer? 
*/ + if (attIndex < 0) + return NULL; + + rb = fb->Attachment[attIndex].Renderbuffer; + if (!rb) return NULL; + + irb = intel_renderbuffer(rb); + if (!irb) + return NULL; + + switch (attIndex) { + case BUFFER_DEPTH: + if (irb->wrapped_depth) { + irb = intel_renderbuffer(irb->wrapped_depth); + } + break; + case BUFFER_STENCIL: + if (irb->wrapped_stencil) { + irb = intel_renderbuffer(irb->wrapped_stencil); + } + break; + default: + break; + } + + return irb; } /** @@ -122,6 +170,16 @@ intel_renderbuffer_set_hiz_region(struct intel_context *intel, extern struct intel_renderbuffer * intel_create_renderbuffer(gl_format format); +struct gl_renderbuffer* +intel_create_wrapped_renderbuffer(struct gl_context * ctx, + int width, int height, + gl_format format); + +GLboolean +intel_alloc_renderbuffer_storage(struct gl_context * ctx, + struct gl_renderbuffer *rb, + GLenum internalFormat, + GLuint width, GLuint height); extern void intel_fbo_init(struct intel_context *intel); @@ -130,6 +188,15 @@ intel_fbo_init(struct intel_context *intel); extern void intel_flip_renderbuffers(struct gl_framebuffer *fb); +void +intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, + struct intel_texture_image *intel_image, + int zoffset); + +uint32_t +intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, + uint32_t *tile_x, + uint32_t *tile_y); static INLINE struct intel_region * intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) @@ -141,5 +208,4 @@ intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) return NULL; } - #endif /* INTEL_FBO_H */ diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index e62905de7c3..4e711de1ce1 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -29,6 +29,7 @@ #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_tex_layout.h" +#include "intel_tex.h" #include "main/enums.h" 
#include "main/formats.h" @@ -55,30 +56,34 @@ target_to_target(GLenum target) static struct intel_mipmap_tree * intel_miptree_create_internal(struct intel_context *intel, GLenum target, - GLenum internal_format, + gl_format format, GLuint first_level, GLuint last_level, GLuint width0, GLuint height0, - GLuint depth0, GLuint cpp, GLuint compress_byte, + GLuint depth0, uint32_t tiling) { GLboolean ok; struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); + int compress_byte = 0; DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(internal_format), + _mesa_get_format_name(format), first_level, last_level, mt); + if (_mesa_is_format_compressed(format)) + compress_byte = intel_compressed_num_bytes(format); + mt->target = target_to_target(target); - mt->internal_format = internal_format; + mt->format = format; mt->first_level = first_level; mt->last_level = last_level; mt->width0 = width0; mt->height0 = height0; mt->depth0 = depth0; - mt->cpp = compress_byte ? compress_byte : cpp; + mt->cpp = compress_byte ? compress_byte : _mesa_get_format_bytes(mt->format); mt->compressed = compress_byte ? 
1 : 0; mt->refcount = 1; @@ -104,19 +109,19 @@ intel_miptree_create_internal(struct intel_context *intel, struct intel_mipmap_tree * intel_miptree_create(struct intel_context *intel, GLenum target, - GLenum base_format, - GLenum internal_format, + gl_format format, GLuint first_level, GLuint last_level, GLuint width0, GLuint height0, - GLuint depth0, GLuint cpp, GLuint compress_byte, + GLuint depth0, GLboolean expect_accelerated_upload) { struct intel_mipmap_tree *mt; uint32_t tiling = I915_TILING_NONE; + GLenum base_format = _mesa_get_format_base_format(format); - if (intel->use_texture_tiling && compress_byte == 0) { + if (intel->use_texture_tiling && !_mesa_is_format_compressed(format)) { if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT || base_format == GL_DEPTH_STENCIL_EXT)) @@ -125,9 +130,9 @@ intel_miptree_create(struct intel_context *intel, tiling = I915_TILING_X; } - mt = intel_miptree_create_internal(intel, target, internal_format, + mt = intel_miptree_create_internal(intel, target, format, first_level, last_level, width0, - height0, depth0, cpp, compress_byte, + height0, depth0, tiling); /* * pitch == 0 || height == 0 indicates the null texture @@ -156,17 +161,15 @@ intel_miptree_create(struct intel_context *intel, struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, GLenum target, - GLenum internal_format, + gl_format format, struct intel_region *region, - GLuint depth0, - GLuint compress_byte) + GLuint depth0) { struct intel_mipmap_tree *mt; - mt = intel_miptree_create_internal(intel, target, internal_format, + mt = intel_miptree_create_internal(intel, target, format, 0, 0, region->width, region->height, 1, - region->cpp, compress_byte, I915_TILING_NONE); if (!mt) return mt; @@ -223,7 +226,6 @@ GLboolean intel_miptree_match_image(struct intel_mipmap_tree *mt, struct gl_texture_image *image) { - GLboolean isCompressed = _mesa_is_format_compressed(image->TexFormat); struct intel_texture_image *intelImage = 
intel_texture_image(image); GLuint level = intelImage->level; @@ -231,13 +233,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt, if (image->Border) return GL_FALSE; - if (image->InternalFormat != mt->internal_format || - isCompressed != mt->compressed) - return GL_FALSE; - - if (!isCompressed && - !mt->compressed && - _mesa_get_format_bytes(image->TexFormat) != mt->cpp) + if (image->TexFormat != mt->format) return GL_FALSE; /* Test image dimensions against the base level image adjusted for @@ -389,26 +385,32 @@ intel_miptree_image_data(struct intel_context *intel, GLuint i; for (i = 0; i < depth; i++) { - GLuint dst_x, dst_y, height; + GLuint dst_x, dst_y, height, width; intel_miptree_get_image_offset(dst, level, face, i, &dst_x, &dst_y); height = dst->level[level].height; - if(dst->compressed) - height = (height + 3) / 4; + width = dst->level[level].width; + if (dst->compressed) { + unsigned int align_w, align_h; + + intel_get_texture_alignment_unit(dst->format, &align_w, &align_h); + height = (height + align_h - 1) / align_h; + width = ALIGN(width, align_w); + } DBG("%s: %d/%d %p/%d -> (%d, %d)/%d (%d, %d)\n", __FUNCTION__, face, level, src, src_row_pitch * dst->cpp, dst_x, dst_y, dst->region->pitch * dst->cpp, - dst->level[level].width, height); + width, height); intel_region_data(intel, dst->region, 0, dst_x, dst_y, src, src_row_pitch, 0, 0, /* source x, y */ - dst->level[level].width, height); /* width, height */ + width, height); src = (char *)src + src_image_pitch * dst->cpp; } @@ -434,8 +436,7 @@ intel_miptree_image_copy(struct intel_context *intel, if (dst->compressed) { GLuint align_w, align_h; - intel_get_texture_alignment_unit(dst->internal_format, - &align_w, &align_h); + intel_get_texture_alignment_unit(dst->format, &align_w, &align_h); height = (height + 3) / 4; width = ALIGN(width, align_w); } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 325e3916981..ea865904f68 100644 
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -91,7 +91,7 @@ struct intel_mipmap_tree /* Effectively the key: */ GLenum target; - GLenum internal_format; + gl_format format; GLuint first_level; GLuint last_level; @@ -136,24 +136,20 @@ struct intel_mipmap_tree struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, GLenum target, - GLenum base_format, - GLenum internal_format, + gl_format format, GLuint first_level, GLuint last_level, GLuint width0, GLuint height0, GLuint depth0, - GLuint cpp, - GLuint compress_byte, GLboolean expect_accelerated_upload); struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, GLenum target, - GLenum internal_format, + gl_format format, struct intel_region *region, - GLuint depth0, - GLuint compress_byte); + GLuint depth0); int intel_miptree_pitch_align (struct intel_context *intel, struct intel_mipmap_tree *mt, diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index e83f1bfab94..88258d5b42d 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -40,37 +40,6 @@ #define FILE_DEBUG_FLAG DEBUG_PIXEL -static struct intel_region * -copypix_src_region(struct intel_context *intel, GLenum type) -{ - struct intel_renderbuffer *depth; - - depth = (struct intel_renderbuffer *) - &intel->ctx.DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; - - switch (type) { - case GL_COLOR: - return intel_readbuf_region(intel); - case GL_DEPTH: - /* Don't think this is really possible execpt at 16bpp, when we - * have no stencil. */ - if (depth && depth->region->cpp == 2) - return depth->region; - case GL_STENCIL: - /* Don't think this is really possible. */ - break; - case GL_DEPTH_STENCIL_EXT: - /* Does it matter whether it is stencil/depth or depth/stencil? 
- */ - return depth->region; - default: - break; - } - - return NULL; -} - - /** * Check if any fragment operations are in effect which might effect * glCopyPixels. Differs from intel_check_blit_fragment_ops in that @@ -109,8 +78,6 @@ do_blit_copypixels(struct gl_context * ctx, GLint dstx, GLint dsty, GLenum type) { struct intel_context *intel = intel_context(ctx); - struct intel_region *dst; - struct intel_region *src; struct gl_framebuffer *fb = ctx->DrawBuffer; struct gl_framebuffer *read_fb = ctx->ReadBuffer; GLint orig_dstx; @@ -118,14 +85,56 @@ do_blit_copypixels(struct gl_context * ctx, GLint orig_srcx; GLint orig_srcy; GLboolean flip = GL_FALSE; + struct intel_renderbuffer *draw_irb = NULL; + struct intel_renderbuffer *read_irb = NULL; + + /* Update draw buffer bounds */ + _mesa_update_state(ctx); + + switch (type) { + case GL_COLOR: + if (fb->_NumColorDrawBuffers != 1) { + fallback_debug("glCopyPixels() fallback: MRT\n"); + return GL_FALSE; + } - if (type == GL_DEPTH || type == GL_STENCIL) { - fallback_debug("glCopyPixels() fallback: GL_DEPTH || GL_STENCIL\n"); + draw_irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); + read_irb = intel_renderbuffer(read_fb->_ColorReadBuffer); + break; + case GL_DEPTH_STENCIL_EXT: + draw_irb = intel_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer); + read_irb = + intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer); + break; + case GL_DEPTH: + fallback_debug("glCopyPixels() fallback: GL_DEPTH\n"); + return GL_FALSE; + case GL_STENCIL: + fallback_debug("glCopyPixels() fallback: GL_STENCIL\n"); + return GL_FALSE; + default: + fallback_debug("glCopyPixels(): Unknown type\n"); return GL_FALSE; } - /* Update draw buffer bounds */ - _mesa_update_state(ctx); + if (!draw_irb) { + fallback_debug("glCopyPixels() fallback: missing draw buffer\n"); + return GL_FALSE; + } + + if (!read_irb) { + fallback_debug("glCopyPixels() fallback: missing read buffer\n"); + return GL_FALSE; + } + + if (draw_irb->Base.Format != 
read_irb->Base.Format && + !(draw_irb->Base.Format == MESA_FORMAT_XRGB8888 && + read_irb->Base.Format == MESA_FORMAT_ARGB8888)) { + fallback_debug("glCopyPixels() fallback: mismatched formats (%s -> %s\n", + _mesa_get_format_name(read_irb->Base.Format), + _mesa_get_format_name(draw_irb->Base.Format)); + return GL_FALSE; + } /* Copypixels can be more than a straight copy. Ensure all the * extra operations are disabled: @@ -136,12 +145,6 @@ do_blit_copypixels(struct gl_context * ctx, intel_prepare_render(intel); - dst = intel_drawbuf_region(intel); - src = copypix_src_region(intel, type); - - if (!src || !dst) - return GL_FALSE; - intel_flush(&intel->ctx); /* Clip to destination buffer. */ @@ -179,9 +182,14 @@ do_blit_copypixels(struct gl_context * ctx, flip = !flip; } + srcx += read_irb->draw_x; + srcy += read_irb->draw_y; + dstx += draw_irb->draw_x; + dsty += draw_irb->draw_y; + if (!intel_region_copy(intel, - dst, 0, dstx, dsty, - src, 0, srcx, srcy, + draw_irb->region, 0, dstx, dsty, + read_irb->region, 0, srcx, srcy, width, height, flip, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY)) { diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 0253bbc2aa0..a4da1ce4fa5 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -524,38 +524,3 @@ intel_region_buffer(struct intel_context *intel, return region->buffer; } - -/** - * Rendering to tiled buffers requires that the base address of the - * buffer be aligned to a page boundary. We generally render to - * textures by pointing the surface at the mipmap image level, which - * may not be aligned to a tile boundary. - * - * This function returns an appropriately-aligned base offset - * according to the tiling restrictions, plus any required x/y offset - * from there. 
- */ -uint32_t -intel_region_tile_offsets(struct intel_region *region, - uint32_t *tile_x, - uint32_t *tile_y) -{ - uint32_t pitch = region->pitch * region->cpp; - - if (region->tiling == I915_TILING_NONE) { - *tile_x = 0; - *tile_y = 0; - return region->draw_x * region->cpp + region->draw_y * pitch; - } else if (region->tiling == I915_TILING_X) { - *tile_x = region->draw_x % (512 / region->cpp); - *tile_y = region->draw_y % 8; - return ((region->draw_y / 8) * (8 * pitch) + - (region->draw_x - *tile_x) / (512 / region->cpp) * 4096); - } else { - assert(region->tiling == I915_TILING_Y); - *tile_x = region->draw_x % (128 / region->cpp); - *tile_y = region->draw_y % 32; - return ((region->draw_y / 32) * (32 * pitch) + - (region->draw_x - *tile_x) / (128 / region->cpp) * 4096); - } -} diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index a8a300d863c..91f7121436e 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -62,9 +62,6 @@ struct intel_region GLubyte *map; /**< only non-NULL when region is actually mapped */ GLuint map_refcount; /**< Reference count for mapping */ - GLuint draw_offset; /**< Offset of drawing address within the region */ - GLuint draw_x, draw_y; /**< Offset of drawing within the region */ - uint32_t tiling; /**< Which tiling mode the region is in */ struct intel_buffer_object *pbo; /* zero-copy uploads */ @@ -142,10 +139,6 @@ drm_intel_bo *intel_region_buffer(struct intel_context *intel, struct intel_region *region, GLuint flag); -uint32_t intel_region_tile_offsets(struct intel_region *region, - uint32_t *tile_x, - uint32_t *tile_y); - void _mesa_copy_rect(GLubyte * dst, GLuint cpp, GLuint dst_pitch, diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e915ca04fe0..2a3a601ddba 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -282,13 
+282,38 @@ intel_query_image(__DRIimage *image, int attrib, int *value) } } +static __DRIimage * +intel_dup_image(__DRIimage *orig_image, void *loaderPrivate) +{ + __DRIimage *image; + + image = CALLOC(sizeof *image); + if (image == NULL) + return NULL; + + image->region = NULL; + intel_region_reference(&image->region, orig_image->region); + if (image->region == NULL) { + FREE(image); + return NULL; + } + + image->internal_format = orig_image->internal_format; + image->format = orig_image->format; + image->data_type = orig_image->data_type; + image->data = loaderPrivate; + + return image; +} + static struct __DRIimageExtensionRec intelImageExtension = { { __DRI_IMAGE, __DRI_IMAGE_VERSION }, intel_create_image_from_name, intel_create_image_from_renderbuffer, intel_destroy_image, intel_create_image, - intel_query_image + intel_query_image, + intel_dup_image }; static const __DRIextension *intelScreenExtensions[] = { diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index a4a1d6bd6eb..153803fba09 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -55,21 +55,18 @@ intel_set_span_functions(struct intel_context *intel, #define LOCAL_VARS \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ - const GLint yScale = rb->Name ? 1 : -1; \ - const GLint yBias = rb->Name ? 0 : rb->Height - 1; \ int minx = 0, miny = 0; \ int maxx = rb->Width; \ int maxy = rb->Height; \ - int pitch = irb->region->pitch * irb->region->cpp; \ - void *buf = irb->region->buffer->virtual; \ + int pitch = rb->RowStride * irb->region->cpp; \ + void *buf = rb->Data; \ GLuint p; \ - (void) p; \ - (void)buf; (void)pitch; /* unused for non-gttmap. 
*/ \ + (void) p; #define HW_CLIPLOOP() #define HW_ENDCLIPLOOP() -#define Y_FLIP(_y) ((_y) * yScale + yBias) +#define Y_FLIP(_y) (_y) #define HW_LOCK() @@ -151,7 +148,7 @@ intel_set_span_functions(struct intel_context *intel, * x | y | byte offset * -------------------------- * 0 | 0 | 0 - * 0 | 0 | 1 + * 0 | 1 | 1 * 1 | 0 | 2 * 1 | 1 | 3 * ... | ... | ... @@ -180,7 +177,15 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) { struct intel_renderbuffer *irb = intel_renderbuffer(rb); - if (irb == NULL || irb->region == NULL) + if (!irb) + return; + + if (irb->wrapped_depth) + intel_renderbuffer_map(intel, irb->wrapped_depth); + if (irb->wrapped_stencil) + intel_renderbuffer_map(intel, irb->wrapped_stencil); + + if (!irb->region) return; drm_intel_gem_bo_map_gtt(irb->region->buffer); @@ -188,10 +193,16 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) rb->Data = irb->region->buffer->virtual; rb->RowStride = irb->region->pitch; - /* Flip orientation if it's the window system buffer */ if (!rb->Name) { + /* Flip orientation of the window system buffer */ rb->Data += rb->RowStride * (irb->region->height - 1) * irb->region->cpp; rb->RowStride = -rb->RowStride; + } else { + /* Adjust the base pointer of a texture image drawbuffer to the image + * within the miptree region (all else has draw_x/y = 0). 
+ */ + rb->Data += irb->draw_x * irb->region->cpp; + rb->Data += irb->draw_y * rb->RowStride * irb->region->cpp; } intel_set_span_functions(intel, rb); @@ -203,7 +214,15 @@ intel_renderbuffer_unmap(struct intel_context *intel, { struct intel_renderbuffer *irb = intel_renderbuffer(rb); - if (irb == NULL || irb->region == NULL) + if (!irb) + return; + + if (irb->wrapped_depth) + intel_renderbuffer_unmap(intel, irb->wrapped_depth); + if (irb->wrapped_stencil) + intel_renderbuffer_unmap(intel, irb->wrapped_stencil); + + if (!irb->region) return; drm_intel_gem_bo_unmap_gtt(irb->region->buffer); @@ -214,71 +233,26 @@ intel_renderbuffer_unmap(struct intel_context *intel, rb->RowStride = 0; } -/** - * Map or unmap all the renderbuffers which we may need during - * software rendering. - * XXX in the future, we could probably convey extra information to - * reduce the number of mappings needed. I.e. if doing a glReadPixels - * from the depth buffer, we really only need one mapping. - * - * XXX Rewrite this function someday. - * We can probably just loop over all the renderbuffer attachments, - * map/unmap all of them, and not worry about the _ColorDrawBuffers - * _ColorReadBuffer, _DepthBuffer or _StencilBuffer fields. 
- */ static void -intel_map_unmap_framebuffer(struct intel_context *intel, - struct gl_framebuffer *fb, - GLboolean map) +intel_framebuffer_map(struct intel_context *intel, struct gl_framebuffer *fb) { - GLuint i; - - /* color draw buffers */ - for (i = 0; i < fb->_NumColorDrawBuffers; i++) { - if (map) - intel_renderbuffer_map(intel, fb->_ColorDrawBuffers[i]); - else - intel_renderbuffer_unmap(intel, fb->_ColorDrawBuffers[i]); - } - - /* color read buffer */ - if (map) - intel_renderbuffer_map(intel, fb->_ColorReadBuffer); - else - intel_renderbuffer_unmap(intel, fb->_ColorReadBuffer); + int i; - /* check for render to textures */ for (i = 0; i < BUFFER_COUNT; i++) { - struct gl_renderbuffer_attachment *att = - fb->Attachment + i; - struct gl_texture_object *tex = att->Texture; - if (tex) { - /* render to texture */ - ASSERT(att->Renderbuffer); - if (map) - intel_tex_map_images(intel, intel_texture_object(tex)); - else - intel_tex_unmap_images(intel, intel_texture_object(tex)); - } + intel_renderbuffer_map(intel, fb->Attachment[i].Renderbuffer); } - /* depth buffer (Note wrapper!) */ - if (fb->_DepthBuffer) { - if (map) - intel_renderbuffer_map(intel, fb->_DepthBuffer->Wrapped); - else - intel_renderbuffer_unmap(intel, fb->_DepthBuffer->Wrapped); - } + intel_check_front_buffer_rendering(intel); +} - /* stencil buffer (Note wrapper!) 
*/ - if (fb->_StencilBuffer) { - if (map) - intel_renderbuffer_map(intel, fb->_StencilBuffer->Wrapped); - else - intel_renderbuffer_unmap(intel, fb->_StencilBuffer->Wrapped); - } +static void +intel_framebuffer_unmap(struct intel_context *intel, struct gl_framebuffer *fb) +{ + int i; - intel_check_front_buffer_rendering(intel); + for (i = 0; i < BUFFER_COUNT; i++) { + intel_renderbuffer_unmap(intel, fb->Attachment[i].Renderbuffer); + } } /** @@ -305,9 +279,10 @@ intelSpanRenderStart(struct gl_context * ctx) } } - intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_TRUE); - if (ctx->ReadBuffer != ctx->DrawBuffer) - intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_TRUE); + intel_framebuffer_map(intel, ctx->DrawBuffer); + if (ctx->ReadBuffer != ctx->DrawBuffer) { + intel_framebuffer_map(intel, ctx->ReadBuffer); + } } /** @@ -329,9 +304,10 @@ intelSpanRenderFinish(struct gl_context * ctx) } } - intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_FALSE); - if (ctx->ReadBuffer != ctx->DrawBuffer) - intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_FALSE); + intel_framebuffer_unmap(intel, ctx->DrawBuffer); + if (ctx->ReadBuffer != ctx->DrawBuffer) { + intel_framebuffer_unmap(intel, ctx->ReadBuffer); + } } diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 2c3eab20fda..21c4a1dddba 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -1,4 +1,5 @@ #include "swrast/swrast.h" +#include "main/renderbuffer.h" #include "main/texobj.h" #include "main/teximage.h" #include "main/mipmap.h" @@ -59,6 +60,14 @@ intelFreeTextureImageData(struct gl_context * ctx, struct gl_texture_image *texI _mesa_free_texmemory(texImage->Data); texImage->Data = NULL; } + + if (intelImage->depth_rb) { + _mesa_reference_renderbuffer(&intelImage->depth_rb, NULL); + } + + if (intelImage->stencil_rb) { + _mesa_reference_renderbuffer(&intelImage->stencil_rb, NULL); + } } /** @@ -75,6 +84,7 @@ 
intelGenerateMipmap(struct gl_context *ctx, GLenum target, /* sw path: need to map texture images */ struct intel_context *intel = intel_context(ctx); struct intel_texture_object *intelObj = intel_texture_object(texObj); + struct gl_texture_image *first_image = texObj->Image[0][texObj->BaseLevel]; fallback_debug("%s - fallback to swrast\n", __FUNCTION__); @@ -82,7 +92,7 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target, _mesa_generate_mipmap(ctx, target, texObj); intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); - { + if (!_mesa_is_format_compressed(first_image->TexFormat)) { GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; GLuint face, i; /* Update the level information in our private data in the new images, diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 62d4169acd1..eda07a43dee 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -152,7 +152,7 @@ intel_copy_texsubimage(struct intel_context *intel, dst_bo, 0, intelImage->mt->region->tiling, - irb->region->draw_x + x, irb->region->draw_y + y, + irb->draw_x + x, irb->draw_y + y, image_x + dstx, image_y + dsty, width, height, GL_COPY)) { diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index bc39f4ad83f..269faefa1c0 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -8,6 +8,7 @@ #include "main/context.h" #include "main/formats.h" #include "main/pbo.h" +#include "main/renderbuffer.h" #include "main/texcompress.h" #include "main/texstore.h" #include "main/texgetimage.h" @@ -21,6 +22,7 @@ #include "intel_tex.h" #include "intel_blit.h" #include "intel_fbo.h" +#include "intel_span.h" #define FILE_DEBUG_FLAG DEBUG_TEXTURE @@ -54,8 +56,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel, GLuint width = intelImage->base.Width; GLuint 
height = intelImage->base.Height; GLuint depth = intelImage->base.Depth; - GLuint i, comp_byte = 0; - GLuint texelBytes; + GLuint i; DBG("%s\n", __FUNCTION__); @@ -108,22 +109,14 @@ intel_miptree_create_for_teximage(struct intel_context *intel, } } - if (_mesa_is_format_compressed(intelImage->base.TexFormat)) - comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat); - - texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat); - return intel_miptree_create(intel, intelObj->base.Target, - intelImage->base._BaseFormat, - intelImage->base.InternalFormat, + intelImage->base.TexFormat, firstLevel, lastLevel, width, height, depth, - texelBytes, - comp_byte, expect_accelerated_upload); } @@ -286,6 +279,130 @@ try_pbo_zcopy(struct intel_context *intel, return GL_TRUE; } +/** + * \param scatter Scatter if true. Gather if false. + * + * \see intel_tex_image_x8z24_scatter + * \see intel_tex_image_x8z24_gather + */ +static void +intel_tex_image_s8z24_scattergather(struct intel_context *intel, + struct intel_texture_image *intel_image, + bool scatter) +{ + struct gl_context *ctx = &intel->ctx; + struct gl_renderbuffer *depth_rb = intel_image->depth_rb; + struct gl_renderbuffer *stencil_rb = intel_image->stencil_rb; + + int w = intel_image->base.Width; + int h = intel_image->base.Height; + + uint32_t depth_row[w]; + uint8_t stencil_row[w]; + + intel_renderbuffer_map(intel, depth_rb); + intel_renderbuffer_map(intel, stencil_rb); + + if (scatter) { + for (int y = 0; y < h; ++y) { + depth_rb->GetRow(ctx, depth_rb, w, 0, y, depth_row); + for (int x = 0; x < w; ++x) { + stencil_row[x] = depth_row[x] >> 24; + } + stencil_rb->PutRow(ctx, stencil_rb, w, 0, y, stencil_row, NULL); + } + } else { /* gather */ + for (int y = 0; y < h; ++y) { + depth_rb->GetRow(ctx, depth_rb, w, 0, y, depth_row); + stencil_rb->GetRow(ctx, stencil_rb, w, 0, y, stencil_row); + for (int x = 0; x < w; ++x) { + uint32_t s8_x24 = stencil_row[x] << 24; + uint32_t x8_z24 = depth_row[x] & 
0x00ffffff; + depth_row[x] = s8_x24 | x8_z24; + } + depth_rb->PutRow(ctx, depth_rb, w, 0, y, depth_row, NULL); + } + } + + intel_renderbuffer_unmap(intel, depth_rb); + intel_renderbuffer_unmap(intel, stencil_rb); +} + +/** + * Copy the x8 bits from intel_image->depth_rb to intel_image->stencil_rb. + */ +static void +intel_tex_image_s8z24_scatter(struct intel_context *intel, + struct intel_texture_image *intel_image) +{ + intel_tex_image_s8z24_scattergather(intel, intel_image, true); +} + +/** + * Copy the data in intel_image->stencil_rb to the x8 bits in + * intel_image->depth_rb. + */ +static void +intel_tex_image_s8z24_gather(struct intel_context *intel, + struct intel_texture_image *intel_image) +{ + intel_tex_image_s8z24_scattergather(intel, intel_image, false); +} + +static bool +intel_tex_image_s8z24_create_renderbuffers(struct intel_context *intel, + struct intel_texture_image *image) +{ + struct gl_context *ctx = &intel->ctx; + + bool ok = true; + int width = image->base.Width; + int height = image->base.Height; + struct gl_renderbuffer *drb; + struct gl_renderbuffer *srb; + struct intel_renderbuffer *idrb; + struct intel_renderbuffer *isrb; + + assert(intel->has_separate_stencil); + assert(image->base.TexFormat == MESA_FORMAT_S8_Z24); + assert(image->mt != NULL); + + drb = intel_create_wrapped_renderbuffer(ctx, width, height, + MESA_FORMAT_X8_Z24); + srb = intel_create_wrapped_renderbuffer(ctx, width, height, + MESA_FORMAT_S8); + + if (!drb || !srb) { + if (drb) { + drb->Delete(drb); + } + if (srb) { + srb->Delete(srb); + } + return false; + } + + idrb = intel_renderbuffer(drb); + isrb = intel_renderbuffer(srb); + + intel_region_reference(&idrb->region, image->mt->region); + ok = intel_alloc_renderbuffer_storage(ctx, srb, GL_STENCIL_INDEX8, + width, height); + + if (!ok) { + drb->Delete(drb); + srb->Delete(srb); + return false; + } + + intel_renderbuffer_set_draw_offset(idrb, image, 0); + intel_renderbuffer_set_draw_offset(isrb, image, 0); + + 
_mesa_reference_renderbuffer(&image->depth_rb, drb); + _mesa_reference_renderbuffer(&image->stencil_rb, srb); + + return true; +} static void intelTexImage(struct gl_context * ctx, @@ -323,18 +440,7 @@ intelTexImage(struct gl_context * ctx, } } - /* Release the reference to a potentially orphaned buffer. - * Release any old malloced memory. - */ - if (intelImage->mt) { - intel_miptree_release(intel, &intelImage->mt); - assert(!texImage->Data); - } - else if (texImage->Data) { - _mesa_free_texmemory(texImage->Data); - texImage->Data = NULL; - } - + ctx->Driver.FreeTexImageData(ctx, texImage); assert(!intelImage->mt); if (intelObj->mt && @@ -493,6 +599,12 @@ intelTexImage(struct gl_context * ctx, _mesa_unmap_teximage_pbo(ctx, unpack); + if (intel->must_use_separate_stencil + && texImage->TexFormat == MESA_FORMAT_S8_Z24) { + intel_tex_image_s8z24_create_renderbuffers(intel, intelImage); + intel_tex_image_s8z24_scatter(intel, intelImage); + } + if (intelImage->mt) { if (pixels != NULL) intel_miptree_image_unmap(intel, intelImage->mt); @@ -609,6 +721,14 @@ intel_get_tex_image(struct gl_context * ctx, GLenum target, GLint level, assert(intelImage->base.Data); } + if (intelImage->stencil_rb) { + /* + * The texture has packed depth/stencil format, but uses separate + * stencil. The texture's embedded stencil buffer contains the real + * stencil data, so copy that into the miptree. 
+ */ + intel_tex_image_s8z24_gather(intel, intelImage); + } if (compressed) { _mesa_get_compressed_teximage(ctx, target, level, pixels, @@ -692,8 +812,8 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, texFormat = MESA_FORMAT_ARGB8888; } - mt = intel_miptree_create_for_region(intel, target, - internalFormat, rb->region, 1, 0); + mt = intel_miptree_create_for_region(intel, target, texFormat, + rb->region, 1); if (mt == NULL) return; @@ -756,9 +876,8 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, if (image == NULL) return; - mt = intel_miptree_create_for_region(intel, target, - image->internal_format, - image->region, 1, 0); + mt = intel_miptree_create_for_region(intel, target, image->format, + image->region, 1); if (mt == NULL) return; diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c index d39733b6c5a..9d8152375d8 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.c +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c @@ -35,39 +35,26 @@ #include "intel_context.h" #include "main/macros.h" -void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h) +void +intel_get_texture_alignment_unit(gl_format format, + unsigned int *w, unsigned int *h) { - switch (internalFormat) { - case GL_COMPRESSED_RGB_FXT1_3DFX: - case GL_COMPRESSED_RGBA_FXT1_3DFX: - *w = 8; - *h = 4; - break; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - *w = 4; - *h = 4; - break; - - default: - *w = 4; - *h = 2; - break; - } + if (_mesa_is_format_compressed(format)) { + /* The hardware alignment requirements for compressed textures + * happen to match the block boundaries. 
+ */ + _mesa_get_format_block_size(format, w, h); + } else { + *w = 4; + *h = 2; + } } void i945_miptree_layout_2d(struct intel_context *intel, struct intel_mipmap_tree *mt, uint32_t tiling, int nr_images) { - GLuint align_h = 2, align_w = 4; + GLuint align_h, align_w; GLuint level; GLuint x = 0; GLuint y = 0; @@ -75,7 +62,7 @@ void i945_miptree_layout_2d(struct intel_context *intel, GLuint height = mt->height0; mt->total_width = mt->width0; - intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + intel_get_texture_alignment_unit(mt->format, &align_w, &align_h); if (mt->compressed) { mt->total_width = ALIGN(mt->width0, align_w); @@ -110,11 +97,9 @@ void i945_miptree_layout_2d(struct intel_context *intel, intel_miptree_set_level_info(mt, level, nr_images, x, y, width, height, 1); + img_height = ALIGN(height, align_h); if (mt->compressed) - img_height = MAX2(1, height/4); - else - img_height = ALIGN(height, align_h); - + img_height /= align_h; /* Because the images are packed better, the final offset * might not be the maximal one: diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.h b/src/mesa/drivers/dri/intel/intel_tex_layout.h index 1c8c53e5459..b52e5a48855 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.h +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.h @@ -41,4 +41,5 @@ static INLINE GLuint minify( GLuint d ) extern void i945_miptree_layout_2d(struct intel_context *intel, struct intel_mipmap_tree *mt, uint32_t tiling, int nr_images); -extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *); +void intel_get_texture_alignment_unit(gl_format format, + unsigned int *w, unsigned int *h); diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index e93ef4a4727..a9ae2ec5429 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -63,6 +63,36 @@ struct intel_texture_image */ struct intel_mipmap_tree *mt; GLboolean 
used_as_render_target; + + /** + * \name Renderbuffers for faking packed depth/stencil + * + * These renderbuffers are non-null only if the intel_context is using + * separate stencil and this texture has a packed depth/stencil format. When + * glFramebufferTexture is called on this image, the resultant renderbuffer + * wrapper reuses these renderbuffers as its own. + * + * \see intel_wrap_texture + * \see intel_tex_image_s8z24_create_renderbuffers + * \see intel_tex_image_s8z24_scatter + * \see intel_tex_image_s8z24_gather + * + * \{ + */ + + /** + * The depth buffer has format X8_Z24. The x8 bits are undefined unless + * intel_tex_image_s8z24_gather has been immediately called. The depth buffer + * resuses the image miptree's region and hiz_region as its own. + */ + struct gl_renderbuffer *depth_rb; + + /** + * The stencil buffer has format S8 and keeps its data in its own region. + */ + struct gl_renderbuffer *stencil_rb; + + /** \} */ }; static INLINE struct intel_texture_object * diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 27f2646ebf5..7135a6276fe 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -77,8 +77,6 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - int comp_byte = 0; - int cpp; GLuint face, i; GLuint nr_faces = 0; struct intel_texture_image *firstImage; @@ -101,13 +99,6 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) return GL_FALSE; } - if (_mesa_is_format_compressed(firstImage->base.TexFormat)) { - comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat); - cpp = comp_byte; - } - else - cpp = _mesa_get_format_bytes(firstImage->base.TexFormat); - /* 
Check tree can hold all active levels. Check tree matches * target, imageFormat, etc. * @@ -118,14 +109,12 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) */ if (intelObj->mt && (intelObj->mt->target != intelObj->base.Target || - intelObj->mt->internal_format != firstImage->base.InternalFormat || + intelObj->mt->format != firstImage->base.TexFormat || intelObj->mt->first_level != tObj->BaseLevel || intelObj->mt->last_level < intelObj->_MaxLevel || intelObj->mt->width0 != firstImage->base.Width || intelObj->mt->height0 != firstImage->base.Height || - intelObj->mt->depth0 != firstImage->base.Depth || - intelObj->mt->cpp != cpp || - intelObj->mt->compressed != _mesa_is_format_compressed(firstImage->base.TexFormat))) { + intelObj->mt->depth0 != firstImage->base.Depth)) { intel_miptree_release(intel, &intelObj->mt); } @@ -135,15 +124,12 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) if (!intelObj->mt) { intelObj->mt = intel_miptree_create(intel, intelObj->base.Target, - firstImage->base._BaseFormat, - firstImage->base.InternalFormat, + firstImage->base.TexFormat, tObj->BaseLevel, intelObj->_MaxLevel, firstImage->base.Width, firstImage->base.Height, firstImage->base.Depth, - cpp, - comp_byte, GL_TRUE); if (!intelObj->mt) return GL_FALSE; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c index 36e68c99181..dcfd316c49f 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c @@ -688,8 +688,10 @@ nouveau_generate_mipmap(struct gl_context *ctx, GLenum target, _mesa_generate_mipmap(ctx, target, t); nouveau_teximage_unmap(ctx, base); - store_mipmap(ctx, target, t->BaseLevel + 1, - get_last_level(t), t); + if (!_mesa_is_format_compressed(base->TexFormat)) { + store_mipmap(ctx, target, t->BaseLevel + 1, + get_last_level(t), t); + } } else { _mesa_meta_GenerateMipmap(ctx, target, t); diff --git 
a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index 931a9ecf8fe..a512c9d112a 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -47,9 +47,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define insert_at_tail_if(atom_list, atom) \ do { \ - struct radeon_state_atom* __atom = (atom); \ - if (__atom->check) \ - insert_at_tail((atom_list), __atom); \ + struct radeon_state_atom* current_atom = (atom); \ + if (current_atom->check) \ + insert_at_tail((atom_list), current_atom); \ } while(0) void r200SetUpAtomList( r200ContextPtr rmesa ) diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 2bd3b62bfdb..0f7a7a46b71 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -329,7 +329,7 @@ static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int start, { context_t *context = R700_CONTEXT(ctx); BATCH_LOCALS(&context->radeon); - int type, i; + int type; uint32_t num_indices, total_emit = 0; uint32_t vgt_draw_initiator = 0; uint32_t vgt_index_type = 0; @@ -370,22 +370,7 @@ static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int start, vgt_num_indices = num_indices; SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - if (start == 0) - { - SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } - else - { - if (num_indices > 0xffff) - { - total_emit += num_indices; - } - else - { - total_emit += (num_indices + 1) / 2; - } - SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ @@ -406,45 +391,13 @@ static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int 
start, /* offset */ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); - R600_OUT_BATCH(0); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(start); //VTX_BASE_VTX_LOC R600_OUT_BATCH(0); //VTX_START_INST_LOC // draw packet - if(start == 0) - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - } - else - { - if (num_indices > 0xffff) - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - for (i = start; i < (start + num_indices); i++) - { - R600_OUT_BATCH(i); - } - } - else - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - for (i = start; i < (start + num_indices); i += 2) - { - if ((i + 1) == (start + num_indices)) - { - R600_OUT_BATCH(i); - } - else - { - R600_OUT_BATCH(((i + 1) << 16) | (i)); - } - } - } - } + + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); END_BATCH(); COMMIT_BATCH(); @@ -469,12 +422,7 @@ static GLuint r700PredictRenderSize(struct gl_context* ctx, else { for (i = 0; i < nr_prims; ++i) { - if (prim[i].start == 0) - dwords += 14; - else if (prim[i].count > 0xffff) - dwords += prim[i].count + 14; - else - dwords += ((prim[i].count + 1) / 2) + 14; + dwords += 14; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index d3c9257fb66..92c1854f098 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -35,7 +35,7 @@ #include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/context.h" -#include "main/texrender.h" +#include "swrast/swrast.h" #include "drivers/common/meta.h" #include "radeon_common.h" @@ -557,7 
+557,7 @@ radeon_render_texture(struct gl_context * ctx, /* Fallback on drawing to a texture without a miptree. */ _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); - _mesa_render_texture(ctx, fb, att); + _swrast_render_texture(ctx, fb, att); return; } else if (!rrb) { @@ -568,14 +568,14 @@ radeon_render_texture(struct gl_context * ctx, } else { /* fallback to software rendering */ - _mesa_render_texture(ctx, fb, att); + _swrast_render_texture(ctx, fb, att); return; } } if (!radeon_update_wrapper(ctx, rrb, newImage)) { _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); - _mesa_render_texture(ctx, fb, att); + _swrast_render_texture(ctx, fb, att); return; } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 6cf843406f9..676fafd4560 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1658,52 +1658,105 @@ radeonCreateScreen2(__DRIscreen *sPriv) screen->group_bytes = 512; else screen->group_bytes = 256; - if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 6) && - (screen->chip_family < CHIP_FAMILY_CEDAR)) { - ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp); - if (ret) - fprintf(stderr, "failed to get tiling info\n"); - else { - screen->tile_config = temp; - screen->r7xx_bank_op = 0; - switch((screen->tile_config & 0xe) >> 1) { - case 0: - screen->num_channels = 1; - break; - case 1: - screen->num_channels = 2; - break; - case 2: - screen->num_channels = 4; - break; - case 3: - screen->num_channels = 8; - break; - default: - fprintf(stderr, "bad channels\n"); - break; + if (IS_R600_CLASS(screen)) { + if ((sPriv->drm_version.minor >= 6) && + (screen->chip_family < CHIP_FAMILY_CEDAR)) { + ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp); + if (ret) + fprintf(stderr, "failed to get tiling info\n"); + else { + screen->tile_config = temp; + screen->r7xx_bank_op = 0; + switch ((screen->tile_config & 0xe) >> 1) { + case 0: 
+ screen->num_channels = 1; + break; + case 1: + screen->num_channels = 2; + break; + case 2: + screen->num_channels = 4; + break; + case 3: + screen->num_channels = 8; + break; + default: + fprintf(stderr, "bad channels\n"); + break; + } + switch ((screen->tile_config & 0x30) >> 4) { + case 0: + screen->num_banks = 4; + break; + case 1: + screen->num_banks = 8; + break; + default: + fprintf(stderr, "bad banks\n"); + break; + } + switch ((screen->tile_config & 0xc0) >> 6) { + case 0: + screen->group_bytes = 256; + break; + case 1: + screen->group_bytes = 512; + break; + default: + fprintf(stderr, "bad group_bytes\n"); + break; + } } - switch((screen->tile_config & 0x30) >> 4) { - case 0: - screen->num_banks = 4; - break; - case 1: - screen->num_banks = 8; - break; - default: - fprintf(stderr, "bad banks\n"); - break; - } - switch((screen->tile_config & 0xc0) >> 6) { - case 0: - screen->group_bytes = 256; - break; - case 1: - screen->group_bytes = 512; - break; - default: - fprintf(stderr, "bad group_bytes\n"); - break; + } else if ((sPriv->drm_version.minor >= 7) && + (screen->chip_family >= CHIP_FAMILY_CEDAR)) { + ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp); + if (ret) + fprintf(stderr, "failed to get tiling info\n"); + else { + screen->tile_config = temp; + screen->r7xx_bank_op = 0; + switch (screen->tile_config & 0xf) { + case 0: + screen->num_channels = 1; + break; + case 1: + screen->num_channels = 2; + break; + case 2: + screen->num_channels = 4; + break; + case 3: + screen->num_channels = 8; + break; + default: + fprintf(stderr, "bad channels\n"); + break; + } + switch ((screen->tile_config & 0xf0) >> 4) { + case 0: + screen->num_banks = 4; + break; + case 1: + screen->num_banks = 8; + break; + case 2: + screen->num_banks = 16; + break; + default: + fprintf(stderr, "bad banks\n"); + break; + } + switch ((screen->tile_config & 0xf00) >> 8) { + case 0: + screen->group_bytes = 256; + break; + case 1: + screen->group_bytes = 512; + break; + 
default: + fprintf(stderr, "bad group_bytes\n"); + break; + } } } } diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 9ec53881bb2..ce0df32bfe4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -249,6 +249,7 @@ static void radeon_generate_mipmap(struct gl_context *ctx, GLenum target, radeonTexObj* t = radeon_tex_obj(texObj); GLuint nr_faces = (t->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; int i, face; + struct gl_texture_image *first_image; radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s(%p, tex %p) Target type %s.\n", @@ -257,6 +258,13 @@ static void radeon_generate_mipmap(struct gl_context *ctx, GLenum target, _mesa_generate_mipmap(ctx, target, texObj); + /* For the compressed case, we don't need to do the + * non-TexImage recovery path below. + */ + first_image = texObj->Image[0][texObj->BaseLevel]; + if (_mesa_is_format_compressed(first_image->TexFormat)) + return; + for (face = 0; face < nr_faces; face++) { for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { radeon_texture_image *image; diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 63f53e2b080..a75c9c2e782 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -462,6 +462,27 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, /** + * Update the current drawbuffer's _ColorDrawBufferIndex[] list, etc. + * from the context's Color.DrawBuffer[] state. + * Use when changing contexts. 
+ */ +void +_mesa_update_draw_buffers(struct gl_context *ctx) +{ + GLenum buffers[MAX_DRAW_BUFFERS]; + GLuint i; + + /* should be a window system FBO */ + assert(ctx->DrawBuffer->Name == 0); + + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) + buffers[i] = ctx->Color.DrawBuffer[i]; + + _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, buffers, NULL); +} + + +/** * Like \sa _mesa_drawbuffers(), this is a helper function for setting * GL_READ_BUFFER state in the context and current FBO. * \param ctx the rendering context diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index 1404112c411..8083bc3d353 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -50,6 +50,10 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, extern void _mesa_readbuffer(struct gl_context *ctx, GLenum buffer, GLint bufferIndex); +extern void +_mesa_update_draw_buffers(struct gl_context *ctx); + + extern void GLAPIENTRY _mesa_ReadBuffer( GLenum mode ); diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index ea13bdd6835..b83a5d621fa 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1430,7 +1430,8 @@ _mesa_make_current( struct gl_context *newCtx, } if (curCtx && - (curCtx->WinSysDrawBuffer || curCtx->WinSysReadBuffer) && /* make sure this context is valid for flushing */ + (curCtx->WinSysDrawBuffer || curCtx->WinSysReadBuffer) && + /* make sure this context is valid for flushing */ curCtx != newCtx) _mesa_flush(curCtx); @@ -1445,8 +1446,6 @@ _mesa_make_current( struct gl_context *newCtx, _glapi_set_dispatch(newCtx->CurrentDispatch); if (drawBuffer && readBuffer) { - /* TODO: check if newCtx and buffer's visual match??? */ - ASSERT(drawBuffer->Name == 0); ASSERT(readBuffer->Name == 0); _mesa_reference_framebuffer(&newCtx->WinSysDrawBuffer, drawBuffer); @@ -1457,23 +1456,12 @@ _mesa_make_current( struct gl_context *newCtx, * or not bound to a user-created FBO. 
*/ if (!newCtx->DrawBuffer || newCtx->DrawBuffer->Name == 0) { - /* KW: merge conflict here, revisit. - */ - /* fix up the fb fields - these will end up wrong otherwise - * if the DRIdrawable changes, and everything relies on them. - * This is a bit messy (same as needed in _mesa_BindFramebufferEXT) - */ - unsigned int i; - GLenum buffers[MAX_DRAW_BUFFERS]; - _mesa_reference_framebuffer(&newCtx->DrawBuffer, drawBuffer); - - for(i = 0; i < newCtx->Const.MaxDrawBuffers; i++) { - buffers[i] = newCtx->Color.DrawBuffer[i]; - } - - _mesa_drawbuffers(newCtx, newCtx->Const.MaxDrawBuffers, - buffers, NULL); + /* Update the FBO's list of drawbuffers/renderbuffers. + * For winsys FBOs this comes from the GL state (which may have + * changed since the last time this FBO was bound). + */ + _mesa_update_draw_buffers(newCtx); } if (!newCtx->ReadBuffer || newCtx->ReadBuffer->Name == 0) { _mesa_reference_framebuffer(&newCtx->ReadBuffer, readBuffer); diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 2230b262336..8cc3fd49a34 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -78,9 +78,32 @@ static struct gl_renderbuffer DummyRenderbuffer; static struct gl_framebuffer IncompleteFramebuffer; -#define IS_CUBE_FACE(TARGET) \ - ((TARGET) >= GL_TEXTURE_CUBE_MAP_POSITIVE_X && \ - (TARGET) <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z) +static INLINE GLboolean +is_cube_face(GLenum target) +{ + return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X && + target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z); +} + + +/** + * Is the given FBO a user-created FBO? + */ +static INLINE GLboolean +is_user_fbo(const struct gl_framebuffer *fb) +{ + return fb->Name != 0; +} + + +/** + * Is the given FBO a window system FBO (like an X window)? 
+ */ +static INLINE GLboolean +is_winsys_fbo(const struct gl_framebuffer *fb) +{ + return fb->Name == 0; +} static void @@ -196,7 +219,7 @@ _mesa_get_attachment(struct gl_context *ctx, struct gl_framebuffer *fb, { GLuint i; - assert(fb->Name > 0); + assert(is_user_fbo(fb)); switch (attachment) { case GL_COLOR_ATTACHMENT0_EXT: @@ -244,7 +267,7 @@ static struct gl_renderbuffer_attachment * _mesa_get_fb0_attachment(struct gl_context *ctx, struct gl_framebuffer *fb, GLenum attachment) { - assert(fb->Name == 0); + assert(is_winsys_fbo(fb)); switch (attachment) { case GL_FRONT_LEFT: @@ -669,7 +692,7 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, GLint i; GLuint j; - assert(fb->Name != 0); + assert(is_user_fbo(fb)); numImages = 0; fb->Width = 0; @@ -968,10 +991,11 @@ _mesa_DeleteRenderbuffersEXT(GLsizei n, const GLuint *renderbuffers) _mesa_BindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0); } - if (ctx->DrawBuffer->Name) { + if (is_user_fbo(ctx->DrawBuffer)) { detach_renderbuffer(ctx, ctx->DrawBuffer, rb); } - if (ctx->ReadBuffer->Name && ctx->ReadBuffer != ctx->DrawBuffer) { + if (is_user_fbo(ctx->ReadBuffer) + && ctx->ReadBuffer != ctx->DrawBuffer) { detach_renderbuffer(ctx, ctx->ReadBuffer, rb); } @@ -1203,7 +1227,7 @@ invalidate_rb(GLuint key, void *data, void *userData) struct gl_renderbuffer *rb = (struct gl_renderbuffer *) userData; /* If this is a user-created FBO */ - if (fb->Name) { + if (is_user_fbo(fb)) { GLuint i; for (i = 0; i < BUFFER_COUNT; i++) { struct gl_renderbuffer_attachment *att = fb->Attachment + i; @@ -1532,7 +1556,7 @@ check_begin_texture_render(struct gl_context *ctx, struct gl_framebuffer *fb) GLuint i; ASSERT(ctx->Driver.RenderTexture); - if (fb->Name == 0) + if (is_winsys_fbo(fb)) return; /* can't render to texture with winsys framebuffers */ for (i = 0; i < BUFFER_COUNT; i++) { @@ -1552,7 +1576,7 @@ check_begin_texture_render(struct gl_context *ctx, struct gl_framebuffer *fb) static void check_end_texture_render(struct gl_context 
*ctx, struct gl_framebuffer *fb) { - if (fb->Name == 0) + if (is_winsys_fbo(fb)) return; /* can't render to texture with winsys framebuffers */ if (ctx->Driver.FinishRenderTexture) { @@ -1805,7 +1829,7 @@ _mesa_CheckFramebufferStatusEXT(GLenum target) return 0; } - if (buffer->Name == 0) { + if (is_winsys_fbo(buffer)) { /* The window system / default framebuffer is always complete */ return GL_FRAMEBUFFER_COMPLETE_EXT; } @@ -1843,7 +1867,7 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, } /* check framebuffer binding */ - if (fb->Name == 0) { + if (is_winsys_fbo(fb)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferTexture%sEXT", caller); return; @@ -1866,7 +1890,7 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, } else { err = (texObj->Target == GL_TEXTURE_CUBE_MAP) - ? !IS_CUBE_FACE(textarget) + ? !is_cube_face(textarget) : (texObj->Target != textarget); } } @@ -1970,7 +1994,7 @@ _mesa_FramebufferTexture2DEXT(GLenum target, GLenum attachment, if ((texture != 0) && (textarget != GL_TEXTURE_2D) && (textarget != GL_TEXTURE_RECTANGLE_ARB) && - (!IS_CUBE_FACE(textarget))) { + (!is_cube_face(textarget))) { _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferTexture2DEXT(textarget=0x%x)", textarget); return; @@ -2034,7 +2058,7 @@ _mesa_FramebufferRenderbufferEXT(GLenum target, GLenum attachment, return; } - if (fb->Name == 0) { + if (is_winsys_fbo(fb)) { /* Can't attach new renderbuffers to a window system framebuffer */ _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferRenderbufferEXT"); return; @@ -2111,7 +2135,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, return; } - if (buffer->Name == 0) { + if (is_winsys_fbo(buffer)) { /* the default / window-system FBO */ att = _mesa_get_fb0_attachment(ctx, buffer, attachment); } @@ -2143,7 +2167,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, switch (pname) { case 
GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_EXT: - *params = buffer->Name == 0 ? GL_FRAMEBUFFER_DEFAULT : att->Type; + *params = is_winsys_fbo(buffer) ? GL_FRAMEBUFFER_DEFAULT : att->Type; return; case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_EXT: if (att->Type == GL_RENDERBUFFER_EXT) { diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index e9fcb545a1e..f2724dbca7e 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -1885,107 +1885,19 @@ next_mipmap_level_size(GLenum target, GLint border, } } - - - -/** - * Automatic mipmap generation. - * This is the fallback/default function for ctx->Driver.GenerateMipmap(). - * Generate a complete set of mipmaps from texObj's BaseLevel image. - * Stop at texObj's MaxLevel or when we get to the 1x1 texture. - * For cube maps, target will be one of - * GL_TEXTURE_CUBE_MAP_POSITIVE/NEGATIVE_X/Y/Z; never GL_TEXTURE_CUBE_MAP. - */ -void -_mesa_generate_mipmap(struct gl_context *ctx, GLenum target, - struct gl_texture_object *texObj) +static void +generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj, + const struct gl_texture_image *srcImage, + GLuint maxLevel) { - const struct gl_texture_image *srcImage; - gl_format convertFormat; - const GLubyte *srcData = NULL; - GLubyte *dstData = NULL; - GLint level, maxLevels; + GLint level; GLenum datatype; GLuint comps; - ASSERT(texObj); - srcImage = _mesa_select_tex_image(ctx, texObj, target, texObj->BaseLevel); - ASSERT(srcImage); - - maxLevels = _mesa_max_texture_levels(ctx, texObj->Target); - ASSERT(maxLevels > 0); /* bad target */ - - /* Find convertFormat - the format that do_row() will process */ - - if (_mesa_is_format_compressed(srcImage->TexFormat)) { - /* setup for compressed textures - need to allocate temporary - * image buffers to hold uncompressed images. 
- */ - GLuint row; - GLint components, size; - GLchan *dst; - - assert(texObj->Target == GL_TEXTURE_2D || - texObj->Target == GL_TEXTURE_CUBE_MAP_ARB); - - if (srcImage->_BaseFormat == GL_RGB) { - convertFormat = MESA_FORMAT_RGB888; - components = 3; - } else if (srcImage->_BaseFormat == GL_RED) { - convertFormat = MESA_FORMAT_R8; - components = 1; - } else if (srcImage->_BaseFormat == GL_RG) { - convertFormat = MESA_FORMAT_RG88; - components = 2; - } else if (srcImage->_BaseFormat == GL_RGBA) { - convertFormat = MESA_FORMAT_RGBA8888; - components = 4; - } else if (srcImage->_BaseFormat == GL_LUMINANCE) { - convertFormat = MESA_FORMAT_L8; - components = 1; - } else if (srcImage->_BaseFormat == GL_LUMINANCE_ALPHA) { - convertFormat = MESA_FORMAT_AL88; - components = 2; - } else { - _mesa_problem(ctx, "bad srcImage->_BaseFormat in _mesa_generate_mipmaps"); - return; - } - - /* allocate storage for uncompressed GL_RGB or GL_RGBA images */ - size = _mesa_bytes_per_pixel(srcImage->_BaseFormat, CHAN_TYPE) - * srcImage->Width * srcImage->Height * srcImage->Depth + 20; - /* 20 extra bytes, just be safe when calling last FetchTexel */ - srcData = (GLubyte *) malloc(size); - if (!srcData) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "generate mipmaps"); - return; - } - dstData = (GLubyte *) malloc(size / 2); /* 1/4 would probably be OK */ - if (!dstData) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "generate mipmaps"); - free((void *) srcData); - return; - } + _mesa_format_to_type_and_comps(srcImage->TexFormat, &datatype, &comps); - /* decompress base image here */ - dst = (GLchan *) srcData; - for (row = 0; row < srcImage->Height; row++) { - GLuint col; - for (col = 0; col < srcImage->Width; col++) { - srcImage->FetchTexelc(srcImage, col, row, 0, dst); - dst += components; - } - } - } - else { - /* uncompressed */ - convertFormat = srcImage->TexFormat; - } - - _mesa_format_to_type_and_comps(convertFormat, &datatype, &comps); - - for (level = texObj->BaseLevel; level < texObj->MaxLevel 
- && level < maxLevels - 1; level++) { + for (level = texObj->BaseLevel; level < maxLevel; level++) { /* generate image[level+1] from image[level] */ const struct gl_texture_image *srcImage; struct gl_texture_image *dstImage; @@ -2005,14 +1917,8 @@ _mesa_generate_mipmap(struct gl_context *ctx, GLenum target, nextLevel = next_mipmap_level_size(target, border, srcWidth, srcHeight, srcDepth, &dstWidth, &dstHeight, &dstDepth); - if (!nextLevel) { - /* all done */ - if (_mesa_is_format_compressed(srcImage->TexFormat)) { - free((void *) srcData); - free(dstData); - } + if (!nextLevel) return; - } /* get dest gl_texture_image */ dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1); @@ -2044,52 +1950,184 @@ _mesa_generate_mipmap(struct gl_context *ctx, GLenum target, } } - /* Setup src and dest data pointers */ - if (_mesa_is_format_compressed(dstImage->TexFormat)) { - /* srcData and dstData are already set */ - ASSERT(srcData); - ASSERT(dstData); - } - else { - srcData = (const GLubyte *) srcImage->Data; - dstData = (GLubyte *) dstImage->Data; - } - ASSERT(dstImage->TexFormat); ASSERT(dstImage->FetchTexelc); ASSERT(dstImage->FetchTexelf); _mesa_generate_mipmap_level(target, datatype, comps, border, - srcWidth, srcHeight, srcDepth, - srcData, srcImage->RowStride, - dstWidth, dstHeight, dstDepth, - dstData, dstImage->RowStride); - - - if (_mesa_is_format_compressed(dstImage->TexFormat)) { - GLubyte *temp; - /* compress image from dstData into dstImage->Data */ - const GLenum srcFormat = _mesa_get_format_base_format(convertFormat); - GLint dstRowStride - = _mesa_format_row_stride(dstImage->TexFormat, dstWidth); - - _mesa_texstore(ctx, 2, dstImage->_BaseFormat, - dstImage->TexFormat, - dstImage->Data, - 0, 0, 0, /* dstX/Y/Zoffset */ - dstRowStride, 0, /* strides */ - dstWidth, dstHeight, 1, /* size */ - srcFormat, CHAN_TYPE, - dstData, /* src data, actually */ - &ctx->DefaultPacking); - - /* swap src and dest pointers */ - temp = (GLubyte *) srcData; - srcData = 
dstData; - dstData = temp; + srcWidth, srcHeight, srcDepth, + srcImage->Data, srcImage->RowStride, + dstWidth, dstHeight, dstDepth, + dstImage->Data, dstImage->RowStride); + + } /* loop over mipmap levels */ +} + +static void +generate_mipmap_compressed(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj, + const struct gl_texture_image *srcImage, + GLuint maxLevel) +{ + GLint level; + gl_format temp_format; + GLenum datatype; + GLuint comps; + GLuint row; + GLint components; + GLuint temp_src_stride, temp_dst_stride; /* in bytes */ + GLchan *temp_src = NULL, *temp_dst = NULL; + + /* Choose the format we will do _mesa_generate_mipmap_level() in, + * and uncompress the firstImage into a temporary of that format. + */ + assert(texObj->Target == GL_TEXTURE_2D || + texObj->Target == GL_TEXTURE_CUBE_MAP_ARB); + + if (srcImage->_BaseFormat == GL_RGB) { + temp_format = MESA_FORMAT_RGB888; + components = 3; + } else if (srcImage->_BaseFormat == GL_RED) { + temp_format = MESA_FORMAT_R8; + components = 1; + } else if (srcImage->_BaseFormat == GL_RG) { + temp_format = MESA_FORMAT_RG88; + components = 2; + } else if (srcImage->_BaseFormat == GL_RGBA) { + temp_format = MESA_FORMAT_RGBA8888; + components = 4; + } else if (srcImage->_BaseFormat == GL_LUMINANCE) { + temp_format = MESA_FORMAT_L8; + components = 1; + } else if (srcImage->_BaseFormat == GL_LUMINANCE_ALPHA) { + temp_format = MESA_FORMAT_AL88; + components = 2; + } else { + _mesa_problem(ctx, "bad srcImage->_BaseFormat in _mesa_generate_mipmaps"); + return; + } + + /* allocate storage for uncompressed GL_RGB or GL_RGBA images */ + temp_src_stride = _mesa_format_row_stride(temp_format, srcImage->Width); + /* 20 extra bytes, just be safe when calling last FetchTexel */ + temp_src = (GLubyte *) malloc(temp_src_stride * srcImage->Height + 20); + if (!temp_src) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "generate mipmaps"); + return; + } + + /* decompress base image to the temporary */ + for (row = 0; row 
< srcImage->Height; row++) { + GLuint col; + GLchan *dst = (GLchan *) temp_src + temp_src_stride * row; + for (col = 0; col < srcImage->Width; col++) { + srcImage->FetchTexelc(srcImage, col, row, 0, dst); + dst += components; + } + } + + _mesa_format_to_type_and_comps(temp_format, &datatype, &comps); + + for (level = texObj->BaseLevel; level < maxLevel; level++) { + /* generate image[level+1] from image[level] */ + const struct gl_texture_image *srcImage; + struct gl_texture_image *dstImage; + GLint srcWidth, srcHeight, srcDepth; + GLint dstWidth, dstHeight, dstDepth; + GLint border; + GLboolean nextLevel; + + /* get src image parameters */ + srcImage = _mesa_select_tex_image(ctx, texObj, target, level); + ASSERT(srcImage); + srcWidth = srcImage->Width; + srcHeight = srcImage->Height; + srcDepth = srcImage->Depth; + border = srcImage->Border; + + nextLevel = next_mipmap_level_size(target, border, + srcWidth, srcHeight, srcDepth, + &dstWidth, &dstHeight, &dstDepth); + if (!nextLevel) + break; + + temp_dst_stride = _mesa_format_row_stride(temp_format, dstWidth); + if (!temp_dst) { + temp_dst = (GLubyte *) malloc(temp_dst_stride * dstHeight); + if (!temp_dst) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "generate mipmaps"); + break; + } + } + + /* get dest gl_texture_image */ + dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1); + if (!dstImage) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps"); + return; } + _mesa_generate_mipmap_level(target, datatype, comps, border, + srcWidth, srcHeight, srcDepth, + temp_src, temp_src_stride / components, + dstWidth, dstHeight, dstDepth, + temp_dst, temp_dst_stride / components); + + /* initialize new image */ + _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight, + dstDepth, border, srcImage->InternalFormat, + srcImage->TexFormat); + + ctx->Driver.TexImage2D(ctx, target, level + 1, + srcImage->InternalFormat, + dstWidth, dstHeight, border, + _mesa_get_format_base_format(temp_format), + 
GL_UNSIGNED_BYTE, + temp_dst, &ctx->DefaultPacking, texObj, dstImage); + + /* swap src and dest pointers */ + { + GLchan *temp = temp_src; + temp_src = temp_dst; + temp_dst = temp; + + temp_src_stride = temp_dst_stride; + } } /* loop over mipmap levels */ + + free((void *) temp_src); + free(temp_dst); +} + +/** + * Automatic mipmap generation. + * This is the fallback/default function for ctx->Driver.GenerateMipmap(). + * Generate a complete set of mipmaps from texObj's BaseLevel image. + * Stop at texObj's MaxLevel or when we get to the 1x1 texture. + * For cube maps, target will be one of + * GL_TEXTURE_CUBE_MAP_POSITIVE/NEGATIVE_X/Y/Z; never GL_TEXTURE_CUBE_MAP. + */ +void +_mesa_generate_mipmap(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + const struct gl_texture_image *srcImage; + GLint maxLevel; + + ASSERT(texObj); + srcImage = _mesa_select_tex_image(ctx, texObj, target, texObj->BaseLevel); + ASSERT(srcImage); + + maxLevel = _mesa_max_texture_levels(ctx, texObj->Target) - 1; + ASSERT(maxLevel >= 0); /* bad target */ + + maxLevel = MIN2(maxLevel, texObj->MaxLevel); + + if (_mesa_is_format_compressed(srcImage->TexFormat)) { + generate_mipmap_compressed(ctx, target, texObj, srcImage, maxLevel); + } else { + generate_mipmap_uncompressed(ctx, target, texObj, srcImage, maxLevel); + } } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index eb2efc89aed..f018c75cc6a 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1435,8 +1435,7 @@ struct gl_texgen /** * Texture unit state. Contains enable flags, texture environment/function/ - * combiners, texgen state, pointers to current texture objects and - * post-filter color tables. + * combiners, texgen state, and pointers to current texture objects. 
*/ struct gl_texture_unit { diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index d6470e351b8..a232a51c355 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -4683,7 +4683,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking ) { - GLfloat *depthTemp, *depthValues; + GLfloat *depthTemp = NULL, *depthValues; GLboolean needClamp = GL_FALSE; /* Look for special cases first. @@ -4729,16 +4729,16 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, /* general case path follows */ - depthTemp = (GLfloat *) malloc(n * sizeof(GLfloat)); - if (!depthTemp) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); - return; - } - if (dstType == GL_FLOAT) { depthValues = (GLfloat *) dest; } else { + depthTemp = (GLfloat *) malloc(n * sizeof(GLfloat)); + if (!depthTemp) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + return; + } + depthValues = depthTemp; } @@ -4782,6 +4782,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, } zValues[i] = value & 0xffffff00; } + free(depthTemp); return; } else { diff --git a/src/mesa/main/renderbuffer.c b/src/mesa/main/renderbuffer.c index fa884c0de93..c36175c60e7 100644 --- a/src/mesa/main/renderbuffer.c +++ b/src/mesa/main/renderbuffer.c @@ -2567,26 +2567,3 @@ _mesa_reference_renderbuffer(struct gl_renderbuffer **ptr, *ptr = rb; } } - - -/** - * Create a new combined depth/stencil renderbuffer for implementing - * the GL_EXT_packed_depth_stencil extension. 
- * \return new depth/stencil renderbuffer - */ -struct gl_renderbuffer * -_mesa_new_depthstencil_renderbuffer(struct gl_context *ctx, GLuint name) -{ - struct gl_renderbuffer *dsrb; - - dsrb = _mesa_new_renderbuffer(ctx, name); - if (!dsrb) - return NULL; - - /* init fields not covered by _mesa_new_renderbuffer() */ - dsrb->InternalFormat = GL_DEPTH24_STENCIL8_EXT; - dsrb->Format = MESA_FORMAT_Z24_S8; - dsrb->AllocStorage = _mesa_soft_renderbuffer_storage; - - return dsrb; -} diff --git a/src/mesa/main/renderbuffer.h b/src/mesa/main/renderbuffer.h index 39d9b3035e6..53da5b03385 100644 --- a/src/mesa/main/renderbuffer.h +++ b/src/mesa/main/renderbuffer.h @@ -108,8 +108,5 @@ extern void _mesa_reference_renderbuffer(struct gl_renderbuffer **ptr, struct gl_renderbuffer *rb); -extern struct gl_renderbuffer * -_mesa_new_depthstencil_renderbuffer(struct gl_context *ctx, GLuint name); - #endif /* RENDERBUFFER_H */ diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 4696dbb526f..7ad50bcaddc 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -192,7 +192,10 @@ update_arrays( struct gl_context *ctx ) static void update_program_enables(struct gl_context *ctx) { - /* These _Enabled flags indicate if the program is enabled AND valid. */ + /* These _Enabled flags indicate if the user-defined ARB/NV vertex/fragment + * program is enabled AND valid. Similarly for ATI fragment shaders. + * GLSL shaders not relevant here. + */ ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled && ctx->VertexProgram.Current->Base.Instructions; ctx->FragmentProgram._Enabled = ctx->FragmentProgram.Enabled @@ -203,11 +206,12 @@ update_program_enables(struct gl_context *ctx) /** - * Update vertex/fragment program state. In particular, update these fields: - * ctx->VertexProgram._Current - * ctx->VertexProgram._TnlProgram, - * These point to the highest priority enabled vertex/fragment program or are - * NULL if fixed-function processing is to be done. 
+ * Update the ctx->Vertex/Geometry/FragmentProgram._Current pointers to point + * to the current/active programs. Then call ctx->Driver.BindProgram() to + * tell the driver which programs to use. + * + * Programs may come from 3 sources: GLSL shaders, ARB/NV_vertex/fragment + * programs or programs derived from fixed-function state. * * This function needs to be called after texture state validation in case * we're generating a fragment program from fixed-function texture state. @@ -243,34 +247,33 @@ update_program(struct gl_context *ctx) */ if (fsProg && fsProg->LinkStatus && fsProg->FragmentProgram) { - /* Use shader programs */ + /* Use GLSL fragment shader */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, fsProg->FragmentProgram); } else if (ctx->FragmentProgram._Enabled) { - /* use user-defined vertex program */ + /* Use user-defined fragment program */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, ctx->FragmentProgram.Current); } else if (ctx->FragmentProgram._MaintainTexEnvProgram) { - /* Use fragment program generated from fixed-function state. - */ + /* Use fragment program generated from fixed-function state */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, _mesa_get_fixed_func_fragment_program(ctx)); _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, ctx->FragmentProgram._Current); } else { - /* no fragment program */ + /* No fragment program */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); } if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) { - /* Use shader programs */ + /* Use GLSL geometry shader */ _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, gsProg->GeometryProgram); } else { - /* no fragment program */ + /* No geometry program */ _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL); } @@ -279,18 +282,17 @@ update_program(struct gl_context *ctx) * fragprog inputs. 
*/ if (vsProg && vsProg->LinkStatus && vsProg->VertexProgram) { - /* Use shader programs */ + /* Use GLSL vertex shader */ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, - vsProg->VertexProgram); + vsProg->VertexProgram); } else if (ctx->VertexProgram._Enabled) { - /* use user-defined vertex program */ + /* Use user-defined vertex program */ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, ctx->VertexProgram.Current); } else if (ctx->VertexProgram._MaintainTnlProgram) { - /* Use vertex program generated from fixed-function state. - */ + /* Use vertex program generated from fixed-function state */ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, _mesa_get_fixed_func_vertex_program(ctx)); _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, @@ -416,29 +418,44 @@ update_color(struct gl_context *ctx) ctx->Color._LogicOpEnabled = _mesa_rgba_logicop_enabled(ctx); } + +/** + * Update the ctx->Color._ClampFragmentColor field + */ static void update_clamp_fragment_color(struct gl_context *ctx) { - if(ctx->Color.ClampFragmentColor == GL_FIXED_ONLY_ARB) - ctx->Color._ClampFragmentColor = !ctx->DrawBuffer || !ctx->DrawBuffer->Visual.floatMode; + if (ctx->Color.ClampFragmentColor == GL_FIXED_ONLY_ARB) + ctx->Color._ClampFragmentColor = + !ctx->DrawBuffer || !ctx->DrawBuffer->Visual.floatMode; else ctx->Color._ClampFragmentColor = ctx->Color.ClampFragmentColor; } + +/** + * Update the ctx->Color._ClampVertexColor field + */ static void update_clamp_vertex_color(struct gl_context *ctx) { - if(ctx->Light.ClampVertexColor == GL_FIXED_ONLY_ARB) - ctx->Light._ClampVertexColor = !ctx->DrawBuffer || !ctx->DrawBuffer->Visual.floatMode; + if (ctx->Light.ClampVertexColor == GL_FIXED_ONLY_ARB) + ctx->Light._ClampVertexColor = + !ctx->DrawBuffer || !ctx->DrawBuffer->Visual.floatMode; else ctx->Light._ClampVertexColor = ctx->Light.ClampVertexColor; } + +/** + * Update the ctx->Color._ClampReadColor field + */ static void 
update_clamp_read_color(struct gl_context *ctx) { - if(ctx->Color.ClampReadColor == GL_FIXED_ONLY_ARB) - ctx->Color._ClampReadColor = !ctx->ReadBuffer || !ctx->ReadBuffer->Visual.floatMode; + if (ctx->Color.ClampReadColor == GL_FIXED_ONLY_ARB) + ctx->Color._ClampReadColor = + !ctx->ReadBuffer || !ctx->ReadBuffer->Visual.floatMode; else ctx->Color._ClampReadColor = ctx->Color.ClampReadColor; } diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 97d10122541..26c2ff98ba1 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -233,6 +233,7 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions, const GLint width = texImage->Width; const GLint height = texImage->Height; const GLint depth = texImage->Depth; + const GLenum dataType = _mesa_get_format_datatype(texImage->TexFormat); /* Normally, no pixel transfer ops are performed during glGetTexImage. * The only possible exception is component clamping to [0,1]. */ @@ -248,6 +249,19 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions, return; } + /* Clamping does not apply to GetTexImage (final conversion)? + * Looks like we need clamp though when going from format + * containing negative values to unsigned format. + */ + if (format == GL_LUMINANCE || format == GL_LUMINANCE_ALPHA) { + transferOps |= IMAGE_CLAMP_BIT; + } + else if (!type_with_negative_values(type) && + (dataType == GL_FLOAT || + dataType == GL_SIGNED_NORMALIZED)) { + transferOps |= IMAGE_CLAMP_BIT; + } + /* glGetTexImage always returns sRGB data for sRGB textures. Make sure the * fetch functions return sRGB data without linearizing it. */ @@ -262,20 +276,6 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions, width, height, format, type, img, row, 0); GLint col; - GLenum dataType = _mesa_get_format_datatype(texImage->TexFormat); - - /* clamp does not apply to GetTexImage (final conversion)? 
- * Looks like we need clamp though when going from format - * containing negative values to unsigned format. - */ - if (format == GL_LUMINANCE || format == GL_LUMINANCE_ALPHA) { - transferOps |= IMAGE_CLAMP_BIT; - } - else if (!type_with_negative_values(type) && - (dataType == GL_FLOAT || - dataType == GL_SIGNED_NORMALIZED)) { - transferOps |= IMAGE_CLAMP_BIT; - } for (col = 0; col < width; col++) { texImage->FetchTexelf(texImage, col, row, img, rgba[col]); diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 0827cb883e8..6f53686e7ff 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1685,11 +1685,15 @@ texture_error_check( struct gl_context *ctx, /* additional checks for depth textures */ if (_mesa_base_tex_format(ctx, internalFormat) == GL_DEPTH_COMPONENT) { - /* Only 1D, 2D and rectangular textures supported, not 3D or cubes */ + /* Only 1D, 2D, rect and array textures supported, not 3D or cubes */ if (target != GL_TEXTURE_1D && target != GL_PROXY_TEXTURE_1D && target != GL_TEXTURE_2D && target != GL_PROXY_TEXTURE_2D && + target != GL_TEXTURE_1D_ARRAY && + target != GL_PROXY_TEXTURE_1D_ARRAY && + target != GL_TEXTURE_2D_ARRAY && + target != GL_PROXY_TEXTURE_2D_ARRAY && target != GL_TEXTURE_RECTANGLE_ARB && target != GL_PROXY_TEXTURE_RECTANGLE_ARB) { if (!isProxy) @@ -3270,7 +3274,7 @@ compressedteximage(struct gl_context *ctx, GLuint dims, border, imageSize, &reason); if (error) { - _mesa_error(ctx, error, "glTexImage2D(%s)", reason); + _mesa_error(ctx, error, "glCompressedTexImage%uD(%s)", dims, reason); return; } diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index fdf12817c9a..565a3a2d8df 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -879,6 +879,8 @@ unbind_texobj_from_fbo(struct gl_context *ctx, for (j = 0; j < BUFFER_COUNT; j++) { if (fb->Attachment[j].Type == GL_TEXTURE && fb->Attachment[j].Texture == texObj) { + /* Vertices are already flushed by _mesa_DeleteTextures */ + 
ctx->NewState |= _NEW_BUFFERS; _mesa_remove_attachment(ctx, fb->Attachment + j); } } diff --git a/src/mesa/main/texrender.h b/src/mesa/main/texrender.h deleted file mode 100644 index cacd091160e..00000000000 --- a/src/mesa/main/texrender.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef TEXRENDER_H -#define TEXRENDER_H - -struct gl_context; -struct gl_framebuffer; -struct gl_renderbuffer_attachment; - -extern void -_mesa_render_texture(struct gl_context *ctx, - struct gl_framebuffer *fb, - struct gl_renderbuffer_attachment *att); - -extern void -_mesa_finish_render_texture(struct gl_context *ctx, - struct gl_renderbuffer_attachment *att); - - -#endif /* TEXRENDER_H */ diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 6da3e4eb7b4..5c925a3d314 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -3308,10 +3308,12 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) GLint img, row; ASSERT(dstFormat == MESA_FORMAT_Z24_S8); - ASSERT(srcFormat == GL_DEPTH_STENCIL_EXT || srcFormat == GL_DEPTH_COMPONENT); + ASSERT(srcFormat == GL_DEPTH_STENCIL_EXT || + srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX); ASSERT(srcFormat != GL_DEPTH_STENCIL_EXT || srcType == GL_UNSIGNED_INT_24_8_EXT); - if (srcFormat != GL_DEPTH_COMPONENT && ctx->Pixel.DepthScale == 1.0f && + if (srcFormat == GL_DEPTH_STENCIL && ctx->Pixel.DepthScale == 1.0f && ctx->Pixel.DepthBias == 0.0f && !srcPacking->SwapBytes) { /* simple path */ @@ -3322,7 +3324,8 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) srcWidth, srcHeight, srcDepth, srcFormat, srcType, srcAddr, srcPacking); } - else if (srcFormat == GL_DEPTH_COMPONENT) { + else if (srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX) { /* In case we only upload depth we need to preserve the stencil */ for (img = 0; img < srcDepth; img++) { GLuint *dstRow = (GLuint *) dstAddr diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h index 2e6335846e3..0a0512c339d 100644 --- a/src/mesa/main/version.h +++ 
b/src/mesa/main/version.h @@ -33,9 +33,9 @@ struct gl_context; /* Mesa version */ #define MESA_MAJOR 7 -#define MESA_MINOR 11 +#define MESA_MINOR 12 #define MESA_PATCH 0 -#define MESA_VERSION_STRING "7.11-devel" +#define MESA_VERSION_STRING "7.12-devel" /* To make version comparison easy */ #define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 00869979dd8..67adb8f3dcd 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1296,8 +1296,11 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); break; case ir_unop_i2f: + case ir_unop_u2f: case ir_unop_b2f: case ir_unop_b2i: + case ir_unop_i2u: + case ir_unop_u2i: /* Mesa IR lacks types, ints are stored as truncated floats. */ result_src = op[0]; break; @@ -1335,7 +1338,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: - case ir_unop_u2f: case ir_binop_lshift: case ir_binop_rshift: case ir_binop_bit_and: diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index 9b2cb1a3c14..4b2ec08bbb0 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -92,7 +92,6 @@ MAIN_SOURCES = \ main/texobj.c \ main/texpal.c \ main/texparam.c \ - main/texrender.c \ main/texstate.c \ main/texstore.c \ main/texturebarrier.c \ @@ -145,6 +144,7 @@ SWRAST_SOURCES = \ swrast/s_stencil.c \ swrast/s_texcombine.c \ swrast/s_texfilter.c \ + swrast/s_texrender.c \ swrast/s_triangle.c \ swrast/s_zoom.c diff --git a/src/mesa/state_tracker/st_atom_blend.c b/src/mesa/state_tracker/st_atom_blend.c index d1844e1066f..4c9a2b95ac3 100644 --- a/src/mesa/state_tracker/st_atom_blend.c +++ b/src/mesa/state_tracker/st_atom_blend.c @@ -156,7 +156,7 @@ translate_logicop(GLenum logicop) * Figure out if colormasks are different per rt. 
*/ static GLboolean -colormask_per_rt(struct gl_context *ctx) +colormask_per_rt(const struct gl_context *ctx) { /* a bit suboptimal have to compare lots of values */ unsigned i; @@ -172,7 +172,7 @@ colormask_per_rt(struct gl_context *ctx) * Figure out if blend enables/state are different per rt. */ static GLboolean -blend_per_rt(struct gl_context *ctx) +blend_per_rt(const struct gl_context *ctx) { if (ctx->Color.BlendEnabled && (ctx->Color.BlendEnabled != ((1 << ctx->Const.MaxDrawBuffers) - 1))) { @@ -190,13 +190,14 @@ static void update_blend( struct st_context *st ) { struct pipe_blend_state *blend = &st->state.blend; + const struct gl_context *ctx = st->ctx; unsigned num_state = 1; unsigned i, j; memset(blend, 0, sizeof(*blend)); - if (blend_per_rt(st->ctx) || colormask_per_rt(st->ctx)) { - num_state = st->ctx->Const.MaxDrawBuffers; + if (blend_per_rt(ctx) || colormask_per_rt(ctx)) { + num_state = ctx->Const.MaxDrawBuffers; blend->independent_blend_enable = 1; } /* Note it is impossible to correctly deal with EXT_blend_logic_op and @@ -205,52 +206,52 @@ update_blend( struct st_context *st ) and separate alpha/rgb logicop/blend support respectively. Neither possible in gallium nor most hardware. Assume these combinations don't happen. 
*/ - if (st->ctx->Color.ColorLogicOpEnabled || - (st->ctx->Color.BlendEnabled && - st->ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) { + if (ctx->Color.ColorLogicOpEnabled || + (ctx->Color.BlendEnabled && + ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) { /* logicop enabled */ blend->logicop_enable = 1; - blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp); + blend->logicop_func = translate_logicop(ctx->Color.LogicOp); } - else if (st->ctx->Color.BlendEnabled) { + else if (ctx->Color.BlendEnabled) { /* blending enabled */ for (i = 0, j = 0; i < num_state; i++) { - blend->rt[i].blend_enable = (st->ctx->Color.BlendEnabled >> i) & 0x1; + blend->rt[i].blend_enable = (ctx->Color.BlendEnabled >> i) & 0x1; - if (st->ctx->Extensions.ARB_draw_buffers_blend) + if (ctx->Extensions.ARB_draw_buffers_blend) j = i; blend->rt[i].rgb_func = - translate_blend(st->ctx->Color.Blend[j].EquationRGB); + translate_blend(ctx->Color.Blend[j].EquationRGB); - if (st->ctx->Color.Blend[i].EquationRGB == GL_MIN || - st->ctx->Color.Blend[i].EquationRGB == GL_MAX) { + if (ctx->Color.Blend[i].EquationRGB == GL_MIN || + ctx->Color.Blend[i].EquationRGB == GL_MAX) { /* Min/max are special */ blend->rt[i].rgb_src_factor = PIPE_BLENDFACTOR_ONE; blend->rt[i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; } else { blend->rt[i].rgb_src_factor = - translate_blend(st->ctx->Color.Blend[j].SrcRGB); + translate_blend(ctx->Color.Blend[j].SrcRGB); blend->rt[i].rgb_dst_factor = - translate_blend(st->ctx->Color.Blend[j].DstRGB); + translate_blend(ctx->Color.Blend[j].DstRGB); } blend->rt[i].alpha_func = - translate_blend(st->ctx->Color.Blend[j].EquationA); + translate_blend(ctx->Color.Blend[j].EquationA); - if (st->ctx->Color.Blend[i].EquationA == GL_MIN || - st->ctx->Color.Blend[i].EquationA == GL_MAX) { + if (ctx->Color.Blend[i].EquationA == GL_MIN || + ctx->Color.Blend[i].EquationA == GL_MAX) { /* Min/max are special */ blend->rt[i].alpha_src_factor = PIPE_BLENDFACTOR_ONE; blend->rt[i].alpha_dst_factor = 
PIPE_BLENDFACTOR_ONE; } else { blend->rt[i].alpha_src_factor = - translate_blend(st->ctx->Color.Blend[j].SrcA); + translate_blend(ctx->Color.Blend[j].SrcA); blend->rt[i].alpha_dst_factor = - translate_blend(st->ctx->Color.Blend[j].DstA); + translate_blend(ctx->Color.Blend[j].DstA); } } } @@ -260,25 +261,25 @@ update_blend( struct st_context *st ) /* Colormask - maybe reverse these bits? */ for (i = 0; i < num_state; i++) { - if (st->ctx->Color.ColorMask[i][0]) + if (ctx->Color.ColorMask[i][0]) blend->rt[i].colormask |= PIPE_MASK_R; - if (st->ctx->Color.ColorMask[i][1]) + if (ctx->Color.ColorMask[i][1]) blend->rt[i].colormask |= PIPE_MASK_G; - if (st->ctx->Color.ColorMask[i][2]) + if (ctx->Color.ColorMask[i][2]) blend->rt[i].colormask |= PIPE_MASK_B; - if (st->ctx->Color.ColorMask[i][3]) + if (ctx->Color.ColorMask[i][3]) blend->rt[i].colormask |= PIPE_MASK_A; } - if (st->ctx->Color.DitherFlag) + if (ctx->Color.DitherFlag) blend->dither = 1; - if (st->ctx->Multisample.Enabled) { + if (ctx->Multisample.Enabled) { /* unlike in gallium/d3d10 these operations are only performed if msaa is enabled */ - if (st->ctx->Multisample.SampleAlphaToCoverage) + if (ctx->Multisample.SampleAlphaToCoverage) blend->alpha_to_coverage = 1; - if (st->ctx->Multisample.SampleAlphaToOne) + if (ctx->Multisample.SampleAlphaToOne) blend->alpha_to_one = 1; } @@ -286,7 +287,7 @@ update_blend( struct st_context *st ) { struct pipe_blend_color bc; - COPY_4FV(bc.color, st->ctx->Color.BlendColorUnclamped); + COPY_4FV(bc.color, ctx->Color.BlendColorUnclamped); cso_set_blend_color(st->cso_context, &bc); } } diff --git a/src/mesa/state_tracker/st_atom_clip.c b/src/mesa/state_tracker/st_atom_clip.c index 16f7aaae6f4..1330db843a6 100644 --- a/src/mesa/state_tracker/st_atom_clip.c +++ b/src/mesa/state_tracker/st_atom_clip.c @@ -43,20 +43,21 @@ static void update_clip( struct st_context *st ) { struct pipe_clip_state clip; + const struct gl_context *ctx = st->ctx; GLuint i; memset(&clip, 0, sizeof(clip)); 
for (i = 0; i < PIPE_MAX_CLIP_PLANES; i++) { - if (st->ctx->Transform.ClipPlanesEnabled & (1 << i)) { + if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { memcpy(clip.ucp[clip.nr], - st->ctx->Transform._ClipUserPlane[i], + ctx->Transform._ClipUserPlane[i], sizeof(clip.ucp[0])); clip.nr++; } } - clip.depth_clamp = st->ctx->Transform.DepthClamp != GL_FALSE; + clip.depth_clamp = ctx->Transform.DepthClamp != GL_FALSE; if (memcmp(&clip, &st->state.clip, sizeof(clip)) != 0) { st->state.clip = clip; diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 95b706cb96c..1f833d28212 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -94,7 +94,7 @@ create_color_map_texture(struct gl_context *ctx) const uint texSize = 256; /* simple, and usually perfect */ /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, + format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); /* create texture for color map/table */ diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 06024ad2657..731338f9beb 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -120,87 +120,91 @@ gl_filter_to_img_filter(GLenum filter) } } -static void convert_sampler(struct st_context *st, - struct pipe_sampler_state *sampler, - GLuint texUnit) + +static void +convert_sampler(struct st_context *st, + struct pipe_sampler_state *sampler, + GLuint texUnit) { - struct gl_texture_object *texobj; - struct gl_sampler_object *msamp; - - texobj = st->ctx->Texture.Unit[texUnit]._Current; - if (!texobj) { - texobj = st_get_default_texture(st); - } - - msamp = _mesa_get_samplerobj(st->ctx, texUnit); - - memset(sampler, 0, sizeof(*sampler)); - sampler->wrap_s = gl_wrap_xlate(msamp->WrapS); - sampler->wrap_t = 
gl_wrap_xlate(msamp->WrapT); - sampler->wrap_r = gl_wrap_xlate(msamp->WrapR); - - sampler->min_img_filter = gl_filter_to_img_filter(msamp->MinFilter); - sampler->min_mip_filter = gl_filter_to_mip_filter(msamp->MinFilter); - sampler->mag_img_filter = gl_filter_to_img_filter(msamp->MagFilter); - - if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB) - sampler->normalized_coords = 1; - - sampler->lod_bias = st->ctx->Texture.Unit[texUnit].LodBias + - msamp->LodBias; - - sampler->min_lod = CLAMP(msamp->MinLod, - 0.0f, - (GLfloat) texobj->MaxLevel - texobj->BaseLevel); - sampler->max_lod = MIN2((GLfloat) texobj->MaxLevel - texobj->BaseLevel, - msamp->MaxLod); - if (sampler->max_lod < sampler->min_lod) { - /* The GL spec doesn't seem to specify what to do in this case. - * Swap the values. - */ - float tmp = sampler->max_lod; - sampler->max_lod = sampler->min_lod; - sampler->min_lod = tmp; - assert(sampler->min_lod <= sampler->max_lod); - } - - if (msamp->BorderColor.ui[0] || - msamp->BorderColor.ui[1] || - msamp->BorderColor.ui[2] || - msamp->BorderColor.ui[3]) { - struct gl_texture_image *teximg; - - teximg = texobj->Image[0][texobj->BaseLevel]; - - st_translate_color(msamp->BorderColor.f, - teximg ? teximg->_BaseFormat : GL_RGBA, - sampler->border_color); - } - - sampler->max_anisotropy = (msamp->MaxAnisotropy == 1.0 ? 
- 0 : (GLuint) msamp->MaxAnisotropy); - - /* only care about ARB_shadow, not SGI shadow */ - if (msamp->CompareMode == GL_COMPARE_R_TO_TEXTURE) { - sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; - sampler->compare_func - = st_compare_func_to_pipe(msamp->CompareFunc); - } - - sampler->seamless_cube_map = - st->ctx->Texture.CubeMapSeamless || msamp->CubeMapSeamless; + struct gl_texture_object *texobj; + struct gl_context *ctx = st->ctx; + struct gl_sampler_object *msamp; + + texobj = ctx->Texture.Unit[texUnit]._Current; + if (!texobj) { + texobj = st_get_default_texture(st); + } + + msamp = _mesa_get_samplerobj(ctx, texUnit); + + memset(sampler, 0, sizeof(*sampler)); + sampler->wrap_s = gl_wrap_xlate(msamp->WrapS); + sampler->wrap_t = gl_wrap_xlate(msamp->WrapT); + sampler->wrap_r = gl_wrap_xlate(msamp->WrapR); + + sampler->min_img_filter = gl_filter_to_img_filter(msamp->MinFilter); + sampler->min_mip_filter = gl_filter_to_mip_filter(msamp->MinFilter); + sampler->mag_img_filter = gl_filter_to_img_filter(msamp->MagFilter); + + if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB) + sampler->normalized_coords = 1; + + sampler->lod_bias = ctx->Texture.Unit[texUnit].LodBias + msamp->LodBias; + + sampler->min_lod = CLAMP(msamp->MinLod, + 0.0f, + (GLfloat) texobj->MaxLevel - texobj->BaseLevel); + sampler->max_lod = MIN2((GLfloat) texobj->MaxLevel - texobj->BaseLevel, + msamp->MaxLod); + if (sampler->max_lod < sampler->min_lod) { + /* The GL spec doesn't seem to specify what to do in this case. + * Swap the values. + */ + float tmp = sampler->max_lod; + sampler->max_lod = sampler->min_lod; + sampler->min_lod = tmp; + assert(sampler->min_lod <= sampler->max_lod); + } + + if (msamp->BorderColor.ui[0] || + msamp->BorderColor.ui[1] || + msamp->BorderColor.ui[2] || + msamp->BorderColor.ui[3]) { + struct gl_texture_image *teximg; + + teximg = texobj->Image[0][texobj->BaseLevel]; + + st_translate_color(msamp->BorderColor.f, + teximg ? 
teximg->_BaseFormat : GL_RGBA, + sampler->border_color); + } + + sampler->max_anisotropy = (msamp->MaxAnisotropy == 1.0 ? + 0 : (GLuint) msamp->MaxAnisotropy); + + /* only care about ARB_shadow, not SGI shadow */ + if (msamp->CompareMode == GL_COMPARE_R_TO_TEXTURE) { + sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; + sampler->compare_func + = st_compare_func_to_pipe(msamp->CompareFunc); + } + + sampler->seamless_cube_map = + ctx->Texture.CubeMapSeamless || msamp->CubeMapSeamless; } + static void update_vertex_samplers(struct st_context *st) { - struct gl_vertex_program *vprog = st->ctx->VertexProgram._Current; + const struct gl_context *ctx = st->ctx; + struct gl_vertex_program *vprog = ctx->VertexProgram._Current; GLuint su; st->state.num_vertex_samplers = 0; /* loop over sampler units (aka tex image units) */ - for (su = 0; su < st->ctx->Const.MaxVertexTextureImageUnits; su++) { + for (su = 0; su < ctx->Const.MaxVertexTextureImageUnits; su++) { struct pipe_sampler_state *sampler = st->state.vertex_samplers + su; if (vprog->Base.SamplersUsed & (1 << su)) { @@ -220,16 +224,18 @@ update_vertex_samplers(struct st_context *st) cso_single_vertex_sampler_done(st->cso_context); } + static void update_fragment_samplers(struct st_context *st) { - struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current; + const struct gl_context *ctx = st->ctx; + struct gl_fragment_program *fprog = ctx->FragmentProgram._Current; GLuint su; st->state.num_samplers = 0; /* loop over sampler units (aka tex image units) */ - for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { + for (su = 0; su < ctx->Const.MaxTextureImageUnits; su++) { struct pipe_sampler_state *sampler = st->state.samplers + su; @@ -254,6 +260,7 @@ update_fragment_samplers(struct st_context *st) cso_single_sampler_done(st->cso_context); } + static void update_samplers(struct st_context *st) { @@ -261,6 +268,7 @@ update_samplers(struct st_context *st) update_vertex_samplers(st); } + const struct 
st_tracked_state st_update_sampler = { "st_update_sampler", /* name */ { /* dirty */ diff --git a/src/mesa/state_tracker/st_atom_scissor.c b/src/mesa/state_tracker/st_atom_scissor.c index 56b1383ae39..eb13877787b 100644 --- a/src/mesa/state_tracker/st_atom_scissor.c +++ b/src/mesa/state_tracker/st_atom_scissor.c @@ -44,7 +44,8 @@ static void update_scissor( struct st_context *st ) { struct pipe_scissor_state scissor; - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const struct gl_context *ctx = st->ctx; + const struct gl_framebuffer *fb = ctx->DrawBuffer; GLint miny, maxy; scissor.minx = 0; @@ -52,15 +53,15 @@ update_scissor( struct st_context *st ) scissor.maxx = fb->Width; scissor.maxy = fb->Height; - if (st->ctx->Scissor.Enabled) { + if (ctx->Scissor.Enabled) { /* need to be careful here with xmax or ymax < 0 */ - GLint xmax = MAX2(0, st->ctx->Scissor.X + st->ctx->Scissor.Width); - GLint ymax = MAX2(0, st->ctx->Scissor.Y + st->ctx->Scissor.Height); + GLint xmax = MAX2(0, ctx->Scissor.X + ctx->Scissor.Width); + GLint ymax = MAX2(0, ctx->Scissor.Y + ctx->Scissor.Height); - if (st->ctx->Scissor.X > (GLint)scissor.minx) - scissor.minx = st->ctx->Scissor.X; - if (st->ctx->Scissor.Y > (GLint)scissor.miny) - scissor.miny = st->ctx->Scissor.Y; + if (ctx->Scissor.X > (GLint)scissor.minx) + scissor.minx = ctx->Scissor.X; + if (ctx->Scissor.Y > (GLint)scissor.miny) + scissor.miny = ctx->Scissor.Y; if (xmax < (GLint) scissor.maxx) scissor.maxx = xmax; diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c index ecdd9f06f6a..b3e0dc7f1b0 100644 --- a/src/mesa/state_tracker/st_atom_stipple.c +++ b/src/mesa/state_tracker/st_atom_stipple.c @@ -64,17 +64,18 @@ invert_stipple(GLuint dest[32], const GLuint src[32], GLuint winHeight) static void update_stipple( struct st_context *st ) { + const struct gl_context *ctx = st->ctx; const GLuint sz = sizeof(st->state.poly_stipple); - assert(sz == sizeof(st->ctx->PolygonStipple)); + 
assert(sz == sizeof(ctx->PolygonStipple)); - if (memcmp(st->state.poly_stipple, st->ctx->PolygonStipple, sz)) { + if (memcmp(st->state.poly_stipple, ctx->PolygonStipple, sz)) { /* state has changed */ struct pipe_poly_stipple newStipple; - memcpy(st->state.poly_stipple, st->ctx->PolygonStipple, sz); + memcpy(st->state.poly_stipple, ctx->PolygonStipple, sz); - invert_stipple(newStipple.stipple, st->ctx->PolygonStipple, - st->ctx->DrawBuffer->Height); + invert_stipple(newStipple.stipple, ctx->PolygonStipple, + ctx->DrawBuffer->Height); st->pipe->set_polygon_stipple(st->pipe, &newStipple); } diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 072eb977ebb..800a9f1f0e0 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -187,15 +187,16 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_ GLuint texUnit) { struct pipe_context *pipe = st->pipe; + struct gl_context *ctx = st->ctx; const struct gl_sampler_object *samp; struct gl_texture_object *texObj; struct st_texture_object *stObj; enum pipe_format st_view_format; GLboolean retval; - samp = _mesa_get_samplerobj(st->ctx, texUnit); + samp = _mesa_get_samplerobj(ctx, texUnit); - texObj = st->ctx->Texture.Unit[texUnit]._Current; + texObj = ctx->Texture.Unit[texUnit]._Current; if (!texObj) { texObj = st_get_default_texture(st); @@ -203,7 +204,7 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_ } stObj = st_texture_object(texObj); - retval = st_finalize_texture(st->ctx, st->pipe, texObj); + retval = st_finalize_texture(ctx, st->pipe, texObj); if (!retval) { /* out of mem */ return GL_FALSE; @@ -253,13 +254,14 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_ static void update_vertex_textures(struct st_context *st) { - struct gl_vertex_program *vprog = st->ctx->VertexProgram._Current; + const struct gl_context *ctx = st->ctx; + 
struct gl_vertex_program *vprog = ctx->VertexProgram._Current; GLuint su; st->state.num_vertex_textures = 0; /* loop over sampler units (aka tex image units) */ - for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { + for (su = 0; su < ctx->Const.MaxTextureImageUnits; su++) { struct pipe_sampler_view *sampler_view = NULL; if (vprog->Base.SamplersUsed & (1 << su)) { GLboolean retval; @@ -277,9 +279,9 @@ update_vertex_textures(struct st_context *st) pipe_sampler_view_reference(&st->state.sampler_vertex_views[su], sampler_view); } - if (st->ctx->Const.MaxVertexTextureImageUnits > 0) { + if (ctx->Const.MaxVertexTextureImageUnits > 0) { GLuint numUnits = MIN2(st->state.num_vertex_textures, - st->ctx->Const.MaxVertexTextureImageUnits); + ctx->Const.MaxVertexTextureImageUnits); cso_set_vertex_sampler_views(st->cso_context, numUnits, st->state.sampler_vertex_views); @@ -289,13 +291,14 @@ update_vertex_textures(struct st_context *st) static void update_fragment_textures(struct st_context *st) { - struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current; + const struct gl_context *ctx = st->ctx; + struct gl_fragment_program *fprog = ctx->FragmentProgram._Current; GLuint su; st->state.num_textures = 0; /* loop over sampler units (aka tex image units) */ - for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { + for (su = 0; su < ctx->Const.MaxTextureImageUnits; su++) { struct pipe_sampler_view *sampler_view = NULL; if (fprog->Base.SamplersUsed & (1 << su)) { GLboolean retval; @@ -338,22 +341,23 @@ const struct st_tracked_state st_update_vertex_texture = { static void finalize_textures(struct st_context *st) { - struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current; + struct gl_context *ctx = st->ctx; + struct gl_fragment_program *fprog = ctx->FragmentProgram._Current; const GLboolean prev_missing_textures = st->missing_textures; GLuint su; st->missing_textures = GL_FALSE; - for (su = 0; su < st->ctx->Const.MaxTextureCoordUnits; 
su++) { + for (su = 0; su < ctx->Const.MaxTextureCoordUnits; su++) { if (fprog->Base.SamplersUsed & (1 << su)) { const GLuint texUnit = fprog->Base.SamplerUnits[su]; struct gl_texture_object *texObj - = st->ctx->Texture.Unit[texUnit]._Current; + = ctx->Texture.Unit[texUnit]._Current; if (texObj) { GLboolean retval; - retval = st_finalize_texture(st->ctx, st->pipe, texObj); + retval = st_finalize_texture(ctx, st->pipe, texObj); if (!retval) { /* out of mem */ st->missing_textures = GL_TRUE; diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 965fbcd1d9e..d61d7ac22be 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -989,8 +989,9 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, /* can we write to stencil if not fallback */ if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) goto stencil_fallback; - + tex_format = st_choose_format(st->pipe->screen, base_format(format), + GL_NONE, GL_NONE, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) @@ -1399,13 +1400,14 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, /* srcFormat can't be used as a texture format */ if (type == GL_DEPTH) { texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT, - st->internal_target, sample_count, - PIPE_BIND_DEPTH_STENCIL); + GL_NONE, GL_NONE, st->internal_target, + sample_count, PIPE_BIND_DEPTH_STENCIL); assert(texFormat != PIPE_FORMAT_NONE); } else { /* default color format */ - texFormat = st_choose_format(screen, GL_RGBA, st->internal_target, + texFormat = st_choose_format(screen, GL_RGBA, + GL_NONE, GL_NONE, st->internal_target, sample_count, PIPE_BIND_SAMPLER_VIEW); assert(texFormat != PIPE_FORMAT_NONE); } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 88f62902b25..6907cfc03cf 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ 
b/src/mesa/state_tracker/st_cb_texture.c @@ -842,7 +842,7 @@ decompress_with_blit(struct gl_context * ctx, GLenum target, GLint level, else { /* format translation via floats */ GLuint row; - enum pipe_format format = util_format_linear(dst_texture->format); + enum pipe_format pformat = util_format_linear(dst_texture->format); for (row = 0; row < height; row++) { const GLbitfield transferOps = 0x0; /* bypassed for glGetTexImage() */ GLfloat rgba[4 * MAX_WIDTH]; @@ -854,7 +854,7 @@ decompress_with_blit(struct gl_context * ctx, GLenum target, GLint level, /* get float[4] rgba row from surface */ pipe_get_tile_rgba_format(pipe, tex_xfer, 0, row, width, 1, - format, rgba); + pformat, rgba); _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format, type, dest, &ctx->Pack, transferOps); @@ -1241,7 +1241,8 @@ fallback_copy_texsubimage(struct gl_context *ctx, GLenum target, GLint level, src_trans = pipe_get_transfer(pipe, strb->texture, - 0, 0, + strb->rtt_level, + strb->rtt_face + strb->rtt_slice, PIPE_TRANSFER_READ, srcX, srcY, width, height); diff --git a/src/mesa/state_tracker/st_cb_viewport.c b/src/mesa/state_tracker/st_cb_viewport.c index 049755e45c0..d4742eb897d 100644 --- a/src/mesa/state_tracker/st_cb_viewport.c +++ b/src/mesa/state_tracker/st_cb_viewport.c @@ -56,13 +56,20 @@ static void st_viewport(struct gl_context * ctx, GLint x, GLint y, if (!st->invalidate_on_gl_viewport) return; + /* + * Normally we'd want the state tracker manager to mark the drawables + * invalid only when needed. This will force the state tracker manager + * to revalidate the drawable, rather than just update the context with + * the latest cached drawable info. 
+ */ + stdraw = st_ws_framebuffer(st->ctx->DrawBuffer); stread = st_ws_framebuffer(st->ctx->ReadBuffer); - if (stdraw) - p_atomic_set(&stdraw->revalidate, TRUE); - if (stread && stread != stdraw) - p_atomic_set(&stread->revalidate, TRUE); + if (stdraw && stdraw->iface) + stdraw->iface_stamp = p_atomic_read(&stdraw->iface->stamp) - 1; + if (stread && stread != stdraw && stread->iface) + stread->iface_stamp = p_atomic_read(&stread->iface->stamp) - 1; } void st_init_viewport_functions(struct dd_function_table *functions) diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index ff207039d78..0a322022149 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -204,6 +204,9 @@ struct st_context /* Active render condition. */ struct pipe_query *render_condition; unsigned condition_mode; + + int32_t draw_stamp; + int32_t read_stamp; }; @@ -227,7 +230,8 @@ struct st_framebuffer struct st_framebuffer_iface *iface; enum st_attachment_type statts[ST_ATTACHMENT_COUNT]; unsigned num_statts; - int32_t revalidate; + int32_t stamp; + int32_t iface_stamp; }; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 56955d357b1..5040c6fa5ab 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -33,7 +33,7 @@ * * We basically convert the VBO's vertex attribute/array information into * Gallium vertex state, bind the vertex buffer objects and call - * pipe->draw_elements(), pipe->draw_range_elements() or pipe->draw_arrays(). + * pipe->draw_vbo(). * * Authors: * Keith Whitwell <[email protected]> @@ -233,11 +233,26 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, } +/** + * This is very similar to vbo_all_varyings_in_vbos() but we test + * the stride. See bug 38626. 
+ */ +static GLboolean +all_varyings_in_vbos(const struct gl_client_array *arrays[]) +{ + GLuint i; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + if (arrays[i]->StrideB && !_mesa_is_bufferobj(arrays[i]->BufferObj)) + return GL_FALSE; + + return GL_TRUE; +} + /** * Examine the active arrays to determine if we have interleaved * vertex arrays all living in one VBO, or all living in user space. - * \param userSpace returns whether the arrays are in user space. */ static GLboolean is_interleaved_arrays(const struct st_vertex_program *vp, @@ -247,8 +262,8 @@ is_interleaved_arrays(const struct st_vertex_program *vp, GLuint attr; const struct gl_buffer_object *firstBufObj = NULL; GLint firstStride = -1; - const GLubyte *client_addr = NULL; - GLboolean user_memory = GL_FALSE; + const GLubyte *firstPtr = NULL; + GLboolean userSpaceBuffer = GL_FALSE; for (attr = 0; attr < vpv->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; @@ -256,37 +271,26 @@ is_interleaved_arrays(const struct st_vertex_program *vp, const struct gl_buffer_object *bufObj = array->BufferObj; const GLsizei stride = array->StrideB; /* in bytes */ - if (firstStride < 0) { + if (attr == 0) { + /* save info about the first array */ firstStride = stride; - user_memory = !bufObj || !bufObj->Name; - } - else if (firstStride != stride) { - return GL_FALSE; - } - - if (!bufObj || !bufObj->Name) { - /* Try to detect if the client-space arrays are - * "close" to each other. 
- */ - if (!user_memory) { - return GL_FALSE; - } - if (!client_addr) { - client_addr = array->Ptr; - } - else if (abs(array->Ptr - client_addr) > firstStride) { - /* arrays start too far apart */ - return GL_FALSE; - } - } - else if (!firstBufObj) { - if (user_memory) { - return GL_FALSE; - } + firstPtr = array->Ptr; firstBufObj = bufObj; + userSpaceBuffer = !bufObj || !bufObj->Name; } - else if (bufObj != firstBufObj) { - return GL_FALSE; + else { + /* check if other arrays interleave with the first, in same buffer */ + if (stride != firstStride) + return GL_FALSE; /* strides don't match */ + + if (bufObj != firstBufObj) + return GL_FALSE; /* arrays in different VBOs */ + + if (abs(array->Ptr - firstPtr) > firstStride) + return GL_FALSE; /* arrays start too far apart */ + + if ((!bufObj || !_mesa_is_bufferobj(bufObj)) != userSpaceBuffer) + return GL_FALSE; /* mix of VBO and user-space arrays */ } } @@ -510,6 +514,7 @@ setup_index_buffer(struct gl_context *ctx, } } + /** * Prior to drawing, check that any uniforms referenced by the * current shader have been set. If a uniform has not been set, @@ -556,8 +561,8 @@ translate_prim(const struct gl_context *ctx, unsigned prim) assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY); /* Avoid quadstrips if it's easy to do so: - * Note: it's imporant to do the correct trimming if we change the prim type! - * We do that wherever this function is called. + * Note: it's important to do the correct trimming if we change the + * prim type! We do that wherever this function is called. 
*/ if (prim == GL_QUAD_STRIP && ctx->Light.ShadeModel != GL_FLAT && @@ -650,7 +655,8 @@ st_draw_vbo(struct gl_context *ctx, struct pipe_draw_info info; unsigned i, num_instances = 1; GLboolean new_array = - st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0; + st->dirty.st && + (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT)) != 0; /* Mesa core state should have been validated already */ assert(ctx->NewState == 0x0); @@ -658,7 +664,7 @@ st_draw_vbo(struct gl_context *ctx, if (ib) { /* Gallium probably doesn't want this in some cases. */ if (!index_bounds_valid) - if (!vbo_all_varyings_in_vbos(arrays)) + if (!all_varyings_in_vbos(arrays)) vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); for (i = 0; i < nr_prims; i++) { @@ -738,8 +744,8 @@ st_draw_vbo(struct gl_context *ctx, } } - info.primitive_restart = st->ctx->Array.PrimitiveRestart; - info.restart_index = st->ctx->Array.RestartIndex; + info.primitive_restart = ctx->Array.PrimitiveRestart; + info.restart_index = ctx->Array.RestartIndex; /* do actual drawing */ for (i = 0; i < nr_prims; i++) { diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 35835712547..fa5d8f5050a 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -608,7 +608,7 @@ struct format_mapping * Multiple GL enums might map to multiple pipe_formats. * The first pipe format in the list that's supported is the one that's chosen. 
*/ -static struct format_mapping format_map[] = { +static const struct format_mapping format_map[] = { /* Basic RGB, RGBA formats */ { { GL_RGB10, GL_RGB10_A2, 0 }, @@ -616,7 +616,7 @@ static struct format_mapping format_map[] = { }, { { 4, GL_RGBA, GL_RGBA8, 0 }, - { DEFAULT_RGBA_FORMATS, 0 } + { PIPE_FORMAT_R8G8B8A8_UNORM, DEFAULT_RGBA_FORMATS } }, { { GL_BGRA, 0 }, @@ -624,7 +624,7 @@ static struct format_mapping format_map[] = { }, { { 3, GL_RGB, GL_RGB8, 0 }, - { DEFAULT_RGB_FORMATS, 0 } + { DEFAULT_RGB_FORMATS } }, { { GL_RGB12, GL_RGB16, GL_RGBA12, GL_RGBA16, 0 }, @@ -1108,7 +1108,7 @@ static struct format_mapping format_map[] = { * Return first supported format from the given list. */ static enum pipe_format -find_supported_format(struct pipe_screen *screen, +find_supported_format(struct pipe_screen *screen, const enum pipe_format formats[], enum pipe_texture_target target, unsigned sample_count, @@ -1124,6 +1124,91 @@ find_supported_format(struct pipe_screen *screen, return PIPE_FORMAT_NONE; } +struct exact_format_mapping +{ + GLenum format; + GLenum type; + enum pipe_format pformat; +}; + +static const struct exact_format_mapping rgba8888_tbl[] = +{ + { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_A8B8G8R8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_A8B8G8R8_UNORM }, + { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_R8G8B8A8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_R8G8B8A8_UNORM }, + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_A8R8G8B8_UNORM }, + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_B8G8R8A8_UNORM }, + { GL_RGBA, GL_UNSIGNED_BYTE, PIPE_FORMAT_R8G8B8A8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_BYTE, PIPE_FORMAT_A8B8G8R8_UNORM }, + { GL_BGRA, GL_UNSIGNED_BYTE, PIPE_FORMAT_B8G8R8A8_UNORM }, + { 0, 0, 0 } +}; + +static const struct exact_format_mapping rgbx8888_tbl[] = +{ + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_X8R8G8B8_UNORM }, + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 
PIPE_FORMAT_B8G8R8X8_UNORM }, + { GL_BGRA, GL_UNSIGNED_BYTE, PIPE_FORMAT_B8G8R8X8_UNORM }, + /* No Mesa formats for these Gallium formats: + { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_X8B8G8R8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_X8B8G8R8_UNORM }, + { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_R8G8B8X8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_R8G8B8X8_UNORM }, + { GL_RGBA, GL_UNSIGNED_BYTE, PIPE_FORMAT_R8G8B8X8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_BYTE, PIPE_FORMAT_X8B8G8R8_UNORM }, + */ + { 0, 0, 0 } +}; + +static const struct exact_format_mapping rgba1010102_tbl[] = +{ + { GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV, PIPE_FORMAT_B10G10R10A2_UNORM }, + /* No Mesa formats for these Gallium formats: + { GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, PIPE_FORMAT_R10G10B10A2_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_10_10_10_2, PIPE_FORMAT_R10G10B10A2_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT, PIPE_FORMAT_R10G10B10A2_UNORM }, + */ + { 0, 0, 0 } +}; + +/** + * If there is an exact pipe_format match for {internalFormat, format, type} + * return that, otherwise return PIPE_FORMAT_NONE so we can do fuzzy matching. 
+ */ +static enum pipe_format +find_exact_format(GLint internalFormat, GLenum format, GLenum type) +{ + uint i; + const struct exact_format_mapping* tbl; + + if (format == GL_NONE || type == GL_NONE) + return PIPE_FORMAT_NONE; + + switch (internalFormat) { + case 4: + case GL_RGBA: + case GL_RGBA8: + tbl = rgba8888_tbl; + break; + case 3: + case GL_RGB: + case GL_RGB8: + tbl = rgbx8888_tbl; + break; + case GL_RGB10_A2: + tbl = rgba1010102_tbl; + break; + default: + return PIPE_FORMAT_NONE; + } + + for (i = 0; tbl[i].format; i++) + if (tbl[i].format == format && tbl[i].type == type) + return tbl[i].pformat; + + return PIPE_FORMAT_NONE; +} /** * Given an OpenGL internalFormat value for a texture or surface, return @@ -1140,11 +1225,13 @@ find_supported_format(struct pipe_screen *screen, */ enum pipe_format st_choose_format(struct pipe_screen *screen, GLenum internalFormat, + GLenum format, GLenum type, enum pipe_texture_target target, unsigned sample_count, unsigned bindings) { GET_CURRENT_CONTEXT(ctx); /* XXX this should be a function parameter */ int i, j; + enum pipe_format pf; /* can't render to compressed formats at this time */ if (_mesa_is_compressed_format(ctx, internalFormat) @@ -1152,6 +1239,13 @@ st_choose_format(struct pipe_screen *screen, GLenum internalFormat, return PIPE_FORMAT_NONE; } + /* search for exact matches */ + pf = find_exact_format(internalFormat, format, type); + if (pf != PIPE_FORMAT_NONE && + screen->is_format_supported(screen, pf, + target, sample_count, bindings)) + return pf; + /* search table for internalFormat */ for (i = 0; i < Elements(format_map); i++) { const struct format_mapping *mapping = &format_map[i]; @@ -1183,14 +1277,11 @@ st_choose_renderbuffer_format(struct pipe_screen *screen, usage = PIPE_BIND_DEPTH_STENCIL; else usage = PIPE_BIND_RENDER_TARGET; - return st_choose_format(screen, internalFormat, PIPE_TEXTURE_2D, + return st_choose_format(screen, internalFormat, GL_NONE, GL_NONE, PIPE_TEXTURE_2D, sample_count, usage); } 
-/** - * Called via ctx->Driver.chooseTextureFormat(). - */ gl_format st_ChooseTextureFormat_renderable(struct gl_context *ctx, GLint internalFormat, GLenum format, GLenum type, GLboolean renderable) @@ -1206,20 +1297,19 @@ st_ChooseTextureFormat_renderable(struct gl_context *ctx, GLint internalFormat, * that in advance. Specify potential render target flags now. */ bindings = PIPE_BIND_SAMPLER_VIEW; - if (renderable == GL_TRUE) { - if (_mesa_is_depth_format(internalFormat) || - _mesa_is_depth_or_stencil_format(internalFormat)) + if (renderable) { + if (_mesa_is_depth_or_stencil_format(internalFormat)) bindings |= PIPE_BIND_DEPTH_STENCIL; - else + else bindings |= PIPE_BIND_RENDER_TARGET; } - pFormat = st_choose_format(screen, internalFormat, + pFormat = st_choose_format(screen, internalFormat, format, type, PIPE_TEXTURE_2D, 0, bindings); if (pFormat == PIPE_FORMAT_NONE) { /* try choosing format again, this time without render target bindings */ - pFormat = st_choose_format(screen, internalFormat, + pFormat = st_choose_format(screen, internalFormat, format, type, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); } @@ -1231,6 +1321,10 @@ st_ChooseTextureFormat_renderable(struct gl_context *ctx, GLint internalFormat, return st_pipe_format_to_mesa_format(pFormat); } + +/** + * Called via ctx->Driver.ChooseTextureFormat(). 
+ */ gl_format st_ChooseTextureFormat(struct gl_context *ctx, GLint internalFormat, GLenum format, GLenum type) diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h index 0fb570f6ee4..1c1f5965f66 100644 --- a/src/mesa/state_tracker/st_format.h +++ b/src/mesa/state_tracker/st_format.h @@ -52,8 +52,9 @@ st_pipe_format_to_mesa_format(enum pipe_format pipeFormat); extern enum pipe_format st_choose_format(struct pipe_screen *screen, GLenum internalFormat, + GLenum format, GLenum type, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage); + unsigned bindings); extern enum pipe_format st_choose_renderbuffer_format(struct pipe_screen *screen, diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index a68544ddac7..a8c4b5c3f49 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -139,23 +139,64 @@ buffer_index_to_attachment(gl_buffer_index index) } /** + * Make sure a context picks up the latest cached state of the + * drawables it binds to. + */ +static void +st_context_validate(struct st_context *st, + struct st_framebuffer *stdraw, + struct st_framebuffer *stread) +{ + if (stdraw && stdraw->stamp != st->draw_stamp) { + st->dirty.st |= ST_NEW_FRAMEBUFFER; + _mesa_resize_framebuffer(st->ctx, &stdraw->Base, + stdraw->Base.Width, + stdraw->Base.Height); + st->draw_stamp = stdraw->stamp; + } + + if (stread && stread->stamp != st->read_stamp) { + if (stread != stdraw) { + st->dirty.st |= ST_NEW_FRAMEBUFFER; + _mesa_resize_framebuffer(st->ctx, &stread->Base, + stread->Base.Width, + stread->Base.Height); + } + st->read_stamp = stread->stamp; + } +} + +/** * Validate a framebuffer to make sure up-to-date pipe_textures are used. 
+ * The context we need to pass in is a dummy context needed only to be + * able to get a pipe context to create pipe surfaces, and to have a + * context to call _mesa_resize_framebuffer(): + * (That should probably be rethought, since those surfaces become + * drawable state, not context state, and can be freed by another pipe + * context). */ static void -st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st) +st_framebuffer_validate(struct st_framebuffer *stfb, + struct st_context *st) { - struct pipe_context *pipe = st->pipe; struct pipe_resource *textures[ST_ATTACHMENT_COUNT]; uint width, height; unsigned i; boolean changed = FALSE; + int32_t new_stamp = p_atomic_read(&stfb->iface->stamp); - if (!p_atomic_read(&stfb->revalidate)) + if (stfb->iface_stamp == new_stamp) return; /* validate the fb */ - if (!stfb->iface->validate(stfb->iface, stfb->statts, stfb->num_statts, textures)) - return; + do { + if (!stfb->iface->validate(stfb->iface, stfb->statts, + stfb->num_statts, textures)) + return; + + stfb->iface_stamp = new_stamp; + new_stamp = p_atomic_read(&stfb->iface->stamp); + } while(stfb->iface_stamp != new_stamp); width = stfb->Base.Width; height = stfb->Base.Height; @@ -184,7 +225,7 @@ st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st) memset(&surf_tmpl, 0, sizeof(surf_tmpl)); u_surface_default_template(&surf_tmpl, textures[i], PIPE_BIND_RENDER_TARGET); - ps = pipe->create_surface(pipe, textures[i], &surf_tmpl); + ps = st->pipe->create_surface(st->pipe, textures[i], &surf_tmpl); if (ps) { pipe_surface_reference(&strb->surface, ps); pipe_resource_reference(&strb->texture, ps->texture); @@ -204,14 +245,9 @@ st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st) } if (changed) { - st->dirty.st |= ST_NEW_FRAMEBUFFER; + ++stfb->stamp; _mesa_resize_framebuffer(st->ctx, &stfb->Base, width, height); - - assert(stfb->Base.Width == width); - assert(stfb->Base.Height == height); } - - 
p_atomic_set(&stfb->revalidate, FALSE); } /** @@ -236,8 +272,7 @@ st_framebuffer_update_attachments(struct st_framebuffer *stfb) st_visual_have_buffers(stfb->iface->visual, 1 << statt)) stfb->statts[stfb->num_statts++] = statt; } - - p_atomic_set(&stfb->revalidate, TRUE); + stfb->stamp++; } /** @@ -443,6 +478,7 @@ st_framebuffer_create(struct st_framebuffer_iface *stfbi) &stfb->Base._ColorReadBufferIndex); stfb->iface = stfbi; + stfb->iface_stamp = p_atomic_read(&stfbi->stamp) - 1; /* add the color buffer */ idx = stfb->Base._ColorDrawBufferIndexes[0]; @@ -454,6 +490,7 @@ st_framebuffer_create(struct st_framebuffer_iface *stfbi) st_framebuffer_add_renderbuffer(stfb, BUFFER_DEPTH); st_framebuffer_add_renderbuffer(stfb, BUFFER_ACCUM); + stfb->stamp = 0; st_framebuffer_update_attachments(stfb); stfb->Base.Initialized = GL_TRUE; @@ -473,31 +510,6 @@ st_framebuffer_reference(struct st_framebuffer **ptr, } static void -st_context_notify_invalid_framebuffer(struct st_context_iface *stctxi, - struct st_framebuffer_iface *stfbi) -{ - struct st_context *st = (struct st_context *) stctxi; - struct st_framebuffer *stfb; - - /* either draw or read winsys fb */ - stfb = st_ws_framebuffer(st->ctx->WinSysDrawBuffer); - if (!stfb || stfb->iface != stfbi) - stfb = st_ws_framebuffer(st->ctx->WinSysReadBuffer); - - if (stfb && stfb->iface == stfbi) { - p_atomic_set(&stfb->revalidate, TRUE); - } - else { - /* This function is probably getting called when we've detected a - * change in a window's size but the currently bound context is - * not bound to that window. - * If the st_framebuffer_iface structure had a pointer to the - * corresponding st_framebuffer we'd be able to handle this. 
- */ - } -} - -static void st_context_flush(struct st_context_iface *stctxi, unsigned flags, struct pipe_fence_handle **fence) { @@ -696,8 +708,6 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, smapi->get_param(smapi, ST_MANAGER_BROKEN_INVALIDATE); st->iface.destroy = st_context_destroy; - st->iface.notify_invalid_framebuffer = - st_context_notify_invalid_framebuffer; st->iface.flush = st_context_flush; st->iface.teximage = st_context_teximage; st->iface.copy = st_context_copy; @@ -707,38 +717,58 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, return &st->iface; } +static struct st_context_iface * +st_api_get_current(struct st_api *stapi) +{ + GET_CURRENT_CONTEXT(ctx); + struct st_context *st = (ctx) ? ctx->st : NULL; + + return (st) ? &st->iface : NULL; +} + +static struct st_framebuffer * +st_framebuffer_reuse_or_create(struct gl_framebuffer *fb, + struct st_framebuffer_iface *stfbi) +{ + struct st_framebuffer *cur = st_ws_framebuffer(fb), *stfb = NULL; + + if (cur && cur->iface == stfbi) { + /* reuse the current stfb */ + st_framebuffer_reference(&stfb, cur); + } + else { + /* create a new one */ + stfb = st_framebuffer_create(stfbi); + } + + return stfb; +} + static boolean st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, struct st_framebuffer_iface *stdrawi, struct st_framebuffer_iface *streadi) { struct st_context *st = (struct st_context *) stctxi; - struct st_framebuffer *stdraw, *stread, *stfb; + struct st_framebuffer *stdraw, *stread; boolean ret; _glapi_check_multithread(); if (st) { - /* reuse/create the draw fb */ - stfb = st_ws_framebuffer(st->ctx->WinSysDrawBuffer); - if (stfb && stfb->iface == stdrawi) { - stdraw = NULL; - st_framebuffer_reference(&stdraw, stfb); + /* reuse or create the draw fb */ + stdraw = st_framebuffer_reuse_or_create(st->ctx->WinSysDrawBuffer, + stdrawi); + if (streadi != stdrawi) { + /* do the same for the read fb */ + stread = 
st_framebuffer_reuse_or_create(st->ctx->WinSysReadBuffer, + streadi); } else { - stdraw = st_framebuffer_create(stdrawi); - } - - /* reuse/create the read fb */ - stfb = st_ws_framebuffer(st->ctx->WinSysReadBuffer); - if (!stfb || stfb->iface != streadi) - stfb = stdraw; - if (stfb && stfb->iface == streadi) { stread = NULL; - st_framebuffer_reference(&stread, stfb); - } - else { - stread = st_framebuffer_create(streadi); + /* reuse the draw fb for the read fb */ + if (stdraw) + st_framebuffer_reference(&stread, stdraw); } if (stdraw && stread) { @@ -757,6 +787,10 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, } ret = _mesa_make_current(st->ctx, &stdraw->Base, &stread->Base); + + st->draw_stamp = stdraw->stamp - 1; + st->read_stamp = stread->stamp - 1; + st_context_validate(st, stdraw, stread); } else { struct gl_framebuffer *incomplete = _mesa_get_incomplete_framebuffer(); @@ -773,15 +807,6 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, return ret; } -static struct st_context_iface * -st_api_get_current(struct st_api *stapi) -{ - GET_CURRENT_CONTEXT(ctx); - struct st_context *st = (ctx) ? ctx->st : NULL; - - return (st) ? 
&st->iface : NULL; -} - static st_proc_t st_api_get_proc_address(struct st_api *stapi, const char *procname) { @@ -857,6 +882,8 @@ st_manager_validate_framebuffers(struct st_context *st) st_framebuffer_validate(stdraw, st); if (stread && stread != stdraw) st_framebuffer_validate(stread, st); + + st_context_validate(st, stdraw, stread); } /** diff --git a/src/mesa/main/texrender.c b/src/mesa/swrast/s_texrender.c index a7641a5f9a4..52d03c92ac7 100644 --- a/src/mesa/main/texrender.c +++ b/src/mesa/swrast/s_texrender.c @@ -1,12 +1,12 @@ -#include "context.h" -#include "colormac.h" -#include "fbobject.h" -#include "macros.h" -#include "texfetch.h" -#include "teximage.h" -#include "texrender.h" -#include "renderbuffer.h" +#include "main/context.h" +#include "main/colormac.h" +#include "main/fbobject.h" +#include "main/macros.h" +#include "main/texfetch.h" +#include "main/teximage.h" +#include "main/renderbuffer.h" +#include "swrast/swrast.h" /* @@ -628,9 +628,9 @@ update_wrapper(struct gl_context *ctx, struct gl_renderbuffer_attachment *att) * \sa _mesa_framebuffer_renderbuffer */ void -_mesa_render_texture(struct gl_context *ctx, - struct gl_framebuffer *fb, - struct gl_renderbuffer_attachment *att) +_swrast_render_texture(struct gl_context *ctx, + struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att) { (void) fb; @@ -642,8 +642,8 @@ _mesa_render_texture(struct gl_context *ctx, void -_mesa_finish_render_texture(struct gl_context *ctx, - struct gl_renderbuffer_attachment *att) +_swrast_finish_render_texture(struct gl_context *ctx, + struct gl_renderbuffer_attachment *att) { /* do nothing */ /* The renderbuffer texture wrapper will get deleted by the diff --git a/src/mesa/swrast/swrast.h b/src/mesa/swrast/swrast.h index 9b88c70220e..27b74c32486 100644 --- a/src/mesa/swrast/swrast.h +++ b/src/mesa/swrast/swrast.h @@ -206,6 +206,16 @@ extern void _swrast_eject_texture_images(struct gl_context *ctx); +extern void +_swrast_render_texture(struct gl_context *ctx, 
+ struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att); + +extern void +_swrast_finish_render_texture(struct gl_context *ctx, + struct gl_renderbuffer_attachment *att); + + /** * The driver interface for the software rasterizer. diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 5e9b2798c43..7959337decb 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -337,7 +337,8 @@ print_draw_arrays(struct gl_context *ctx, mode, start, count); for (i = 0; i < 32; i++) { - GLuint bufName = exec->array.inputs[i]->BufferObj->Name; + struct gl_buffer_object *bufObj = exec->array.inputs[i]->BufferObj; + GLuint bufName = bufObj->Name; GLint stride = exec->array.inputs[i]->Stride; printf("attr %2d: size %d stride %d enabled %d " "ptr %p Bufobj %u\n", @@ -350,9 +351,8 @@ print_draw_arrays(struct gl_context *ctx, bufName); if (bufName) { - struct gl_buffer_object *buf = _mesa_lookup_bufferobj(ctx, bufName); GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY_ARB, buf); + GL_READ_ONLY_ARB, bufObj); int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr; float *f = (float *) (p + offset); int *k = (int *) f; @@ -364,7 +364,7 @@ print_draw_arrays(struct gl_context *ctx, for (i = 0; i < n; i++) { printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]); } - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, buf); + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, bufObj); } } } diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c index 9068ae240a6..1de290ff602 100644 --- a/src/mesa/vbo/vbo_rebase.c +++ b/src/mesa/vbo/vbo_rebase.c @@ -78,8 +78,7 @@ GLboolean vbo_all_varyings_in_vbos( const struct gl_client_array *arrays[] ) GLuint i; for (i = 0; i < VERT_ATTRIB_MAX; i++) - if (arrays[i]->StrideB && - arrays[i]->BufferObj->Name == 0) + if (arrays[i]->BufferObj->Name == 0) return GL_FALSE; return GL_TRUE; @@ -90,8 +89,7 @@ GLboolean vbo_any_varyings_in_vbos( const struct 
gl_client_array *arrays[] ) GLuint i; for (i = 0; i < VERT_ATTRIB_MAX; i++) - if (arrays[i]->StrideB && - arrays[i]->BufferObj->Name != 0) + if (arrays[i]->BufferObj->Name != 0) return GL_TRUE; return GL_FALSE; |