diff options
Diffstat (limited to 'src/mesa')
53 files changed, 1225 insertions, 875 deletions
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 629ec0ffec5..c548e104203 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -2400,6 +2400,9 @@ _mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target, break; } + /* Set MaxLevel large enough to hold the new level when we allocate it */ + _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, dstLevel); + /* Create empty dest image */ if (target == GL_TEXTURE_1D) { _mesa_TexImage1D(target, dstLevel, srcImage->InternalFormat, diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index a0039e800d2..831981558d8 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -61,6 +61,7 @@ DRIVER_SOURCES = \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ + brw_state.c \ brw_state_batch.c \ brw_state_cache.c \ brw_state_dump.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index c9e42a1529b..cfce5d31405 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -36,7 +36,8 @@ #include "brw_util.h" #include "main/macros.h" -static void prepare_cc_vp( struct brw_context *brw ) +void +brw_update_cc_vp(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_cc_viewport ccv; @@ -54,40 +55,9 @@ static void prepare_cc_vp( struct brw_context *brw ) } drm_intel_bo_unreference(brw->cc.vp_bo); - brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), - NULL, 0); + brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv)); } -const struct brw_tracked_state brw_cc_vp = { - .dirty = { - .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, - .brw = BRW_NEW_CONTEXT, - .cache = 0 - }, - .prepare = prepare_cc_vp -}; - -struct brw_cc_unit_key { - GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; - - GLenum stencil_func[2], stencil_fail_op[2]; - GLenum stencil_pass_depth_fail_op[2], 
stencil_pass_depth_pass_op[2]; - GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; - GLenum logic_op; - - GLenum blend_eq_rgb, blend_eq_a; - GLenum blend_src_rgb, blend_src_a; - GLenum blend_dst_rgb, blend_dst_a; - - GLenum alpha_func; - GLclampf alpha_ref; - - GLboolean dither; - - GLboolean depth_test, depth_write; - GLenum depth_func; -}; - /** * Modify blend function to force destination alpha to 1.0 * @@ -110,136 +80,83 @@ fix_xRGB_alpha(GLenum function) return function; } -static void -cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) +static void prepare_cc_unit(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; - const unsigned back = ctx->Stencil._BackFace; - - memset(key, 0, sizeof(*key)); - - key->stencil = ctx->Stencil._Enabled; - key->stencil_two_side = ctx->Stencil._TestTwoSide; - - if (key->stencil) { - key->stencil_func[0] = ctx->Stencil.Function[0]; - key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; - key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; - key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; - key->stencil_ref[0] = ctx->Stencil.Ref[0]; - key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; - key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; - } - if (key->stencil_two_side) { - key->stencil_func[1] = ctx->Stencil.Function[back]; - key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; - key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; - key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; - key->stencil_ref[1] = ctx->Stencil.Ref[back]; - key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; - key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; - } - - if (ctx->Color._LogicOpEnabled) - key->logic_op = ctx->Color.LogicOp; - else - key->logic_op = GL_COPY; - - key->color_blend = ctx->Color.BlendEnabled; - if (key->color_blend) { - key->blend_eq_rgb = ctx->Color.BlendEquationRGB; - key->blend_eq_a = 
ctx->Color.BlendEquationA; - key->blend_src_rgb = ctx->Color.BlendSrcRGB; - key->blend_dst_rgb = ctx->Color.BlendDstRGB; - key->blend_src_a = ctx->Color.BlendSrcA; - key->blend_dst_a = ctx->Color.BlendDstA; - - /* If the renderbuffer is XRGB, we have to frob the blend function to - * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA - * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO. - */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) { - key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb); - key->blend_src_a = fix_xRGB_alpha(key->blend_src_a); - key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb); - key->blend_dst_a = fix_xRGB_alpha(key->blend_dst_a); - } - } - - key->alpha_enabled = ctx->Color.AlphaEnabled; - if (key->alpha_enabled) { - key->alpha_func = ctx->Color.AlphaFunc; - key->alpha_ref = ctx->Color.AlphaRef; - } - - key->dither = ctx->Color.DitherFlag; - - key->depth_test = ctx->Depth.Test; - if (key->depth_test) { - key->depth_func = ctx->Depth.Func; - key->depth_write = ctx->Depth.Mask; - } + brw_add_validated_bo(brw, brw->cc.vp_bo); } /** * Creates the state cache entry for the given CC unit key. 
*/ -static drm_intel_bo * -cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) +static void upload_cc_unit(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct brw_cc_unit_state cc; - drm_intel_bo *bo; + void *map; memset(&cc, 0, sizeof(cc)); /* _NEW_STENCIL */ - if (key->stencil) { + if (ctx->Stencil._Enabled) { + const unsigned back = ctx->Stencil._BackFace; + cc.cc0.stencil_enable = 1; cc.cc0.stencil_func = - intel_translate_compare_func(key->stencil_func[0]); + intel_translate_compare_func(ctx->Stencil.Function[0]); cc.cc0.stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[0]); + intel_translate_stencil_op(ctx->Stencil.FailFunc[0]); cc.cc0.stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]); cc.cc0.stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); - cc.cc1.stencil_ref = key->stencil_ref[0]; - cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; - cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]); + cc.cc1.stencil_ref = ctx->Stencil.Ref[0]; + cc.cc1.stencil_write_mask = ctx->Stencil.WriteMask[0]; + cc.cc1.stencil_test_mask = ctx->Stencil.ValueMask[0]; - if (key->stencil_two_side) { + if (ctx->Stencil._TestTwoSide) { cc.cc0.bf_stencil_enable = 1; cc.cc0.bf_stencil_func = - intel_translate_compare_func(key->stencil_func[1]); + intel_translate_compare_func(ctx->Stencil.Function[back]); cc.cc0.bf_stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[1]); + intel_translate_stencil_op(ctx->Stencil.FailFunc[back]); cc.cc0.bf_stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]); cc.cc0.bf_stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); - 
cc.cc1.bf_stencil_ref = key->stencil_ref[1]; - cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; - cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; + intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]); + cc.cc1.bf_stencil_ref = ctx->Stencil.Ref[back]; + cc.cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back]; + cc.cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back]; } /* Not really sure about this: */ - if (key->stencil_write_mask[0] || - (key->stencil_two_side && key->stencil_write_mask[1])) + if (ctx->Stencil.WriteMask[0] || + (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back])) cc.cc0.stencil_write_enable = 1; } /* _NEW_COLOR */ - if (key->logic_op != GL_COPY) { + if (ctx->Color._LogicOpEnabled && ctx->Color.LogicOp != GL_COPY) { cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; + cc.cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp); + } else if (ctx->Color.BlendEnabled) { + GLenum eqRGB = ctx->Color.BlendEquationRGB; + GLenum eqA = ctx->Color.BlendEquationA; + GLenum srcRGB = ctx->Color.BlendSrcRGB; + GLenum dstRGB = ctx->Color.BlendDstRGB; + GLenum srcA = ctx->Color.BlendSrcA; + GLenum dstA = ctx->Color.BlendDstA; + + /* If the renderbuffer is XRGB, we have to frob the blend function to + * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA + * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO. 
+ */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) { + srcRGB = fix_xRGB_alpha(srcRGB); + srcA = fix_xRGB_alpha(srcA); + dstRGB = fix_xRGB_alpha(dstRGB); + dstA = fix_xRGB_alpha(dstA); + } if (eqRGB == GL_MIN || eqRGB == GL_MAX) { srcRGB = dstRGB = GL_ONE; @@ -263,25 +180,27 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) eqA != eqRGB); } - if (key->alpha_enabled) { + if (ctx->Color.AlphaEnabled) { cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); + cc.cc3.alpha_test_func = + intel_translate_compare_func(ctx->Color.AlphaFunc); cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], ctx->Color.AlphaRef); } - if (key->dither) { + if (ctx->Color.DitherFlag) { cc.cc5.dither_enable = 1; cc.cc6.y_dither_offset = 0; cc.cc6.x_dither_offset = 0; } /* _NEW_DEPTH */ - if (key->depth_test) { + if (ctx->Depth.Test) { cc.cc2.depth_test = 1; - cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); - cc.cc2.depth_write_enable = key->depth_write; + cc.cc2.depth_test_function = + intel_translate_compare_func(ctx->Depth.Func); + cc.cc2.depth_write_enable = ctx->Depth.Mask; } /* CACHE_NEW_CC_VP */ @@ -290,43 +209,25 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) if (INTEL_DEBUG & DEBUG_STATS) cc.cc5.statistics_enable = 1; - bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, - key, sizeof(*key), - &brw->cc.vp_bo, 1, - &cc, sizeof(cc)); + map = brw_state_batch(brw, sizeof(cc), 64, + &brw->cc.state_bo, &brw->cc.state_offset); + memcpy(map, &cc, sizeof(cc)); + brw->state.dirty.cache |= CACHE_NEW_CC_UNIT; /* Emit CC viewport relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_cc_unit_state, cc4), + drm_intel_bo_emit_reloc(brw->cc.state_bo, (brw->cc.state_offset + + offsetof(struct brw_cc_unit_state, + cc4)), brw->cc.vp_bo, 0, 
I915_GEM_DOMAIN_INSTRUCTION, 0); - - return bo; -} - -static void prepare_cc_unit( struct brw_context *brw ) -{ - struct brw_cc_unit_key key; - - cc_unit_populate_key(brw, &key); - - drm_intel_bo_unreference(brw->cc.state_bo); - brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT, - &key, sizeof(key), - &brw->cc.vp_bo, 1, - NULL); - - if (brw->cc.state_bo == NULL) - brw->cc.state_bo = cc_unit_create_from_key(brw, &key); } const struct brw_tracked_state brw_cc_unit = { .dirty = { .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH, - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = CACHE_NEW_CC_VP }, .prepare = prepare_cc_unit, + .emit = upload_cc_unit, }; - - - diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index e688431b126..6d064b822e5 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -34,8 +34,6 @@ #include "main/api_noop.h" #include "main/macros.h" #include "main/simple_list.h" -#include "program/shader_api.h" - #include "brw_context.h" #include "brw_defines.h" #include "brw_draw.h" @@ -54,6 +52,9 @@ static void brwInitDriverFunctions( struct dd_function_table *functions ) brwInitFragProgFuncs( functions ); brw_init_queryobj_functions(functions); + + functions->Enable = brw_enable; + functions->DepthRange = brw_depth_range; } GLboolean brwCreateContext( int api, @@ -187,6 +188,11 @@ GLboolean brwCreateContext( int api, brw_draw_init( brw ); + /* Now that most driver functions are hooked up, initialize some of the + * immediate state. 
+ */ + brw_update_cc_vp(brw); + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d97634c1c60..cc4e6638e8b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -131,6 +131,7 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_PSP 0x800 #define BRW_NEW_WM_SURFACES 0x1000 +#define BRW_NEW_BINDING_TABLE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 /** @@ -143,6 +144,8 @@ struct brw_context; #define BRW_NEW_NR_WM_SURFACES 0x40000 #define BRW_NEW_NR_VS_SURFACES 0x80000 #define BRW_NEW_INDEX_BUFFER 0x100000 +#define BRW_NEW_VS_CONSTBUF 0x200000 +#define BRW_NEW_WM_CONSTBUF 0x200000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -160,7 +163,6 @@ struct brw_state_flags { struct brw_vertex_program { struct gl_vertex_program program; GLuint id; - drm_intel_bo *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; }; @@ -172,7 +174,6 @@ struct brw_fragment_program { GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ GLboolean use_const_buffer; - drm_intel_bo *const_buffer; /** Program constant buffer/surface */ /** for debugging, which texture units are referenced */ GLbitfield tex_units_used; @@ -301,8 +302,6 @@ enum brw_cache_id { BRW_CLIP_VP, BRW_CLIP_UNIT, BRW_CLIP_PROG, - BRW_SS_SURFACE, - BRW_SS_SURF_BIND, BRW_MAX_CACHE }; @@ -376,8 +375,6 @@ struct brw_tracked_state { #define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) #define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) #define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) -#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) -#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) struct brw_cached_batch_item { struct header *header; @@ -460,12 +457,11 @@ struct brw_context * consisting of the vertex buffers, pipelined state pointers, * the CURBE, the depth buffer, and a query BO. 
*/ - drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + 16]; + drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; int validated_bo_count; } state; - struct brw_cache cache; /** non-surface items */ - struct brw_cache surface_cache; /* surface items */ + struct brw_cache cache; struct brw_cached_batch_item *cached_batch_items; struct { @@ -594,10 +590,13 @@ struct brw_context drm_intel_bo *prog_bo; drm_intel_bo *state_bo; + drm_intel_bo *const_bo; /** Binding table of pointers to surf_bo entries */ drm_intel_bo *bind_bo; + uint32_t bind_bo_offset; drm_intel_bo *surf_bo[BRW_VS_MAX_SURF]; + uint32_t surf_offset[BRW_VS_MAX_SURF]; GLuint nr_surfaces; } vs; @@ -649,10 +648,13 @@ struct brw_context /** Binding table of pointers to surf_bo entries */ drm_intel_bo *bind_bo; + uint32_t bind_bo_offset; drm_intel_bo *surf_bo[BRW_WM_MAX_SURF]; + uint32_t surf_offset[BRW_WM_MAX_SURF]; drm_intel_bo *prog_bo; drm_intel_bo *state_bo; + drm_intel_bo *const_bo; } wm; @@ -667,6 +669,7 @@ struct brw_context drm_intel_bo *color_calc_state_bo; drm_intel_bo *state_bo; + uint32_t state_offset; } cc; struct { @@ -727,6 +730,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ); */ void brw_upload_urb_fence(struct brw_context *brw); +/* brw_cc.c */ +void brw_update_cc_vp(struct brw_context *brw); + /* brw_curbe.c */ void brw_upload_cs_urb_state(struct brw_context *brw); @@ -734,6 +740,10 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); +/* brw_state.c */ +void brw_enable(GLcontext * ctx, GLenum cap, GLboolean state); +void brw_depth_range(GLcontext *ctx, GLclampd nearval, GLclampd farval); + /*====================================================================== * Inline conversion functions. 
These are better-typed than the * macros used previously: diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 3d52f6f6047..8196d8ca625 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -182,8 +182,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLcontext *ctx = &brw->intel.ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - const struct brw_fragment_program *fp = - brw_fragment_program_const(brw->fragment_program); const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; @@ -200,8 +198,6 @@ static void prepare_constant_buffer(struct brw_context *brw) if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; - _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); - /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = *brw->wm.prog_data->param[i]; @@ -244,14 +240,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - if (brw->vertex_program->IsNVProgram) - _mesa_load_tracked_matrices(ctx); - - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - if (vp->use_const_buffer) { /* Load the subset of push constants that will get used when * we also have a pull constant buffer. 
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 2d3556b8054..39bf5b63fc2 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -998,7 +998,7 @@ # define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) # define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) # define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) -# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 12) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11) # define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) # define GEN6_WM_USES_SOURCE_W (1 << 8) # define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 3e305c89686..16331cc3ac0 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -151,9 +151,6 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.start_instance_location = 0; prim_packet.base_vert_location = prim->basevertex; - /* Can't wrap here, since we rely on the validated state. */ - intel->no_batch_wrap = GL_TRUE; - /* If we're set to always flush, do it before and after the primitive emit. 
* We want to catch both missed flushes that hurt instruction/state cache * and missed flushes of the render cache as it heads to other parts of @@ -169,8 +166,6 @@ static void brw_emit_prim(struct brw_context *brw, if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel->batch); } - - intel->no_batch_wrap = GL_FALSE; } static void brw_merge_inputs( struct brw_context *brw, @@ -394,11 +389,14 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, } } + intel->no_batch_wrap = GL_TRUE; brw_upload_state(brw); } brw_emit_prim(brw, &prim[i], hw_prim); + intel->no_batch_wrap = GL_FALSE; + retval = GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 175899b0268..34dfe10cb93 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -286,6 +286,7 @@ static void brw_set_ff_sync_message(struct brw_context *brw, GLuint response_length, GLboolean end_of_thread) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ @@ -298,8 +299,12 @@ static void brw_set_ff_sync_message(struct brw_context *brw, insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ insn->bits3.urb_gen5.msg_length = 1; insn->bits3.urb_gen5.end_of_thread = end_of_thread; - insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_gen5.end_of_thread = end_of_thread; + if (intel->gen >= 6) { + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; + } else { + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_gen5.end_of_thread = end_of_thread; + } } static void brw_set_urb_message( struct brw_context *brw, @@ -966,10 +971,25 @@ void brw_math_16( struct brw_compile *p, struct brw_reg src, GLuint precision ) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 
2 : 1; GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + if (intel->gen >= 6) { + insn = next_insn(p, BRW_OPCODE_MATH); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_src1(insn, brw_null_reg()); + return; + } + /* First instruction: */ brw_push_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 35908ee7b69..572175f463e 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -96,18 +96,12 @@ const struct brw_tracked_state brw_drawing_rect = { .emit = upload_drawing_rect }; -static void prepare_binding_table_pointers(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->vs.bind_bo); - brw_add_validated_bo(brw, brw->wm.bind_bo); -} - /** * Upload the binding table pointers, which point each stage's array of surface * state pointers. * * The binding table pointers are relative to the surface state base address, - * which is 0. + * which points at the batchbuffer containing the streamed batch state. 
*/ static void upload_binding_table_pointers(struct brw_context *brw) { @@ -115,24 +109,20 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - if (brw->vs.bind_bo != NULL) - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ - else - OUT_BATCH(0); + OUT_BATCH(brw->vs.bind_bo_offset); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + OUT_BATCH(brw->wm.bind_bo_offset); ADVANCE_BATCH(); } const struct brw_tracked_state brw_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, - .cache = CACHE_NEW_SURF_BIND, + .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE, + .cache = 0, }, - .prepare = prepare_binding_table_pointers, .emit = upload_binding_table_pointers, }; @@ -141,7 +131,7 @@ const struct brw_tracked_state brw_binding_table_pointers = { * state pointers. * * The binding table pointers are relative to the surface state base address, - * which is 0. + * which points at the batchbuffer containing the streamed batch state. 
*/ static void upload_gen6_binding_table_pointers(struct brw_context *brw) { @@ -153,22 +143,18 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw) GEN6_BINDING_TABLE_MODIFY_GS | GEN6_BINDING_TABLE_MODIFY_PS | (4 - 2)); - if (brw->vs.bind_bo != NULL) - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ - else - OUT_BATCH(0); + OUT_BATCH(brw->vs.bind_bo_offset); /* vs */ OUT_BATCH(0); /* gs */ - OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */ ADVANCE_BATCH(); } const struct brw_tracked_state gen6_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, - .cache = CACHE_NEW_SURF_BIND, + .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE, + .cache = 0, }, - .prepare = prepare_binding_table_pointers, .emit = upload_gen6_binding_table_pointers, }; @@ -199,7 +185,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->cc.state_offset); ADVANCE_BATCH(); brw->state.dirty.brw |= BRW_NEW_PSP; @@ -213,7 +200,6 @@ static void prepare_psp_urb_cbs(struct brw_context *brw) brw_add_validated_bo(brw, brw->clip.state_bo); brw_add_validated_bo(brw, brw->sf.state_bo); brw_add_validated_bo(brw, brw->wm.state_bo); - brw_add_validated_bo(brw, brw->cc.state_bo); } static void upload_psp_urb_cbs(struct brw_context *brw ) @@ -590,23 +576,23 @@ const struct brw_tracked_state brw_invarient_state = { /** * Define the base addresses which some state is referenced from. * - * This allows us to avoid having to emit relocations in many places for - * cached state, and instead emit pointers inside of large, mostly-static - * state pools. 
This comes at the expense of memory, and more expensive cache - * misses. + * This allows us to avoid having to emit relocations for the objects, + * and is actually required for binding table pointers on gen6. + * + * Surface state base address covers binding table pointers and + * surface state objects, but not the surfaces that the surface state + * objects point to. */ static void upload_state_base_address( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - /* Output the structure (brw_state_base_address) directly to the - * batchbuffer, so we can emit relocations inline. - */ if (intel->gen >= 6) { BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_BATCH(1); /* Surface state base address */ + OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0, + 1); /* Surface state base address */ OUT_BATCH(1); /* Dynamic state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* Instruction base address */ @@ -619,7 +605,8 @@ static void upload_state_base_address( struct brw_context *brw ) BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_BATCH(1); /* Surface state base address */ + OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0, + 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* Instruction base address */ OUT_BATCH(1); /* General state upper bound */ @@ -630,7 +617,8 @@ static void upload_state_base_address( struct brw_context *brw ) BEGIN_BATCH(6); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_BATCH(1); /* Surface state base address */ + OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0, + 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object 
upper bound */ @@ -641,7 +629,7 @@ static void upload_state_base_address( struct brw_context *brw ) const struct brw_tracked_state brw_state_base_address = { .dirty = { .mesa = 0, - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_BATCH, .cache = 0, }, .emit = upload_state_base_address diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index cc9ac6d5749..aeed24d4e14 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -31,10 +31,10 @@ #include "main/imports.h" #include "main/enums.h" +#include "main/shaderobj.h" #include "program/prog_parameter.h" #include "program/program.h" #include "program/programopt.h" -#include "program/shader_api.h" #include "tnl/tnl.h" #include "brw_context.h" @@ -95,20 +95,6 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { - if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - struct gl_fragment_program *fp = (struct gl_fragment_program *) prog; - struct brw_fragment_program *brw_fp = brw_fragment_program(fp); - - drm_intel_bo_unreference(brw_fp->const_buffer); - } - - if (prog->Target == GL_VERTEX_PROGRAM_ARB) { - struct gl_vertex_program *vp = (struct gl_vertex_program *) prog; - struct brw_vertex_program *brw_vp = brw_vertex_program(vp); - - drm_intel_bo_unreference(brw_vp->const_buffer); - } - _mesa_delete_program( ctx, prog ); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index a95acb4cf82..e290ca92f60 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -105,8 +105,7 @@ static void upload_sf_vp(struct brw_context *brw) } drm_intel_bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv), - NULL, 0); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv)); } const struct brw_tracked_state brw_sf_vp = { 
diff --git a/src/mesa/drivers/dri/i965/brw_state.c b/src/mesa/drivers/dri/i965/brw_state.c new file mode 100644 index 00000000000..1e77e427d38 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_state.c @@ -0,0 +1,49 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +#include "brw_context.h" + +void +brw_enable(GLcontext *ctx, GLenum cap, GLboolean state) +{ + struct brw_context *brw = brw_context(ctx); + + switch (cap) { + case GL_DEPTH_CLAMP: + brw_update_cc_vp(brw); + break; + } +} + +void +brw_depth_range(GLcontext *ctx, GLclampd nearval, GLclampd farval) +{ + struct brw_context *brw = brw_context(ctx); + + if (ctx->Transform.DepthClamp) + brw_update_cc_vp(brw); +} diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 85949215e82..40eece276b7 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -48,10 +48,11 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo) const struct brw_tracked_state brw_blend_constant_color; const struct brw_tracked_state brw_cc_unit; -const struct brw_tracked_state brw_cc_vp; const struct brw_tracked_state brw_check_fallback; const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; +const struct brw_tracked_state brw_vs_constants; +const struct brw_tracked_state brw_wm_constants; const struct brw_tracked_state brw_constant_buffer; const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; @@ -80,6 +81,7 @@ const struct brw_tracked_state brw_wm_prog; const struct brw_tracked_state brw_wm_samplers; const struct brw_tracked_state brw_wm_constant_surface; const struct brw_tracked_state brw_wm_surfaces; +const struct brw_tracked_state brw_wm_binding_table; const struct brw_tracked_state brw_wm_unit; const struct brw_tracked_state brw_psp_urb_cbs; @@ -93,7 +95,6 @@ const struct brw_tracked_state brw_index_buffer; const struct brw_tracked_state gen6_binding_table_pointers; const struct brw_tracked_state gen6_blend_state; const struct brw_tracked_state gen6_cc_state_pointers; -const struct brw_tracked_state gen6_cc_vp; const struct brw_tracked_state gen6_clip_state; 
const struct brw_tracked_state gen6_clip_vp; const struct brw_tracked_state gen6_color_calc_state; @@ -108,20 +109,6 @@ const struct brw_tracked_state gen6_viewport_state; const struct brw_tracked_state gen6_vs_state; const struct brw_tracked_state gen6_wm_state; -/** - * Use same key for WM and VS surfaces. - */ -struct brw_surface_key { - GLenum target, depthmode; - drm_intel_bo *bo; - GLint format, internal_format; - GLint first_level, last_level; - GLint width, height, depth; - GLint pitch, cpp; - uint32_t tiling; - GLuint offset; -}; - /*********************************************************************** * brw_state.c */ @@ -137,9 +124,7 @@ void brw_clear_validated_bos(struct brw_context *brw); drm_intel_bo *brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, - GLuint size, - drm_intel_bo **reloc_bufs, - GLuint nr_reloc_bufs); + GLuint size); drm_intel_bo *brw_upload_cache(struct brw_cache *cache, enum brw_cache_id cache_id, @@ -173,7 +158,6 @@ void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); void brw_destroy_caches( struct brw_context *brw ); -void brw_state_cache_bo_delete(struct brw_cache *cache, drm_intel_bo *bo); /*********************************************************************** * brw_state_batch.c @@ -186,10 +170,17 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, GLuint sz ); void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache( struct brw_context *brw ); +void *brw_state_batch(struct brw_context *brw, + int size, + int alignment, + drm_intel_bo **out_bo, + uint32_t *out_offset); /* brw_wm_surface_state.c */ -drm_intel_bo * -brw_create_constant_surface( struct brw_context *brw, - struct brw_surface_key *key ); +void brw_create_constant_surface(struct brw_context *brw, + drm_intel_bo *bo, + int width, + drm_intel_bo **out_bo, + uint32_t *out_offset); #endif diff --git 
a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 39019412fda..be3989eb7db 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -97,3 +97,52 @@ void brw_destroy_batch_cache( struct brw_context *brw ) { brw_clear_batch_cache(brw); } + +/** + * Allocates a block of space in the batchbuffer for indirect state. + * + * We don't want to allocate separate BOs for every bit of indirect + * state in the driver. It means overallocating by a significant + * margin (4096 bytes, even if the object is just a 20-byte surface + * state), and more buffers to walk and count for aperture size checking. + * + * However, due to the restrictions imposed by the aperture size + * checking performance hacks, we can't have the batch point at a + * separate indirect state buffer, because once the batch points at + * it, no more relocations can be added to it. So, we sneak these + * buffers in at the top of the batchbuffer. + */ +void * +brw_state_batch(struct brw_context *brw, + int size, + int alignment, + drm_intel_bo **out_bo, + uint32_t *out_offset) +{ + struct intel_batchbuffer *batch = brw->intel.batch; + uint32_t offset; + + assert(size < batch->buf->size); + offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment); + + /* If allocating from the top would wrap below the batchbuffer, or + * if the batch's used space (plus the reserved pad) collides with our + * space, then flush and try again. 
+ */ + if (batch->state_batch_offset < size || + offset < batch->ptr - batch->map + batch->reserved_space) { + intel_batchbuffer_flush(batch); + offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment); + } + + batch->state_batch_offset = offset; + + if (*out_bo != batch->buf) { + drm_intel_bo_unreference(*out_bo); + drm_intel_bo_reference(batch->buf); + *out_bo = batch->buf; + } + + *out_offset = offset; + return batch->map + offset; +} diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index ea81ad13417..b31d84953a1 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -310,9 +310,7 @@ drm_intel_bo * brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, - GLuint data_size, - drm_intel_bo **reloc_bufs, - GLuint nr_reloc_bufs) + GLuint data_size) { drm_intel_bo *bo; struct brw_cache_item *item, lookup; @@ -321,8 +319,8 @@ brw_cache_data(struct brw_cache *cache, lookup.cache_id = cache_id; lookup.key = data; lookup.key_size = data_size; - lookup.reloc_bufs = reloc_bufs; - lookup.nr_reloc_bufs = nr_reloc_bufs; + lookup.reloc_bufs = NULL; + lookup.nr_reloc_bufs = 0; hash = hash_key(&lookup); lookup.hash = hash; @@ -335,7 +333,7 @@ brw_cache_data(struct brw_cache *cache, bo = brw_upload_cache(cache, cache_id, data, data_size, - reloc_bufs, nr_reloc_bufs, + NULL, 0, data, data_size); return bo; @@ -396,29 +394,10 @@ brw_init_non_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE); } - -static void -brw_init_surface_cache(struct brw_context *brw) -{ - struct brw_cache *cache = &brw->surface_cache; - - cache->brw = brw; - - cache->size = 7; - cache->n_items = 0; - cache->items = (struct brw_cache_item **) - calloc(1, cache->size * sizeof(struct brw_cache_item)); - - brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE); - brw_init_cache_id(cache, "SS_SURF_BIND", 
BRW_SS_SURF_BIND); -} - - void brw_init_caches(struct brw_context *brw) { brw_init_non_surface_cache(brw); - brw_init_surface_cache(brw); } @@ -452,56 +431,17 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) brw->state.dirty.cache |= ~0; } -/* Clear all entries from the cache that point to the given bo. - * - * This lets us release memory for reuse earlier for known-dead buffers, - * at the cost of walking the entire hash table. - */ -void -brw_state_cache_bo_delete(struct brw_cache *cache, drm_intel_bo *bo) -{ - struct brw_cache_item **prev; - GLuint i; - - if (INTEL_DEBUG & DEBUG_STATE) - printf("%s\n", __FUNCTION__); - - for (i = 0; i < cache->size; i++) { - for (prev = &cache->items[i]; *prev;) { - struct brw_cache_item *c = *prev; - - if (drm_intel_bo_references(c->bo, bo)) { - int j; - - *prev = c->next; - - for (j = 0; j < c->nr_reloc_bufs; j++) - drm_intel_bo_unreference(c->reloc_bufs[j]); - drm_intel_bo_unreference(c->bo); - free((void *)c->key); - free(c); - cache->n_items--; - } else { - prev = &c->next; - } - } - } -} - void brw_state_cache_check_size(struct brw_context *brw) { if (INTEL_DEBUG & DEBUG_STATE) printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); - /* un-tuned guess. We've got around 20 state objects for a total of around - * 32k, so 1000 of them is around 1.5MB. + /* un-tuned guess. Each object is generally a page, so 1000 of them is 4 MB of + * state cache. 
*/ if (brw->cache.n_items > 1000) brw_clear_cache(brw, &brw->cache); - - if (brw->surface_cache.n_items > 1000) - brw_clear_cache(brw, &brw->surface_cache); } @@ -528,5 +468,4 @@ void brw_destroy_caches(struct brw_context *brw) { brw_destroy_cache(brw, &brw->cache); - brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index cb66806ebf3..d410861bdf6 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -111,8 +111,8 @@ static void dump_wm_surface_state(struct brw_context *brw) continue; } drm_intel_bo_map(surf_bo, GL_FALSE); - surfoff = surf_bo->offset; - surf = (struct brw_surface_state *)(surf_bo->virtual); + surfoff = surf_bo->offset + brw->wm.surf_offset[i]; + surf = (struct brw_surface_state *)(surf_bo->virtual + brw->wm.surf_offset[i]); sprintf(name, "WM SS%d", i); state_out(name, surf, surfoff, 0, "%s %s\n", diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 49629ba2289..f92a19c2aa0 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -61,12 +61,15 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, + &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ + &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ + &brw_vs_surfaces, /* must do before unit */ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ &brw_wm_surfaces, /* must do before samplers and unit */ + &brw_wm_binding_table, &brw_wm_samplers, &brw_wm_unit, @@ -113,7 +116,6 @@ const struct brw_tracked_state *gen6_atoms[] = &gen6_clip_vp, &gen6_sf_vp, - &gen6_cc_vp, /* Command packets: */ &brw_invarient_state, @@ -126,9 +128,13 @@ const struct brw_tracked_state *gen6_atoms[] = &gen6_depth_stencil_state, /* must do before 
cc unit */ &gen6_cc_state_pointers, + &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ + &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ + &brw_vs_surfaces, /* must do before unit */ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ &brw_wm_surfaces, /* must do before samplers and unit */ + &brw_wm_binding_table, &brw_wm_samplers, &gen6_sampler_state, @@ -266,6 +272,8 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), DEFINE_BIT(BRW_NEW_PSP), + DEFINE_BIT(BRW_NEW_WM_SURFACES), + DEFINE_BIT(BRW_NEW_BINDING_TABLE), DEFINE_BIT(BRW_NEW_INDICES), DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), @@ -292,8 +300,6 @@ static struct dirty_bit_map cache_bits[] = { DEFINE_BIT(CACHE_NEW_CLIP_VP), DEFINE_BIT(CACHE_NEW_CLIP_UNIT), DEFINE_BIT(CACHE_NEW_CLIP_PROG), - DEFINE_BIT(CACHE_NEW_SURFACE), - DEFINE_BIT(CACHE_NEW_SURF_BIND), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index f17fe485306..2a7fa5b6997 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -278,7 +278,7 @@ struct brw_aa_line_parameters struct header header; struct { - GLuint aa_coverage_scope:8; + GLuint aa_coverage_slope:8; GLuint pad0:8; GLuint aa_coverage_bias:8; GLuint pad1:8; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 568c2e3b030..0250a68d292 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -42,42 +42,59 @@ * Otherwise, constants go through the CURBEs using the brw_constant_buffer * state atom. 
*/ -static drm_intel_bo * -brw_vs_update_constant_buffer(struct brw_context *brw) +static void +prepare_vs_constants(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; struct brw_vertex_program *vp = (struct brw_vertex_program *) brw->vertex_program; const struct gl_program_parameter_list *params = vp->program.Base.Parameters; const int size = params->NumParameters * 4 * sizeof(GLfloat); - drm_intel_bo *const_buffer; int i; - /* BRW_NEW_VERTEX_PROGRAM */ - if (!vp->use_const_buffer) - return NULL; - - const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", - size, 64); - - /* _NEW_PROGRAM_CONSTANTS */ + if (vp->program.IsNVProgram) + _mesa_load_tracked_matrices(ctx); /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); - drm_intel_gem_bo_map_gtt(const_buffer); + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) { + if (brw->vs.const_bo) { + drm_intel_bo_unreference(brw->vs.const_bo); + brw->vs.const_bo = NULL; + brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; + } + return; + } + + /* _NEW_PROGRAM_CONSTANTS */ + drm_intel_bo_unreference(brw->vs.const_bo); + brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); + + drm_intel_gem_bo_map_gtt(brw->vs.const_bo); for (i = 0; i < params->NumParameters; i++) { - memcpy(const_buffer->virtual + i * 4 * sizeof(float), + memcpy(brw->vs.const_bo->virtual + i * 4 * sizeof(float), params->ParameterValues[i], 4 * sizeof(float)); } - drm_intel_gem_bo_unmap_gtt(const_buffer); - - return const_buffer; + drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo); + brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; } +const struct brw_tracked_state brw_vs_constants = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .prepare = prepare_vs_constants, +}; + /** * Update the 
surface state for a VS constant buffer. * @@ -88,101 +105,41 @@ brw_update_vs_constant_surface( GLcontext *ctx, GLuint surf) { struct brw_context *brw = brw_context(ctx); - struct brw_surface_key key; struct brw_vertex_program *vp = (struct brw_vertex_program *) brw->vertex_program; const struct gl_program_parameter_list *params = vp->program.Base.Parameters; assert(surf == 0); - /* If we're in this state update atom, we need to update VS constants, so - * free the old buffer and create a new one for the new contents. - */ - drm_intel_bo_unreference(vp->const_buffer); - vp->const_buffer = brw_vs_update_constant_buffer(brw); - /* If there's no constant buffer, then no surface BO is needed to point at * it. */ - if (vp->const_buffer == NULL) { + if (brw->vs.const_bo == NULL) { drm_intel_bo_unreference(brw->vs.surf_bo[surf]); brw->vs.surf_bo[surf] = NULL; return; } - memset(&key, 0, sizeof(key)); - - key.format = MESA_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; - key.bo = vp->const_buffer; - key.depthmode = GL_NONE; - key.pitch = params->NumParameters; - key.width = params->NumParameters; - key.height = 1; - key.depth = 1; - key.cpp = 16; - - /* - printf("%s:\n", __FUNCTION__); - printf(" width %d height %d depth %d cpp %d pitch %d\n", - key.width, key.height, key.depth, key.cpp, key.pitch); - */ - - drm_intel_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, 1, - NULL); - if (brw->vs.surf_bo[surf] == NULL) { - brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); - } + brw_create_constant_surface(brw, brw->vs.const_bo, params->NumParameters, + &brw->vs.surf_bo[surf], + &brw->vs.surf_offset[surf]); } -/** - * Constructs the binding table for the VS surface state. 
- */ -static drm_intel_bo * -brw_vs_get_binding_table(struct brw_context *brw) +static void +prepare_vs_surfaces(struct brw_context *brw) { - drm_intel_bo *bind_bo; - - bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, BRW_VS_MAX_SURF, - NULL); - - if (bind_bo == NULL) { - GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); - uint32_t data[BRW_VS_MAX_SURF]; - int i; - - for (i = 0; i < BRW_VS_MAX_SURF; i++) - if (brw->vs.surf_bo[i]) - data[i] = brw->vs.surf_bo[i]->offset; - else - data[i] = 0; - - bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, BRW_VS_MAX_SURF, - data, data_size); - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_VS_MAX_SURF; i++) { - if (brw->vs.surf_bo[i] != NULL) { - /* The presumed offsets were set in the data values for - * brw_upload_cache. - */ - drm_intel_bo_emit_reloc(bind_bo, i * 4, - brw->vs.surf_bo[i], 0, - I915_GEM_DOMAIN_INSTRUCTION, 0); - } - } + int nr_surfaces = 0; + + if (brw->vs.const_bo) { + brw_add_validated_bo(brw, brw->vs.const_bo); + nr_surfaces = 1; } - return bind_bo; + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } } /** @@ -192,43 +149,50 @@ brw_vs_get_binding_table(struct brw_context *brw) * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and * CACHE_NEW_SURF_BIND for the binding table upload. 
*/ -static void prepare_vs_surfaces(struct brw_context *brw ) +static void upload_vs_surfaces(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; + uint32_t *bind; int i; - int nr_surfaces = 0; - - brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); - for (i = 0; i < BRW_VS_MAX_SURF; i++) { - if (brw->vs.surf_bo[i] != NULL) { - nr_surfaces = i + 1; + /* BRW_NEW_NR_VS_SURFACES */ + if (brw->vs.nr_surfaces == 0) { + if (brw->vs.bind_bo) { + drm_intel_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = NULL; + brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE; } + return; } - if (brw->vs.nr_surfaces != nr_surfaces) { - brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; - brw->vs.nr_surfaces = nr_surfaces; - } + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); - /* Note that we don't end up updating the bind_bo if we don't have a - * surface to be pointing at. This should be relatively harmless, as it - * just slightly increases our working set size. + /* Might want to calculate nr_surfaces first, to avoid taking up so much + * space for the binding table. 
(once we have vs samplers) */ - if (brw->vs.nr_surfaces != 0) { - drm_intel_bo_unreference(brw->vs.bind_bo); - brw->vs.bind_bo = brw_vs_get_binding_table(brw); + bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF, + 32, &brw->vs.bind_bo, &brw->vs.bind_bo_offset); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + /* BRW_NEW_VS_CONSTBUF */ + if (brw->vs.surf_bo[i]) { + bind[i] = brw->vs.surf_offset[i]; + } else { + bind[i] = 0; + } } + + brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE; } const struct brw_tracked_state brw_vs_surfaces = { .dirty = { - .mesa = (_NEW_PROGRAM_CONSTANTS), - .brw = (BRW_NEW_VERTEX_PROGRAM), + .mesa = 0, + .brw = (BRW_NEW_VS_CONSTBUF | + BRW_NEW_NR_VS_SURFACES | + BRW_NEW_BATCH), .cache = 0 }, .prepare = prepare_vs_surfaces, + .emit = upload_vs_surfaces, }; - - - diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index a02e958c5e6..14227a51332 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -83,6 +83,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); dri_bo_release(&brw->vs.bind_bo); + dri_bo_release(&brw->vs.const_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); @@ -99,6 +100,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); + dri_bo_release(&brw->wm.const_bo); dri_bo_release(&brw->cc.prog_bo); dri_bo_release(&brw->cc.state_bo); dri_bo_release(&brw->cc.vp_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 9fbabdc2852..1fc802cfa65 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -74,7 +74,7 @@ static drm_intel_bo *upload_default_color( struct 
brw_context *brw, COPY_4V(sdc.color, color); return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc), NULL, 0); + &sdc, sizeof(sdc)); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index c7b61240e75..17b016b569b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -196,36 +196,40 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) } } -static drm_intel_bo * -brw_create_texture_surface( struct brw_context *brw, - struct brw_surface_key *key ) +static void +brw_update_texture_surface( GLcontext *ctx, GLuint unit ) { + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + const GLuint surf_index = SURF_INDEX_TEXTURE(unit); struct brw_surface_state surf; - drm_intel_bo *bo; + void *map; memset(&surf, 0, sizeof(surf)); surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = translate_tex_target(key->target); - surf.ss0.surface_format = translate_tex_format(key->format, - key->internal_format, - key->depthmode); + surf.ss0.surface_type = translate_tex_target(tObj->Target); + surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat, + firstImage->InternalFormat, + tObj->DepthMode); /* This is ok for all textures with channel width 8bit or less: */ /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - surf.ss1.base_addr = key->bo->offset; /* reloc */ + surf.ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */ - surf.ss2.mip_count = key->last_level - key->first_level; - surf.ss2.width = key->width - 1; - surf.ss2.height = key->height - 1; - brw_set_surface_tiling(&surf, key->tiling); - surf.ss3.pitch = (key->pitch * 
key->cpp) - 1; - surf.ss3.depth = key->depth - 1; + surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; + surf.ss2.width = firstImage->Width - 1; + surf.ss2.height = firstImage->Height - 1; + brw_set_surface_tiling(&surf, intelObj->mt->region->tiling); + surf.ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1; + surf.ss3.depth = firstImage->Depth - 1; surf.ss4.min_lod = 0; - if (key->target == GL_TEXTURE_CUBE_MAP) { + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { surf.ss0.cube_pos_x = 1; surf.ss0.cube_pos_y = 1; surf.ss0.cube_pos_z = 1; @@ -234,71 +238,33 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, - key, sizeof(*key), - &key->bo, 1, - &surf, sizeof(surf)); + map = brw_state_batch(brw, sizeof(surf), 32, + &brw->wm.surf_bo[surf_index], + &brw->wm.surf_offset[surf_index]); + memcpy(map, &surf, sizeof(surf)); /* Emit relocation to surface contents */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), - key->bo, 0, + drm_intel_bo_emit_reloc(brw->wm.surf_bo[surf_index], + brw->wm.surf_offset[surf_index] + + offsetof(struct brw_surface_state, ss1), + intelObj->mt->region->buffer, 0, I915_GEM_DOMAIN_SAMPLER, 0); - - return bo; -} - -static void -brw_update_texture_surface( GLcontext *ctx, GLuint unit ) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - struct brw_surface_key key; - const GLuint surf = SURF_INDEX_TEXTURE(unit); - - memset(&key, 0, sizeof(key)); - - key.format = firstImage->TexFormat; - key.internal_format = firstImage->InternalFormat; - key.pitch = intelObj->mt->region->pitch; - key.depth = firstImage->Depth; - key.bo = intelObj->mt->region->buffer; - key.offset = 0; - - key.target = tObj->Target; - 
key.depthmode = tObj->DepthMode; - key.first_level = intelObj->firstLevel; - key.last_level = intelObj->lastLevel; - key.width = firstImage->Width; - key.height = firstImage->Height; - key.cpp = intelObj->mt->cpp; - key.tiling = intelObj->mt->region->tiling; - - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, 1, - NULL); - if (brw->wm.surf_bo[surf] == NULL) { - brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); - } } - - /** * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -drm_intel_bo * -brw_create_constant_surface( struct brw_context *brw, - struct brw_surface_key *key ) +void +brw_create_constant_surface(struct brw_context *brw, + drm_intel_bo *bo, + int width, + drm_intel_bo **out_bo, + uint32_t *out_offset) { - const GLint w = key->width - 1; + const GLint w = width - 1; struct brw_surface_state surf; - drm_intel_bo *bo; + void *map; memset(&surf, 0, sizeof(surf)); @@ -306,29 +272,26 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - assert(key->bo); - surf.ss1.base_addr = key->bo->offset; /* reloc */ + assert(bo); + surf.ss1.base_addr = bo->offset; /* reloc */ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ - surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ - brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - - bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, - key, sizeof(*key), - &key->bo, 1, - &surf, sizeof(surf)); + surf.ss3.pitch = (width * 16) - 1; /* ignored?? 
*/ + brw_set_surface_tiling(&surf, I915_TILING_NONE); /* tiling now allowed */ + + map = brw_state_batch(brw, sizeof(surf), 32, out_bo, out_offset); + memcpy(map, &surf, sizeof(surf)); /* Emit relocation to surface contents. Section 5.1.1 of the gen4 * bspec ("Data Cache") says that the data cache does not exist as * a separate cache and is just the sampler cache. */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), - key->bo, 0, + drm_intel_bo_emit_reloc(*out_bo, (*out_offset + + offsetof(struct brw_surface_state, ss1)), + bo, 0, I915_GEM_DOMAIN_SAMPLER, 0); - - return bo; } /* Creates a new WM constant buffer reflecting the current fragment program's @@ -337,89 +300,45 @@ brw_create_constant_surface( struct brw_context *brw, * Otherwise, constants go through the CURBEs using the brw_constant_buffer * state atom. */ -static drm_intel_bo * -brw_wm_update_constant_buffer(struct brw_context *brw) +static void +prepare_wm_constants(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; const struct gl_program_parameter_list *params = fp->program.Base.Parameters; const int size = params->NumParameters * 4 * sizeof(GLfloat); - drm_intel_bo *const_buffer; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - if (!fp->use_const_buffer) - return NULL; - - const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", - size, 64); - /* _NEW_PROGRAM_CONSTANTS */ - drm_intel_bo_subdata(const_buffer, 0, size, params->ParameterValues); - - return const_buffer; -} + _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); -/** - * Update the surface state for a WM constant buffer. - * The constant buffer will be (re)allocated here if needed. 
- */ -static void -brw_update_wm_constant_surface( GLcontext *ctx, - GLuint surf) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_surface_key key; - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - const struct gl_program_parameter_list *params = - fp->program.Base.Parameters; - - /* If we're in this state update atom, we need to update WM constants, so - * free the old buffer and create a new one for the new contents. - */ - drm_intel_bo_unreference(fp->const_buffer); - fp->const_buffer = brw_wm_update_constant_buffer(brw); - - /* If there's no constant buffer, then no surface BO is needed to point at - * it. - */ - if (fp->const_buffer == NULL) { - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = NULL; + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (!fp->use_const_buffer) { + if (brw->wm.const_bo) { + drm_intel_bo_unreference(brw->wm.const_bo); + brw->wm.const_bo = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_CONSTBUF; + } return; } - memset(&key, 0, sizeof(key)); + drm_intel_bo_unreference(brw->wm.const_bo); + brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); - key.format = MESA_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; - key.bo = fp->const_buffer; - key.depthmode = GL_NONE; - key.pitch = params->NumParameters; - key.width = params->NumParameters; - key.height = 1; - key.depth = 1; - key.cpp = 16; - - /* - printf("%s:\n", __FUNCTION__); - printf(" width %d height %d depth %d cpp %d pitch %d\n", - key.width, key.height, key.depth, key.cpp, key.pitch); - */ - - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, 1, - NULL); - if (brw->wm.surf_bo[surf] == NULL) { - brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); - } - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + /* _NEW_PROGRAM_CONSTANTS */ + 
drm_intel_bo_subdata(brw->wm.const_bo, 0, size, params->ParameterValues); } +const struct brw_tracked_state brw_wm_constants = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constants, +}; + /** * Updates surface / buffer for fragment shader constant buffer, if * one is required. @@ -428,20 +347,18 @@ brw_update_wm_constant_surface( GLcontext *ctx, * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for * inclusion in the binding table. */ -static void prepare_wm_constant_surface(struct brw_context *brw ) +static void upload_wm_constant_surface(struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; - GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - - drm_intel_bo_unreference(fp->const_buffer); - fp->const_buffer = brw_wm_update_constant_buffer(brw); + const struct gl_program_parameter_list *params = + fp->program.Base.Parameters; /* If there's no constant buffer, then no surface BO is needed to point at * it. 
*/ - if (fp->const_buffer == 0) { + if (brw->wm.const_bo == 0) { if (brw->wm.surf_bo[surf] != NULL) { drm_intel_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; @@ -450,16 +367,20 @@ static void prepare_wm_constant_surface(struct brw_context *brw ) return; } - brw_update_wm_constant_surface(ctx, surf); + brw_create_constant_surface(brw, brw->wm.const_bo, params->NumParameters, + &brw->wm.surf_bo[surf], + &brw->wm.surf_offset[surf]); + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } const struct brw_tracked_state brw_wm_constant_surface = { .dirty = { - .mesa = (_NEW_PROGRAM_CONSTANTS), - .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .mesa = 0, + .brw = (BRW_NEW_WM_CONSTBUF | + BRW_NEW_BATCH), .cache = 0 }, - .prepare = prepare_wm_constant_surface, + .emit = upload_wm_constant_surface, }; @@ -488,6 +409,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, uint32_t draw_x; uint32_t draw_y; } key; + struct brw_surface_state surf; + void *map; memset(&key, 0, sizeof(key)); @@ -554,137 +477,123 @@ brw_update_renderbuffer_surface(struct brw_context *brw, (ctx->Color.BlendEnabled & (1 << unit))); } - drm_intel_bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &region_bo, 1, - NULL); - - if (brw->wm.surf_bo[unit] == NULL) { - struct brw_surface_state surf; - - memset(&surf, 0, sizeof(surf)); + memset(&surf, 0, sizeof(surf)); - surf.ss0.surface_format = key.surface_format; - surf.ss0.surface_type = key.surface_type; - if (key.tiling == I915_TILING_NONE) { - surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp; + surf.ss0.surface_format = key.surface_format; + surf.ss0.surface_type = key.surface_type; + if (key.tiling == I915_TILING_NONE) { + surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp; + } else { + uint32_t tile_base, tile_x, tile_y; + uint32_t pitch = key.pitch * key.cpp; + + if (key.tiling == I915_TILING_X) { + tile_x = 
key.draw_x % (512 / key.cpp); + tile_y = key.draw_y % 8; + tile_base = ((key.draw_y / 8) * (8 * pitch)); + tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096; } else { - uint32_t tile_base, tile_x, tile_y; - uint32_t pitch = key.pitch * key.cpp; - - if (key.tiling == I915_TILING_X) { - tile_x = key.draw_x % (512 / key.cpp); - tile_y = key.draw_y % 8; - tile_base = ((key.draw_y / 8) * (8 * pitch)); - tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096; - } else { - /* Y */ - tile_x = key.draw_x % (128 / key.cpp); - tile_y = key.draw_y % 32; - tile_base = ((key.draw_y / 32) * (32 * pitch)); - tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096; - } - assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); - assert(tile_x % 4 == 0); - assert(tile_y % 2 == 0); - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - surf.ss1.base_addr = tile_base; - surf.ss5.x_offset = tile_x / 4; - surf.ss5.y_offset = tile_y / 2; - } - if (region_bo != NULL) - surf.ss1.base_addr += region_bo->offset; /* reloc */ - - surf.ss2.width = key.width - 1; - surf.ss2.height = key.height - 1; - brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.pitch * key.cpp) - 1; - - if (intel->gen < 6) { - /* _NEW_COLOR */ - surf.ss0.color_blend = key.color_blend; - surf.ss0.writedisable_red = !key.color_mask[0]; - surf.ss0.writedisable_green = !key.color_mask[1]; - surf.ss0.writedisable_blue = !key.color_mask[2]; - surf.ss0.writedisable_alpha = !key.color_mask[3]; + /* Y */ + tile_x = key.draw_x % (128 / key.cpp); + tile_y = key.draw_y % 32; + tile_base = ((key.draw_y / 32) * (32 * pitch)); + tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096; } + assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); + assert(tile_x % 4 == 0); + assert(tile_y % 2 == 0); + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. 
+ */ + surf.ss1.base_addr = tile_base; + surf.ss5.x_offset = tile_x / 4; + surf.ss5.y_offset = tile_y / 2; + } + if (region_bo != NULL) + surf.ss1.base_addr += region_bo->offset; /* reloc */ - /* Key size will never match key size for textures, so we're safe. */ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - &surf, sizeof(surf)); - if (region_bo != NULL) { - /* We might sample from it, and we might render to it, so flag - * them both. We might be able to figure out from other state - * a more restrictive relocation to emit. - */ - drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], - offsetof(struct brw_surface_state, ss1), - region_bo, - surf.ss1.base_addr - region_bo->offset, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); - } + surf.ss2.width = key.width - 1; + surf.ss2.height = key.height - 1; + brw_set_surface_tiling(&surf, key.tiling); + surf.ss3.pitch = (key.pitch * key.cpp) - 1; + + if (intel->gen < 6) { + /* _NEW_COLOR */ + surf.ss0.color_blend = key.color_blend; + surf.ss0.writedisable_red = !key.color_mask[0]; + surf.ss0.writedisable_green = !key.color_mask[1]; + surf.ss0.writedisable_blue = !key.color_mask[2]; + surf.ss0.writedisable_alpha = !key.color_mask[3]; } -} + map = brw_state_batch(brw, sizeof(surf), 32, + &brw->wm.surf_bo[unit], + &brw->wm.surf_offset[unit]); + memcpy(map, &surf, sizeof(surf)); + + if (region_bo != NULL) { + drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], + brw->wm.surf_offset[unit] + + offsetof(struct brw_surface_state, ss1), + region_bo, + surf.ss1.base_addr - region_bo->offset, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); + } +} -/** - * Constructs the binding table for the WM surface state, which maps unit - * numbers to surface state objects. 
- */ -static drm_intel_bo * -brw_wm_get_binding_table(struct brw_context *brw) +static void +prepare_wm_surfaces(struct brw_context *brw) { - drm_intel_bo *bind_bo; - - assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); - - bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->wm.surf_bo, brw->wm.nr_surfaces, - NULL); - - if (bind_bo == NULL) { - GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); - uint32_t data[BRW_WM_MAX_SURF]; - int i; - - for (i = 0; i < brw->wm.nr_surfaces; i++) - if (brw->wm.surf_bo[i]) - data[i] = brw->wm.surf_bo[i]->offset; - else - data[i] = 0; - - bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->wm.surf_bo, brw->wm.nr_surfaces, - data, data_size); - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_WM_MAX_SURF; i++) { - if (brw->wm.surf_bo[i] != NULL) { - drm_intel_bo_emit_reloc(bind_bo, i * sizeof(GLuint), - brw->wm.surf_bo[i], 0, - I915_GEM_DOMAIN_INSTRUCTION, 0); - } + GLcontext *ctx = &brw->intel.ctx; + int i; + int nr_surfaces = 0; + + if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? 
irb->region : NULL; + + brw_add_validated_bo(brw, region->buffer); + nr_surfaces = SURF_INDEX_DRAW(i) + 1; + } + } + + if (brw->wm.const_bo) { + brw_add_validated_bo(brw, brw->wm.const_bo); + nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; + } + + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + struct gl_texture_object *tObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + + if (texUnit->_ReallyEnabled) { + brw_add_validated_bo(brw, intelObj->mt->region->buffer); + nr_surfaces = SURF_INDEX_TEXTURE(i) + 1; } } - return bind_bo; + /* Have to update this in our prepare, since the unit's prepare + * relies on it. + */ + if (brw->wm.nr_surfaces != nr_surfaces) { + brw->wm.nr_surfaces = nr_surfaces; + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; + } } -static void prepare_wm_surfaces(struct brw_context *brw ) +/** + * Constructs the set of surface state objects pointed to by the + * binding table. 
+ */ +static void +upload_wm_surfaces(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; GLuint i; - int old_nr_surfaces; /* _NEW_BUFFERS | _NEW_COLOR */ /* Update surfaces for drawing buffers */ @@ -698,32 +607,21 @@ static void prepare_wm_surfaces(struct brw_context *brw ) brw_update_renderbuffer_surface(brw, NULL, 0); } - old_nr_surfaces = brw->wm.nr_surfaces; - brw->wm.nr_surfaces = BRW_MAX_DRAW_BUFFERS; - - if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) - brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; - /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; const GLuint surf = SURF_INDEX_TEXTURE(i); - /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ + /* _NEW_TEXTURE */ if (texUnit->_ReallyEnabled) { brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = surf + 1; } else { drm_intel_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; } } - drm_intel_bo_unreference(brw->wm.bind_bo); - brw->wm.bind_bo = brw_wm_get_binding_table(brw); - - if (brw->wm.nr_surfaces != old_nr_surfaces) - brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } const struct brw_tracked_state brw_wm_surfaces = { @@ -731,12 +629,48 @@ const struct brw_tracked_state brw_wm_surfaces = { .mesa = (_NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS), - .brw = (BRW_NEW_CONTEXT | - BRW_NEW_WM_SURFACES), + .brw = (BRW_NEW_BATCH), .cache = 0 }, .prepare = prepare_wm_surfaces, + .emit = upload_wm_surfaces, }; +/** + * Constructs the binding table for the WM surface state, which maps unit + * numbers to surface state objects. + */ +static void +brw_wm_upload_binding_table(struct brw_context *brw) +{ + uint32_t *bind; + int i; + + /* Might want to calculate nr_surfaces first, to avoid taking up so much + * space for the binding table. 
+ */ + bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF, + 32, &brw->wm.bind_bo, &brw->wm.bind_bo_offset); + + for (i = 0; i < BRW_WM_MAX_SURF; i++) { + /* BRW_NEW_WM_SURFACES */ + bind[i] = brw->wm.surf_offset[i]; + if (brw->wm.surf_bo[i]) { + bind[i] = brw->wm.surf_offset[i]; + } else { + bind[i] = 0; + } + } + brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE; +} +const struct brw_tracked_state brw_wm_binding_table = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_BATCH | + BRW_NEW_WM_SURFACES), + .cache = 0 + }, + .emit = brw_wm_upload_binding_table, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 2e21e5f7335..34a9dc234c2 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -63,8 +63,7 @@ prepare_scissor_state(struct brw_context *brw) drm_intel_bo_unreference(brw->sf.state_bo); brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT, - &scissor, sizeof(scissor), - NULL, 0); + &scissor, sizeof(scissor)); } const struct brw_tracked_state gen6_scissor_state = { diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 8d96b44f1dc..51940efb443 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -87,7 +87,7 @@ upload_sf_state(struct brw_context *brw) if (ctx->Polygon.CullFlag) { switch (ctx->Polygon.CullFaceMode) { case GL_FRONT: - dw3 |= GEN6_SF_CULL_BOTH; + dw3 |= GEN6_SF_CULL_FRONT; break; case GL_BACK: dw3 |= GEN6_SF_CULL_BACK; diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index 0c2aa4206c6..301c68e7f9e 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -29,7 +29,6 @@ #include "brw_state.h" #include "brw_defines.h" #include "intel_batchbuffer.h" -#include "main/macros.h" /* The clip VP 
defines the guardband region where expensive clipping is skipped * and fragments are allowed to be generated and clipped out cheaply by the SF. @@ -51,8 +50,7 @@ prepare_clip_vp(struct brw_context *brw) drm_intel_bo_unreference(brw->clip.vp_bo); brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP, - &vp, sizeof(vp), - NULL, 0); + &vp, sizeof(vp)); } const struct brw_tracked_state gen6_clip_vp = { @@ -95,8 +93,7 @@ prepare_sf_vp(struct brw_context *brw) drm_intel_bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, - &sfv, sizeof(sfv), - NULL, 0); + &sfv, sizeof(sfv)); } const struct brw_tracked_state gen6_sf_vp = { @@ -108,36 +105,6 @@ const struct brw_tracked_state gen6_sf_vp = { .prepare = prepare_sf_vp, }; -static void -prepare_cc_vp(struct brw_context *brw) -{ - GLcontext *ctx = &brw->intel.ctx; - struct brw_cc_viewport ccv; - - /* _NEW_TRANSOFORM */ - if (ctx->Transform.DepthClamp) { - /* _NEW_VIEWPORT */ - ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); - ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); - } else { - ccv.min_depth = 0.0; - ccv.max_depth = 1.0; - } - - drm_intel_bo_unreference(brw->cc.vp_bo); - brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), - NULL, 0); -} - -const struct brw_tracked_state gen6_cc_vp = { - .dirty = { - .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, - .brw = 0, - .cache = 0, - }, - .prepare = prepare_cc_vp, -}; - static void prepare_viewport_state_pointers(struct brw_context *brw) { brw_add_validated_bo(brw, brw->sf.state_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 325f6b43d30..863c85449d9 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -98,7 +98,8 @@ upload_wm_state(struct brw_context *brw) /* CACHE_NEW_SAMPLER */ dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; - dw4 |= (1 << 
GEN6_WM_DISPATCH_START_GRF_SHIFT_0); + dw4 |= (brw->wm.prog_data->first_curbe_grf << + GEN6_WM_DISPATCH_START_GRF_SHIFT_0); dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; dw5 |= GEN6_WM_DISPATCH_ENABLE; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 1116bccd8e7..698445c5268 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -49,6 +49,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->ptr = batch->map; batch->reserved_space = BATCH_RESERVED; batch->dirty_state = ~0; + batch->state_batch_offset = batch->size; } struct intel_batchbuffer * @@ -84,6 +85,12 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) int x_off = 0, y_off = 0; drm_intel_bo_subdata(batch->buf, 0, used, batch->buffer); + if (batch->state_batch_offset != batch->size) { + drm_intel_bo_subdata(batch->buf, + batch->state_batch_offset, + batch->size - batch->state_batch_offset, + batch->buffer + batch->state_batch_offset); + } batch->ptr = NULL; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index f4ac1825cd1..ae53f455117 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -23,6 +23,7 @@ struct intel_batchbuffer GLubyte *ptr; GLuint size; + uint32_t state_batch_offset; #ifdef DEBUG /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ @@ -92,7 +93,8 @@ static INLINE uint32_t float_as_int(float f) static INLINE GLint intel_batchbuffer_space(struct intel_batchbuffer *batch) { - return (batch->size - batch->reserved_space) - (batch->ptr - batch->map); + return (batch->state_batch_offset - batch->reserved_space) - + (batch->ptr - batch->map); } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index dec47974f2a..5f2035d79c9 100644 --- 
a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -827,8 +827,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) assert(intel); /* should never be null */ if (intel) { - GLboolean release_texture_heaps; - INTEL_FIREVERTICES(intel); _mesa_meta_free(&intel->ctx); @@ -837,7 +835,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) intel->vtbl.destroy(intel); - release_texture_heaps = (intel->ctx.Shared->RefCount == 1); _swsetup_DestroyContext(&intel->ctx); _tnl_DestroyContext(&intel->ctx); _vbo_DestroyContext(&intel->ctx); @@ -855,18 +852,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); intel->first_post_swapbuffers_batch = NULL; - if (release_texture_heaps) { - /* Nothing is currently done here to free texture heaps; - * but we're not using the texture heap utilities, so I - * rather think we shouldn't. I've taken a look, and can't - * find any private texture data hanging around anywhere, but - * I'm not yet certain there isn't any at all... 
- */ - /* if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "do something to free texture heaps\n"); - */ - } - driDestroyOptionCache(&intel->optionCache); /* free the Mesa context */ diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 14ff4a96950..c7ac2de01e6 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -261,6 +261,8 @@ extern char *__progname; #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) +#define ROUND_DOWN_TO(value, alignment) (ALIGN(value - alignment - 1, \ + alignment)) #define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0) static INLINE uint32_t diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 8f61f1f5b24..4a83886fc16 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -42,9 +42,6 @@ #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" -#ifndef I915 -#include "brw_state.h" -#endif #define FILE_DEBUG_FLAG DEBUG_FBO @@ -296,12 +293,6 @@ intel_renderbuffer_set_region(struct intel_context *intel, old = rb->region; rb->region = NULL; intel_region_reference(&rb->region, region); -#ifndef I915 - if (old) { - brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache, - old->buffer); - } -#endif intel_region_release(&old); } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 71ef7a8e39b..39ac0205fa1 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -29,9 +29,6 @@ #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_tex_layout.h" -#ifndef I915 -#include "brw_state.h" -#endif #include "main/enums.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE @@ -203,19 +200,6 @@ intel_miptree_release(struct 
intel_context *intel, DBG("%s deleting %p\n", __FUNCTION__, *mt); -#ifndef I915 - /* Free up cached binding tables holding a reference on our buffer, to - * avoid excessive memory consumption. - * - * This isn't as aggressive as we could be, as we'd like to do - * it from any time we free the last ref on a region. But intel_region.c - * is context-agnostic. Perhaps our constant state cache should be, as - * well. - */ - brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache, - (*mt)->region->buffer); -#endif - intel_region_release(&((*mt)->region)); for (i = 0; i < MAX_TEXTURE_LEVELS; i++) { diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index c30552c5a79..fb840c1020d 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -257,6 +257,8 @@ intelSpanRenderStart(GLcontext * ctx) for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + + intel_finalize_mipmap_tree(intel, i); intel_tex_map_images(intel, intel_texture_object(texObj)); } } diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 34d22b45591..ff3801dc676 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ radeon_code.c \ radeon_compiler.c \ radeon_emulate_branches.c \ + radeon_emulate_loops.c \ radeon_program.c \ radeon_program_print.c \ radeon_opcodes.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index 663926e3216..50d9cdb7f2d 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -23,6 +23,7 @@ r300compiler = env.ConvenienceLibrary( 'radeon_pair_regalloc.c', 'radeon_optimize.c', 'radeon_emulate_branches.c', + 'radeon_emulate_loops.c', 
'radeon_dataflow.c', 'radeon_dataflow_deadcode.c', 'radeon_dataflow_swizzles.c', diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 7f3b88ed759..bbdfa0d56f9 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -26,6 +26,7 @@ #include "radeon_dataflow.h" #include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" #include "radeon_program_alu.h" #include "radeon_program_tex.h" #include "r300_fragprog.h" @@ -103,6 +104,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. */ + + if (c->Base.is_r500) { + rc_emulate_loops(&c->Base, R500_PFS_MAX_INST); + } else { + rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST); + } + debug_program_log(c, "after emulate loops"); + rc_emulate_branches(&c->Base); debug_program_log(c, "after emulate branches"); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 507b2e532fe..e984797e2d3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -30,6 +30,7 @@ #include "radeon_program_alu.h" #include "radeon_swizzle.h" #include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" /* * Take an already-setup and valid source then swizzle it appropriately to @@ -348,7 +349,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi if (!valid_dst(compiler->code, &vpi->DstReg)) continue; - if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS || + (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) { rc_error(&compiler->Base, "Vertex program has too many 
instructions\n"); return; } @@ -404,7 +406,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c { struct rc_instruction *inst; unsigned int num_orig_temps = 0; - char hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + char hwtemps[R300_VS_MAX_TEMPS]; struct temporary_allocation * ta; unsigned int i, j; @@ -463,11 +465,11 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c unsigned int orig = inst->U.I.DstReg.Index; if (!ta[orig].Allocated) { - for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + for(j = 0; j < R300_VS_MAX_TEMPS; ++j) { if (!hwtemps[j]) break; } - if (j >= VSF_MAX_FRAGMENT_TEMPS) { + if (j >= R300_VS_MAX_TEMPS) { fprintf(stderr, "Out of hw temporaries\n"); } else { ta[orig].Allocated = 1; @@ -600,6 +602,13 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. 
*/ + if (compiler->Base.is_r500){ + rc_emulate_loops(&compiler->Base, R500_VS_MAX_ALU); + } else { + rc_emulate_loops(&compiler->Base, R300_VS_MAX_ALU); + } + debug_program_log(compiler, "after emulate loops"); + rc_emulate_branches(&compiler->Base); debug_program_log(compiler, "after emulate branches"); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 1979e7e4e49..d03689763bc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -235,8 +235,11 @@ struct rX00_fragment_program_code { }; -#define VSF_MAX_FRAGMENT_LENGTH (255*4) -#define VSF_MAX_FRAGMENT_TEMPS (14) +#define R300_VS_MAX_ALU 256 +#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) +#define R500_VS_MAX_ALU 1024 +#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) +#define R300_VS_MAX_TEMPS 32 #define VSF_MAX_INPUTS 32 #define VSF_MAX_OUTPUTS 32 @@ -244,8 +247,8 @@ struct rX00_fragment_program_code { struct r300_vertex_program_code { int length; union { - uint32_t d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; + uint32_t d[R500_VS_MAX_ALU_DWORDS]; + float f[R500_VS_MAX_ALU_DWORDS]; } body; int pos_end; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c new file mode 100644 index 00000000000..4c5d29f4217 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -0,0 +1,474 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_emulate_loops.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) 
do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct emulate_loop_state { + struct radeon_compiler * C; + struct loop_info * Loops; + unsigned int LoopCount; + unsigned int LoopReserved; +}; + +struct loop_info { + struct rc_instruction * BeginLoop; + struct rc_instruction * Cond; + struct rc_instruction * If; + struct rc_instruction * Brk; + struct rc_instruction * EndIf; + struct rc_instruction * EndLoop; +}; + +struct const_value { + + struct radeon_compiler * C; + struct rc_src_register * Src; + float Value; + int HasValue; +}; + +struct count_inst { + struct radeon_compiler * C; + int Index; + rc_swizzle Swz; + float Amount; + int Unknown; +}; + +static float get_constant_value(struct radeon_compiler * c, + struct rc_src_register * src, + int chan) +{ + float base = 1.0f; + int swz = GET_SWZ(src->Swizzle, chan); + if(swz >= 4 || src->Index >= c->Program.Constants.Count ){ + rc_error(c, "get_constant_value: Can't find a value.\n"); + return 0.0f; + } + if(GET_BIT(src->Negate, chan)){ + base = -1.0f; + } + return base * + c->Program.Constants.Constants[src->Index].u.Immediate[swz]; +} + +static int src_reg_is_immediate(struct rc_src_register * src, + struct radeon_compiler * c) +{ + return src->File == RC_FILE_CONSTANT && + c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; +} + +static unsigned int loop_count_instructions(struct loop_info * loop) +{ + unsigned int count = 0; + struct rc_instruction * inst = loop->BeginLoop->Next; + while(inst != loop->EndLoop){ + count++; + inst = inst->Next; + } + return count; +} + +static unsigned int loop_calc_iterations(struct loop_info * loop, + unsigned int loop_count, unsigned int max_instructions) +{ + unsigned int icount = loop_count_instructions(loop); + return max_instructions / (loop_count * icount); +} + +static void loop_unroll(struct emulate_loop_state * s, + struct loop_info *loop, unsigned int iterations) +{ + unsigned int i; + struct rc_instruction * ptr; + struct 
rc_instruction * first = loop->BeginLoop->Next; + struct rc_instruction * last = loop->EndLoop->Prev; + struct rc_instruction * append_to = last; + rc_remove_instruction(loop->BeginLoop); + rc_remove_instruction(loop->EndLoop); + for( i = 1; i < iterations; i++){ + for(ptr = first; ptr != last->Next; ptr = ptr->Next){ + struct rc_instruction *new = rc_alloc_instruction(s->C); + memcpy(new, ptr, sizeof(struct rc_instruction)); + rc_insert_instruction(append_to, new); + append_to = new; + } + } +} + + +static void update_const_value(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct const_value * value = data; + if(value->Src->File != file || + value->Src->Index != index || + !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_MOV: + if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){ + return; + } + value->HasValue = 1; + value->Value = + get_constant_value(value->C, &inst->U.I.SrcReg[0], 0); + break; + } +} + +static void get_incr_amount(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct count_inst * count_inst = data; + int amnt_src_index; + const struct rc_opcode_info * opcode; + float amount; + + if(file != RC_FILE_TEMPORARY || + count_inst->Index != index || + (1 << GET_SWZ(count_inst->Swz,0) != mask)){ + return; + } + /* Find the index of the counter register. 
*/ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + if(opcode->NumSrcRegs != 2){ + count_inst->Unknown = 1; + return; + } + if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[0].Index == count_inst->Index && + inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ + amnt_src_index = 1; + } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[1].Index == count_inst->Index && + inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ + amnt_src_index = 0; + } + else{ + count_inst->Unknown = 1; + return; + } + if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index], + count_inst->C)){ + amount = get_constant_value(count_inst->C, + &inst->U.I.SrcReg[amnt_src_index], 0); + } + else{ + count_inst->Unknown = 1 ; + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + count_inst->Amount += amount; + break; + case RC_OPCODE_SUB: + if(amnt_src_index == 0){ + count_inst->Unknown = 0; + return; + } + count_inst->Amount -= amount; + break; + default: + count_inst->Unknown = 1; + return; + } + +} + +static int transform_const_loop(struct emulate_loop_state * s, + struct loop_info * loop, + struct rc_instruction * cond) +{ + int end_loops = 1; + int iterations; + struct count_inst count_inst; + float limit_value; + struct rc_src_register * counter; + struct rc_src_register * limit; + struct const_value counter_value; + struct rc_instruction * inst; + + /* Find the counter and the upper limit */ + + if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){ + limit = &cond->U.I.SrcReg[0]; + counter = &cond->U.I.SrcReg[1]; + } + else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){ + limit = &cond->U.I.SrcReg[1]; + counter = &cond->U.I.SrcReg[0]; + } + else{ + DBG("No constant limit.\n"); + return 0; + } + + /* Find the initial value of the counter */ + counter_value.Src = counter; + counter_value.Value = 0.0f; + counter_value.HasValue = 0; + counter_value.C = s->C; + for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop; + 
inst = inst->Next){ + rc_for_all_writes_mask(inst, update_const_value, &counter_value); + } + if(!counter_value.HasValue){ + DBG("Initial counter value cannot be determined.\n"); + return 0; + } + DBG("Initial counter value is %f\n", counter_value.Value); + /* Determine how the counter is modified each loop */ + count_inst.C = s->C; + count_inst.Index = counter->Index; + count_inst.Swz = counter->Swizzle; + count_inst.Amount = 0.0f; + count_inst.Unknown = 0; + for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ + switch(inst->U.I.Opcode){ + /* XXX In the future we might want to try to unroll nested + * loops here.*/ + case RC_OPCODE_BGNLOOP: + end_loops++; + break; + case RC_OPCODE_ENDLOOP: + loop->EndLoop = inst; + end_loops--; + break; + /* XXX Check if the counter is modified within an if statement. + */ + case RC_OPCODE_IF: + break; + default: + rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); + if(count_inst.Unknown){ + return 0; + } + break; + } + } + /* Infinite loop */ + if(count_inst.Amount == 0.0f){ + return 0; + } + DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); + /* Calculate the number of iterations of this loop. Keeping this + * simple, since we only support increment and decrement loops. + */ + limit_value = get_constant_value(s->C, limit, 0); + iterations = (int) ((limit_value - counter_value.Value) / + count_inst.Amount); + + DBG("Loop will have %d iterations.\n", iterations); + + /* Prepare loop for unrolling */ + rc_remove_instruction(loop->Cond); + rc_remove_instruction(loop->If); + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + + loop_unroll(s, loop, iterations); + loop->EndLoop = NULL; + return 1; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. 
Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * <Additional conditional code> -> <Additional conditional code> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> <Loop Body> + * <Loop Body> -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return If the loop can be unrolled, a pointer to the first instruction of + * the unrolled loop. + * Otherwise, A pointer to the ENDLOOP instruction. + * Null if there is an error. + */ +static struct rc_instruction * transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) +{ + struct loop_info *loop; + struct rc_instruction * ptr; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + memset(loop, 0, sizeof(struct loop_info)); + if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ + rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__); + return NULL; + } + loop->BeginLoop = inst; + + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){ + switch(ptr->U.I.Opcode){ + case RC_OPCODE_BGNLOOP: + /* Nested loop */ + ptr = transform_loop(s, ptr); + if(!ptr){ + return NULL; + } + break; + case RC_OPCODE_BRK: + loop->Brk = ptr; + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){ + rc_error(s->C, + "%s: expected ENDIF\n",__FUNCTION__); + return NULL; + } + loop->EndIf = ptr->Next; + if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){ + rc_error(s->C, + "%s: expected IF\n", __FUNCTION__); + return NULL; + } + loop->If = ptr->Prev; + switch(loop->If->Prev->U.I.Opcode){ + case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLE: + case RC_OPCODE_SEQ: + case RC_OPCODE_SNE: + break; + default: + rc_error(s->C, "%s expected conditional\n", + __FUNCTION__); + return NULL; + } + loop->Cond = loop->If->Prev; + ptr = loop->EndIf; + break; + case RC_OPCODE_ENDLOOP: + 
loop->EndLoop = ptr; + break; + } + } + /* Reverse the conditional instruction */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + loop->Cond->U.I.Opcode = RC_OPCODE_SLT; + break; + case RC_OPCODE_SLT: + loop->Cond->U.I.Opcode = RC_OPCODE_SGE; + break; + case RC_OPCODE_SLE: + loop->Cond->U.I.Opcode = RC_OPCODE_SGT; + break; + case RC_OPCODE_SGT: + loop->Cond->U.I.Opcode = RC_OPCODE_SLE; + break; + case RC_OPCODE_SEQ: + loop->Cond->U.I.Opcode = RC_OPCODE_SNE; + break; + case RC_OPCODE_SNE: + loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; + break; + default: + rc_error(s->C, "loop->Cond is not a conditional.\n"); + return NULL; + } + + /* Check if the number of loops is known at compile time. */ + if(transform_const_loop(s, loop, ptr)){ + return loop->BeginLoop->Next; + } + + /* Prepare the loop to be unrolled */ + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); + return loop->EndLoop; +} + +static void rc_transform_loops(struct emulate_loop_state * s) +{ + struct rc_instruction * ptr = s->C->Program.Instructions.Next; + while(ptr != &s->C->Program.Instructions) { + if(ptr->Type == RC_INSTRUCTION_NORMAL && + ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + ptr = transform_loop(s, ptr); + if(!ptr){ + return; + } + } + ptr = ptr->Next; + } +} + +static void rc_unroll_loops(struct emulate_loop_state *s, + unsigned int max_instructions) +{ + int i; + /* Iterate backwards of the list of loops so that loops that nested + * loops are unrolled first. 
+ */ + for( i = s->LoopCount - 1; i >= 0; i-- ){ + if(!s->Loops[i].EndLoop){ + continue; + } + unsigned int iterations = loop_calc_iterations(&s->Loops[i], + s->LoopCount, max_instructions); + loop_unroll(s, &s->Loops[i], iterations); + } +} + +void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions) +{ + struct emulate_loop_state s; + + memset(&s, 0, sizeof(struct emulate_loop_state)); + s.C = c; + + /* We may need to move these two operations to r3xx_(vert|frag)prog.c + * and run the optimization passes between them in order to increase + * the number of unrolls we can do for each loop. + */ + rc_transform_loops(&s); + + rc_unroll_loops(&s, max_instructions); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h new file mode 100644 index 00000000000..ddcf1c0fabe --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -0,0 +1,12 @@ + + +#ifndef RADEON_EMULATE_LOOPS_H +#define RADEON_EMULATE_LOOPS_H + +#define MAX_ITERATIONS 8 + +struct radeon_compiler; + +void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions); + +#endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index d593b3e81ae..1dc16855dc1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -368,6 +368,24 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .NumSrcRegs = 0 }, { + .Opcode = RC_OPCODE_BGNLOOP, + .Name = "BGNLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BRK, + .Name = "BRK", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDLOOP, + .Name = "ENDLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { .Opcode = RC_OPCODE_REPL_ALPHA, .Name = "REPL_ALPHA", .HasDstReg = 1 diff --git 
a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 87a2e23084c..91c82ac0890 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -180,6 +180,12 @@ typedef enum { /** branch instruction: has no effect */ RC_OPCODE_ENDIF, + + RC_OPCODE_BGNLOOP, + + RC_OPCODE_BRK, + + RC_OPCODE_ENDLOOP, /** special instruction, used in R300-R500 fragment program pair instructions * indicates that the result of the alpha operation shall be replicated diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 6992ca59dbf..e4b302bbad9 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -376,13 +376,12 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ctx->Const.MaxDrawBuffers = 1; ctx->Const.MaxColorAttachments = 1; - /* currently bogus data */ if (r300->options.hw_tcl_enabled) { - ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxNativeInstructions = 255; + ctx->Const.VertexProgram.MaxNativeAluInstructions = 255; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; ctx->Const.VertexProgram.MaxNativeTemps = 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeParameters = 256; ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 61133e686f1..88d6b06df56 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -6159,7 +6159,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm, } if(uNumValidSrc > 0) { - prelude_cf_ptr = 
pAsm->cf_current_alu_clause_ptr; + prelude_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr; pAsm->alu_x_opcode = SQ_CF_INST_ALU; } @@ -6279,7 +6279,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm, next_ins(pAsm); - pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr; pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; pAsm->alu_x_opcode = SQ_CF_INST_ALU; } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 5a90f729e68..aab1a7947ab 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -563,11 +563,15 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) /* see if we need any point_sprite replacements, also increase num_interp * as there's no vp output for them */ - for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++) + if (ctx->Point.PointSprite) { - if(ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE) { - ui++; - point_sprite = GL_TRUE; + for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++) + { + if (ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE) + { + ui++; + point_sprite = GL_TRUE; + } } } @@ -670,8 +674,9 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) for(i=0; i<8; i++) { + GLboolean coord_replace = ctx->Point.PointSprite && ctx->Point.CoordReplace[i]; unBit = 1 << (VERT_RESULT_TEX0 + i); - if((OutputsWritten & unBit) || (ctx->Point.CoordReplace[i] == GL_TRUE)) + if ((OutputsWritten & unBit) || coord_replace) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -679,7 +684,7 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SEMANTIC_shift, SEMANTIC_mask); CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); /* ARB_point_sprite 
*/ - if(ctx->Point.CoordReplace[i] == GL_TRUE) + if (coord_replace) { SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); } diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index bcac125baf4..d2b190e42e0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -593,12 +593,7 @@ static int image_matches_texture_obj(struct gl_texture_object *texObj, if (!baseImage) return 0; - /* Check image level against object BaseLevel, but not MaxLevel. MaxLevel is not - * the highest level that can be assigned to the miptree. - */ - const unsigned maxLevel = texObj->BaseLevel + baseImage->MaxLog2; - if (level < texObj->BaseLevel || level > maxLevel - || level > RADEON_MIPTREE_MAX_TEXTURE_LEVELS) + if (level < texObj->BaseLevel || level > texObj->MaxLevel) return 0; const unsigned levelDiff = level - texObj->BaseLevel; @@ -620,7 +615,9 @@ static void teximage_assign_miptree(radeonContextPtr rmesa, radeonTexObj *t = radeon_tex_obj(texObj); radeon_texture_image* image = get_radeon_texture_image(texImage); - /* check image for dimension and level compatibility with texture */ + /* Since miptree holds only images for levels <BaseLevel..MaxLevel> + * don't allocate the miptree if the teximage won't fit. 
+ */ if (!image_matches_texture_obj(texObj, texImage, level)) return; diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile index ea49a896590..c6b4a040851 100644 --- a/src/mesa/drivers/osmesa/Makefile +++ b/src/mesa/drivers/osmesa/Makefile @@ -20,17 +20,11 @@ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main -# Standalone osmesa needs to be linked with core Mesa APIs -ifeq ($(DRIVER_DIRS), osmesa) CORE_MESA = \ $(TOP)/src/mesa/libmesa.a \ $(TOP)/src/mapi/glapi/libglapi.a \ $(TOP)/src/glsl/cl/libglslcl.a \ $(TOP)/src/glsl/pp/libglslpp.a -else -CORE_MESA = -endif - .c.o: $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ diff --git a/src/mesa/main/arbprogram.h b/src/mesa/main/arbprogram.h index df16513e398..787ffd62f4b 100644 --- a/src/mesa/main/arbprogram.h +++ b/src/mesa/main/arbprogram.h @@ -27,6 +27,10 @@ #define ARBPROGRAM_H +#include "compiler.h" +#include "glheader.h" + + extern void GLAPIENTRY _mesa_BindProgram(GLenum target, GLuint id); diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 1a2e9b1da6f..48b9904642a 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -147,6 +147,8 @@ invalidate_framebuffer(struct gl_framebuffer *fb) /** * Given a GL_*_ATTACHMENTn token, return a pointer to the corresponding * gl_renderbuffer_attachment object. + * This function is only used for user-created FB objects, not the + * default / window-system FB object. * If \p attachment is GL_DEPTH_STENCIL_ATTACHMENT, return a pointer to * the depth buffer attachment point. */ @@ -156,6 +158,8 @@ _mesa_get_attachment(GLcontext *ctx, struct gl_framebuffer *fb, { GLuint i; + assert(fb->Name > 0); + switch (attachment) { case GL_COLOR_ATTACHMENT0_EXT: case GL_COLOR_ATTACHMENT1_EXT: @@ -195,6 +199,45 @@ _mesa_get_attachment(GLcontext *ctx, struct gl_framebuffer *fb, /** + * As above, but only used for getting attachments of the default / + * window-system framebuffer (not user-created framebuffer objects). 
+ */ +static struct gl_renderbuffer_attachment * +_mesa_get_fb0_attachment(GLcontext *ctx, struct gl_framebuffer *fb, + GLenum attachment) +{ + assert(fb->Name == 0); + + switch (attachment) { + case GL_FRONT_LEFT: + return &fb->Attachment[BUFFER_FRONT_LEFT]; + case GL_FRONT_RIGHT: + return &fb->Attachment[BUFFER_FRONT_RIGHT]; + case GL_BACK_LEFT: + return &fb->Attachment[BUFFER_BACK_LEFT]; + case GL_BACK_RIGHT: + return &fb->Attachment[BUFFER_BACK_RIGHT]; + case GL_AUX0: + if (fb->Visual.numAuxBuffers == 1) { + return &fb->Attachment[BUFFER_AUX0]; + } + return NULL; + case GL_DEPTH_BUFFER: + /* fall-through / new in GL 3.0 */ + case GL_DEPTH_ATTACHMENT_EXT: + return &fb->Attachment[BUFFER_DEPTH]; + case GL_STENCIL_BUFFER: + /* fall-through / new in GL 3.0 */ + case GL_STENCIL_ATTACHMENT_EXT: + return &fb->Attachment[BUFFER_STENCIL]; + default: + return NULL; + } +} + + + +/** * Remove any texture or renderbuffer attached to the given attachment * point. Update reference counts, etc. */ @@ -1878,12 +1921,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } if (buffer->Name == 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameterivEXT"); - return; + /* the default / window-system FBO */ + att = _mesa_get_fb0_attachment(ctx, buffer, attachment); + } + else { + /* user-created framebuffer FBO */ + att = _mesa_get_attachment(ctx, buffer, attachment); } - att = _mesa_get_attachment(ctx, buffer, attachment); if (att == NULL) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetFramebufferAttachmentParameterivEXT(attachment)"); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 01f84180af7..56558cfcc1e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -879,6 +879,7 @@ _mesa_source_buffer_exists(GLcontext *ctx, GLenum format) return GL_FALSE; } ASSERT(_mesa_get_format_bits(ctx->ReadBuffer->_ColorReadBuffer->Format, GL_RED_BITS) > 0 || + 
_mesa_get_format_bits(ctx->ReadBuffer->_ColorReadBuffer->Format, GL_ALPHA_BITS) > 0 || _mesa_get_format_bits(ctx->ReadBuffer->_ColorReadBuffer->Format, GL_INDEX_BITS) > 0); break; case GL_DEPTH: diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 2101b9bc18d..8f7ebeed976 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1242,8 +1242,6 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, assert(xoffset % util_format_get_blockwidth(pformat) == 0); assert(yoffset % util_format_get_blockheight(pformat) == 0); - assert(width % util_format_get_blockwidth(pformat) == 0); - assert(height % util_format_get_blockheight(pformat) == 0); for (y = 0; y < height; y += util_format_get_blockheight(pformat)) { /* don't need to adjust for xoffset and yoffset as st_texture_image_map does that */ diff --git a/src/mesa/swrast_setup/ss_triangle.c b/src/mesa/swrast_setup/ss_triangle.c index bad0d819460..f22bc52f0a8 100644 --- a/src/mesa/swrast_setup/ss_triangle.c +++ b/src/mesa/swrast_setup/ss_triangle.c @@ -159,7 +159,7 @@ static void _swsetup_render_tri(GLcontext *ctx, } #define SS_COLOR(a,b) UNCLAMPED_FLOAT_TO_RGBA_CHAN(a,b) -#define SS_SPEC(a,b) UNCLAMPED_FLOAT_TO_RGB_CHAN(a,b) +#define SS_SPEC(a,b) COPY_4V(a,b) #define SS_IND(a,b) (a = b) #define IND (0) |