diff options
Diffstat (limited to 'src')
59 files changed, 841 insertions, 580 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index c5cf6d4a6c4..efd159f8869 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -325,16 +325,18 @@ lp_typekind_name(LLVMTypeKind t) return "LLVMArrayTypeKind"; case LLVMPointerTypeKind: return "LLVMPointerTypeKind"; +#if HAVE_LLVM < 0x0300 case LLVMOpaqueTypeKind: return "LLVMOpaqueTypeKind"; +#endif case LLVMVectorTypeKind: return "LLVMVectorTypeKind"; case LLVMMetadataTypeKind: return "LLVMMetadataTypeKind"; - /* Only in LLVM 2.7 and later??? +#if HAVE_LLVM == 0x0207 case LLVMUnionTypeKind: return "LLVMUnionTypeKind"; - */ +#endif default: return "unknown LLVMTypeKind"; } diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index e50db6d67fe..71fe53e3a27 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -257,7 +257,7 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, ret = u_upload_data( upload, min_out_offset, size, - map, + map + offset, out_offset, outbuf, flushed ); diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 28ff40a2328..1b30309bb58 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -99,11 +99,11 @@ static void i915_destroy(struct pipe_context *pipe) struct i915_context *i915 = i915_context(pipe); int i; - draw_destroy(i915->draw); - if (i915->blitter) util_blitter_destroy(i915->blitter); + draw_destroy(i915->draw); + if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index d85c0a6da41..8cb6cd0938e 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -35,6 +35,9 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + /* TODO: >= nv4x support Shader Model 3.0 */ + return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index fbf25feaf20..4605c833dea 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2026,6 +2026,11 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + tmp = 0; + tmp |= S_008E2C_NUM_PS_LDS(0x1000); + tmp |= S_008E2C_NUM_LS_LDS(0x1000); + r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index d795f5757ed..96dbd4da91b 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -216,6 +216,13 @@ #define S_008C28_NUM_LS_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) #define G_008C28_NUM_LS_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) #define C_008C28_NUM_LS_STACK_ENTRIES(x) 0xF000FFFF +#define R_008E2C_SQ_LDS_RESOURCE_MGMT 0x00008E2C +#define S_008E2C_NUM_PS_LDS(x) (((x) & 0xFFFF) << 0) +#define G_008E2C_NUM_PS_LDS(x) (((x) >> 0) & 0xFFFF) +#define C_008E2C_NUM_PS_LDS(x) 0x0000FFFF +#define S_008E2C_NUM_LS_LDS(x) (((x) & 0xFFFF) << 16) +#define G_008E2C_NUM_LS_LDS(x) (((x) >> 16) & 0xFFFF) +#define C_008E2C_NUM_LS_LDS(x) 0xFFFF0000 #define R_008CF0_SQ_MS_FIFO_SIZES 0x00008CF0 #define S_008CF0_CACHE_FIFO_SIZE(x) (((x) & 0xFF) << 0) diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index 6233fb81781..a7aafd846cd 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -864,16 +864,19 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) parselist++; break; case GLX_FBCONFIG_ID: + case GLX_VISUAL_ID: if (!fbConfig) return NULL; parselist++; desiredVisualID = *parselist++; break; case GLX_X_RENDERABLE: + case GLX_MAX_PBUFFER_WIDTH: + case GLX_MAX_PBUFFER_HEIGHT: + case GLX_MAX_PBUFFER_PIXELS: if (!fbConfig) - return NULL; - parselist += 2; - /* ignore */ + return NULL; /* invalid config option */ + parselist += 2; /* ignore the parameter */ break; #ifdef GLX_EXT_texture_from_pixmap diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile index 832d7ba438f..69e7eecdf0c 100644 --- a/src/gallium/targets/egl-static/Makefile +++ b/src/gallium/targets/egl-static/Makefile @@ -42,7 +42,7 @@ egl_CPPFLAGS += \ -I$(TOP)/src/egl/main \ -D_EGL_MAIN=_eglMain egl_LIBS += $(TOP)/src/gallium/state_trackers/egl/libegl.a -egl_SYS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) -lEGL -lm +egl_SYS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) -lEGL -lm -lpthread # EGL platforms ifneq ($(findstring x11, $(EGL_PLATFORMS)),) diff --git a/src/gallium/winsys/i915/drm/i915_drm_buffer.c b/src/gallium/winsys/i915/drm/i915_drm_buffer.c index 89d8e89e6a7..ac66af333df 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_buffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_buffer.c @@ -218,6 +218,8 @@ i915_drm_buffer_is_busy(struct i915_winsys *iws, struct i915_winsys_buffer *buffer) { struct i915_drm_buffer* i915_buffer = i915_drm_buffer(buffer); + if (!i915_buffer) + return FALSE; return drm_intel_bo_busy(i915_buffer->bo); } diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 4d9dd505c41..60d2e289396 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -55,6 +55,7 @@ static const struct r600_reg evergreen_config_reg_list[] = { {R_008C24_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_008C28_SQ_STACK_RESOURCE_MGMT_3, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_008E2C_SQ_LDS_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, }; diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 1255072a571..0f2f1a0eea4 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -165,8 +165,18 @@ ir_function_signature * ir_function::matching_signature(const exec_list *actual_parameters) { ir_function_signature *match = NULL; - int matched_score = 0; - + bool multiple_inexact_matches = false; + + /* From page 42 (page 49 of the PDF) of the GLSL 1.20 spec: + * + * "If an exact match is found, the other signatures are ignored, and + * the exact match is used. Otherwise, if no exact match is found, then + * the implicit conversions in Section 4.1.10 "Implicit Conversions" will + * be applied to the calling arguments if this can make their types match + * a signature. In this case, it is a semantic error if there are + * multiple ways to apply these conversions to the actual arguments of a + * call such that the call can be made to match multiple signatures." + */ foreach_iter(exec_list_iterator, iter, signatures) { ir_function_signature *const sig = (ir_function_signature *) iter.get(); @@ -178,13 +188,24 @@ ir_function::matching_signature(const exec_list *actual_parameters) if (score == 0) return sig; - /* If we found a match with fewer conversions, use that instead */ - if (score > 0 && (match == NULL || score < matched_score)) { - match = sig; - matched_score = score; + if (score > 0) { + if (match == NULL) + match = sig; + else + multiple_inexact_matches = true; } } + /* There is no exact match (we would have returned it by now). If there + * are multiple inexact matches, the call is ambiguous, which is an error. + * + * FINISHME: Report a decent error. Returning NULL will likely result in + * FINISHME: a "no matching signature" error; it should report that the + * FINISHME: call is ambiguous. But reporting errors from here is hard. + */ + if (multiple_inexact_matches) + return NULL; + return match; } diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index abfb32be3ae..d22118beb0b 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -33,6 +33,7 @@ #include "tnl/t_pipeline.h" #include "intel_span.h" #include "intel_tris.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -53,7 +54,7 @@ i830CreateContext(const struct gl_config * mesaVis, void *sharedContextPrivate) { struct dd_function_table functions; - struct i830_context *i830 = CALLOC_STRUCT(i830_context); + struct i830_context *i830 = rzalloc(NULL, struct i830_context); struct intel_context *intel = &i830->intel; struct gl_context *ctx = &intel->ctx; if (!i830) diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index d4af5e51026..71ce44fd5c9 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -76,7 +76,8 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f02f2d78267..11bee140ab6 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -36,6 +36,7 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" +#include "../glsl/ralloc.h" #include "i915_reg.h" #include "i915_program.h" @@ -97,8 +98,7 @@ i915CreateContext(int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct i915_context *i915 = - (struct i915_context *) CALLOC_STRUCT(i915_context); + struct i915_context *i915 = rzalloc(NULL, struct i915_context); struct intel_context *intel = &i915->intel; struct gl_context *ctx = &intel->ctx; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index b67ebb9a1ec..e9e8078328a 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -210,6 +210,7 @@ get_result_vector(struct i915_fragment_program *p, case PROGRAM_OUTPUT: switch (inst->DstReg.Index) { case FRAG_RESULT_COLOR: + case FRAG_RESULT_DATA0: return UREG(REG_TYPE_OC, 0); case FRAG_RESULT_DEPTH: p->depth_written = 1; diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index 507adf1d3dc..ca1949b223e 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -538,7 +538,6 @@ i915_upload_program(struct i915_context *i915, { GLuint program_size = p->csr - p->program; GLuint decl_size = p->decl - p->declarations; - GLuint nr; if (p->error) return; @@ -555,33 +554,33 @@ i915_upload_program(struct i915_context *i915, i915->state.ProgramSize = decl_size + program_size; } - nr = p->nr_constants; - if (i915->state.ConstantSize != 2 + nr*4 || - memcmp(i915->state.Constant + 2, - p->constant, 4*sizeof(int)*nr)) { - if (nr) { - I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1); - I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS); - - i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4); - i915->state.Constant[1] = (1 << nr) -1; - - memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * nr); - i915->state.ConstantSize = 2 + nr * 4; - - if (0) { - GLuint i; - for (i = 0; i < nr; i++) { - fprintf(stderr, "const[%d]: %f %f %f %f\n", i, - p->constant[i][0], - p->constant[i][1], p->constant[i][2], p->constant[i][3]); - } - } - } - else { - I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0); + /* Always seemed to get a failure if I used memcmp() to + * shortcircuit this state upload. Needs further investigation? + */ + if (p->nr_constants) { + GLuint nr = p->nr_constants; + + I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1); + I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS); + + i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | ((nr) * 4); + i915->state.Constant[1] = (1 << (nr - 1)) | ((1 << (nr - 1)) - 1); + + memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * (nr)); + i915->state.ConstantSize = 2 + (nr) * 4; + + if (0) { + GLuint i; + for (i = 0; i < nr; i++) { + fprintf(stderr, "const[%d]: %f %f %f %f\n", i, + p->constant[i][0], + p->constant[i][1], p->constant[i][2], p->constant[i][3]); + } } } + else { + I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0); + } p->on_hardware = 1; } diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c index 6e4512129cd..e6a47116223 100644 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c @@ -219,9 +219,9 @@ i915_miptree_layout_2d(struct intel_context *intel, width, height, 1); if (mt->compressed) - img_height = MAX2(1, height / 4); + img_height = ALIGN(height, 4) / 4; else - img_height = (MAX2(2, height) + 1) & ~1; + img_height = ALIGN(height, 2); mt->total_height += img_height; diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index bcf42d59969..7cd6820cd51 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -82,6 +82,7 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); case MESA_FORMAT_S8_Z24: + case MESA_FORMAT_X8_Z24: if (DepthMode == GL_ALPHA) return (MAPSURF_32BIT | MT_32BIT_x8A24); else if (DepthMode == GL_INTENSITY) @@ -89,7 +90,8 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) else return (MAPSURF_32BIT | MT_32BIT_x8L24); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 7bcb72f42d0..941c4350ddd 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1078,6 +1078,13 @@ intelRunPipeline(struct gl_context * ctx) if (ctx->NewState) _mesa_update_state_locked(ctx); + /* We need to get this done before we start the pipeline, or a + * change in the INTEL_FALLBACK() of its intel_draw_buffers() call + * while the pipeline is running will result in mismatched swrast + * map/unmaps, and later assertion failures. + */ + intel_prepare_render(intel); + if (intel->NewGLState) { if (intel->NewGLState & _NEW_TEXTURE) { intel->vtbl.update_texture_state(intel); @@ -1092,7 +1099,9 @@ intelRunPipeline(struct gl_context * ctx) } intel_map_vertex_shader_textures(ctx); + intel->tnl_pipeline_running = true; _tnl_run_pipeline(ctx); + intel->tnl_pipeline_running = false; intel_unmap_vertex_shader_textures(ctx); _mesa_unlock_context_textures(ctx); @@ -1228,6 +1237,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) if (mode) { intel->Fallback |= bit; if (oldfallback == 0) { + assert(!intel->tnl_pipeline_running); + intel_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "ENTER FALLBACK %x: %s\n", @@ -1239,6 +1250,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) else { intel->Fallback &= ~bit; if (oldfallback == bit) { + assert(!intel->tnl_pipeline_running); + _swrast_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString(bit)); diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 94b8c20b019..9c26150d241 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -43,7 +43,8 @@ prepare_cc_vp(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_viewport *ccv; - ccv = brw_state_batch(brw, sizeof(*ccv), 32, &brw->cc.vp_offset); + ccv = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, + sizeof(*ccv), 32, &brw->cc.vp_offset); /* _NEW_TRANSOFORM */ if (ctx->Transform.DepthClamp) { @@ -98,7 +99,8 @@ static void upload_cc_unit(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_unit_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_STENCIL */ diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index b9efbb74c87..31fbadf5ef2 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -40,7 +40,8 @@ brw_prepare_clip_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_clip_unit_state *clip; - clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); + clip = brw_state_batch(brw, AUB_TRACE_CLIP_STATE, + sizeof(*clip), 32, &brw->clip.state_offset); memset(clip, 0, sizeof(*clip)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 636821839a1..ac683bd9960 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -40,6 +40,7 @@ #include "brw_state.h" #include "intel_span.h" #include "tnl/t_pipeline.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -59,7 +60,7 @@ GLboolean brwCreateContext( int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); + struct brw_context *brw = rzalloc(NULL, struct brw_context); struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; unsigned i; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a8e2b802803..471015cf9d0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -188,6 +188,31 @@ struct brw_state_flags { GLuint cache; }; +enum state_struct_type { + AUB_TRACE_VS_STATE = 1, + AUB_TRACE_GS_STATE = 2, + AUB_TRACE_CLIP_STATE = 3, + AUB_TRACE_SF_STATE = 4, + AUB_TRACE_WM_STATE = 5, + AUB_TRACE_CC_STATE = 6, + AUB_TRACE_CLIP_VP_STATE = 7, + AUB_TRACE_SF_VP_STATE = 8, + AUB_TRACE_CC_VP_STATE = 0x9, + AUB_TRACE_SAMPLER_STATE = 0xa, + AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb, + AUB_TRACE_SCRATCH_SPACE = 0xc, + AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd, + + AUB_TRACE_SCISSOR_STATE = 0x15, + AUB_TRACE_BLEND_STATE = 0x16, + AUB_TRACE_DEPTH_STENCIL_STATE = 0x17, + + /* Not written to .aub files the same way the structures above are. */ + AUB_TRACE_NO_TYPE = 0x100, + AUB_TRACE_BINDING_TABLE = 0x101, + AUB_TRACE_SURFACE_STATE = 0x102, + AUB_TRACE_VS_CONSTANTS = 0x103, +}; /** Subclass of Mesa vertex program */ struct brw_vertex_program { @@ -744,6 +769,14 @@ struct brw_context int num_prepare_atoms, num_emit_atoms; struct brw_tracked_state prepare_atoms[64], emit_atoms[64]; + + /* If (INTEL_DEBUG & DEBUG_BATCH) */ + struct { + uint32_t offset; + uint32_t size; + enum state_struct_type type; + } *state_batch_list; + int state_batch_count; }; diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index bbfefcd816a..e0309e71fc3 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -41,7 +41,8 @@ brw_prepare_gs_unit(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct brw_gs_unit_state *gs; - gs = brw_state_batch(brw, sizeof(*gs), 32, &brw->gs.state_offset); + gs = brw_state_batch(brw, AUB_TRACE_GS_STATE, + sizeof(*gs), 32, &brw->gs.state_offset); memset(gs, 0, sizeof(*gs)); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index eb3d103099b..9201be7caab 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -46,7 +46,8 @@ static void upload_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); if (render_to_fbo) { @@ -129,7 +130,8 @@ static void upload_sf_unit( struct brw_context *brw ) int chipset_max_threads; bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; - sf = brw_state_batch(brw, sizeof(*sf), 64, &brw->sf.state_offset); + sf = brw_state_batch(brw, AUB_TRACE_SF_STATE, + sizeof(*sf), 64, &brw->sf.state_offset); memset(sf, 0, sizeof(*sf)); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b384651d8d0..cede4e5c916 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -172,6 +172,7 @@ void brw_destroy_caches( struct brw_context *brw ); sizeof(*(s)), false) void *brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset); diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 213c7a38d8c..5a983c3d847 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -32,6 +32,29 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "main/imports.h" +#include "../glsl/ralloc.h" + +static void +brw_track_state_batch(struct brw_context *brw, + enum state_struct_type type, + uint32_t offset, + int size) +{ + struct intel_batchbuffer *batch = &brw->intel.batch; + + if (!brw->state_batch_list) { + /* Our structs are always aligned to at least 32 bytes, so + * our array doesn't need to be any larger + */ + brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) * + batch->bo->size / 32); + } + + brw->state_batch_list[brw->state_batch_count].offset = offset; + brw->state_batch_list[brw->state_batch_count].size = size; + brw->state_batch_list[brw->state_batch_count].type = type; + brw->state_batch_count++; +} /** * Allocates a block of space in the batchbuffer for indirect state. @@ -49,6 +72,7 @@ */ void * brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset) @@ -71,6 +95,9 @@ brw_state_batch(struct brw_context *brw, batch->state_batch_offset = offset; + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + brw_track_state_batch(brw, type, offset, size); + *out_offset = offset; return batch->map + (offset>>2); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 7a3a88f04f5..b9e5cc1a534 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -31,44 +31,23 @@ #include "brw_context.h" #include "brw_defines.h" -/** - * Prints out a header, the contents, and the message associated with - * the hardware state data given. - * - * \param name Name of the state object - * \param data Pointer to the base of the state object - * \param hw_offset Hardware offset of the base of the state data. - * \param index Index of the DWORD being output. - */ static void -state_out(const char *name, void *data, uint32_t hw_offset, int index, - char *fmt, ...) -{ - va_list va; +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) PRINTFLIKE(5, 6); - fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", - name, hw_offset + index * 4, ((uint32_t *)data)[index]); - va_start(va, fmt); - vfprintf(stderr, fmt, va); - va_end(va); -} - -/** Generic, undecoded state buffer debug printout */ static void -state_struct_out(const char *name, drm_intel_bo *buffer, - unsigned int offset, unsigned int size) +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) { - int i; - - if (buffer == NULL) - return; - - drm_intel_bo_map(buffer, GL_FALSE); - for (i = 0; i < size / 4; i++) { - state_out(name, buffer->virtual + offset, buffer->offset + offset, i, - "dword %d\n", i); - } - drm_intel_bo_unmap(buffer); + struct intel_context *intel = &brw->intel; + uint32_t *data = intel->batch.bo->virtual + offset; + va_list va; + + fprintf(stderr, "0x%08x: 0x%08x: %8s: ", + offset + index * 4, data[index], name); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); } static const char * @@ -98,394 +77,536 @@ get_965_surface_format(unsigned int surface_format) } } -static void dump_wm_surface_state(struct brw_context *brw) +static void dump_vs_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + struct intel_context *intel = &brw->intel; + const char *name = "VS_STATE"; + struct brw_vs_unit_state *vs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + vs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; +static void dump_gs_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "GS_STATE"; + struct brw_gs_unit_state *gs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + gs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - uint32_t *surf; - char name[20]; +static void dump_clip_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "CLIP_STATE"; + struct brw_clip_unit_state *clip = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + clip->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "clip5\n"); + batch_out(brw, name, offset, 6, "clip6\n"); + batch_out(brw, name, offset, 7, "vp xmin %f\n", clip->viewport_xmin); + batch_out(brw, name, offset, 8, "vp xmax %f\n", clip->viewport_xmax); + batch_out(brw, name, offset, 9, "vp ymin %f\n", clip->viewport_ymin); + batch_out(brw, name, offset, 10, "vp ymax %f\n", clip->viewport_ymax); +} - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (uint32_t *)(base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), - get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, - GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1); - state_out(name, surf, surfoff, 3, "pitch %d, %s tiled\n", - GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, - (surf[3] & BRW_SURFACE_TILED) ? - ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); - state_out(name, surf, surfoff, 4, "mip base %d\n", - GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), - GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); - } - drm_intel_bo_unmap(bo); +static void dump_sf_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "SF_STATE"; + struct brw_sf_unit_state *sf = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + sf->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "sf5: viewport offset\n"); + batch_out(brw, name, offset, 6, "sf6\n"); + batch_out(brw, name, offset, 7, "sf7\n"); } -static void dump_gen7_surface_state(struct brw_context *brw) +static void dump_wm_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + struct intel_context *intel = &brw->intel; + const char *name = "WM_STATE"; + struct brw_wm_unit_state *wm = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "wm4\n"); + batch_out(brw, name, offset, 5, "wm5: %s%s%s%s%s%s, %d threads\n", + wm->wm5.enable_8_pix ? "8pix" : "", + wm->wm5.enable_16_pix ? "16pix" : "", + wm->wm5.program_uses_depth ? ", uses depth" : "", + wm->wm5.program_computes_depth ? ", computes depth" : "", + wm->wm5.program_uses_killpixel ? ", kills" : "", + wm->wm5.thread_dispatch_enable ? "" : ", no dispatch", + wm->wm5.max_threads + 1); + batch_out(brw, name, offset, 6, "depth offset constant %f\n", + wm->global_depth_offset_constant); + batch_out(brw, name, offset, 7, "depth offset scale %f\n", + wm->global_depth_offset_scale); + batch_out(brw, name, offset, 8, "wm8: kernel 1 (gen5+)\n"); + batch_out(brw, name, offset, 9, "wm9: kernel 2 (gen5+)\n"); + batch_out(brw, name, offset, 10, "wm10: kernel 3 (gen5+)\n"); +} - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; +static void dump_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + uint32_t *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1, + GET_FIELD(surf[2], BRW_SURFACE_LOD)); + batch_out(brw, name, offset, 3, "pitch %d, %s tiled\n", + GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, + (surf[3] & BRW_SURFACE_TILED) ? + ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); + batch_out(brw, name, offset, 4, "mip base %d\n", + GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); +} - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - struct gen7_surface_state *surf; - char name[20]; +static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + struct gen7_surface_state *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); + batch_out(brw, name, offset, 3, "pitch %d, %stiled\n", + surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); + batch_out(brw, name, offset, 4, "mip base %d\n", + surf->ss5.min_lod); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); +} - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (struct gen7_surface_state *) (base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(surf->ss0.surface_type), - get_965_surface_format(surf->ss0.surface_format)); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); - state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", - surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); - state_out(name, surf, surfoff, 4, "mip base %d\n", - surf->ss5.min_lod); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - surf->ss5.x_offset, surf->ss5.y_offset); +static void +dump_sdc(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SDC"; + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5 && intel->gen <= 6) { + struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "unorm rgba\n"); + batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]); + batch_out(brw, name, offset, 3, "g %f\n", sdc->f[2]); + batch_out(brw, name, offset, 4, "a %f\n", sdc->f[3]); + batch_out(brw, name, offset, 5, "half float rg\n"); + batch_out(brw, name, offset, 6, "half float ba\n"); + batch_out(brw, name, offset, 7, "u16 rg\n"); + batch_out(brw, name, offset, 8, "u16 ba\n"); + batch_out(brw, name, offset, 9, "s16 rg\n"); + batch_out(brw, name, offset, 10, "s16 ba\n"); + batch_out(brw, name, offset, 11, "s8 rgba\n"); + } else { + struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "r %f\n", sdc->color[0]); + batch_out(brw, name, offset, 1, "g %f\n", sdc->color[1]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->color[2]); + batch_out(brw, name, offset, 3, "a %f\n", sdc->color[3]); } - drm_intel_bo_unmap(bo); } -static void dump_wm_sampler_state(struct brw_context *brw) +static void dump_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; int i; + struct brw_sampler_state *samp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct brw_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - samp = (struct brw_sampler_state *)(intel->batch.bo->virtual + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); - - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - if (intel->gen >= 5) { - struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "unorm rgba\n"); - state_out(name, sdc, sdc_offset, 1, "r %f\n", sdc->f[0]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->f[1]); - state_out(name, sdc, sdc_offset, 3, "g %f\n", sdc->f[2]); - state_out(name, sdc, sdc_offset, 4, "a %f\n", sdc->f[3]); - state_out(name, sdc, sdc_offset, 5, "half float rg\n"); - state_out(name, sdc, sdc_offset, 6, "half float ba\n"); - state_out(name, sdc, sdc_offset, 7, "u16 rg\n"); - state_out(name, sdc, sdc_offset, 8, "u16 ba\n"); - state_out(name, sdc, sdc_offset, 9, "s16 rg\n"); - state_out(name, sdc, sdc_offset, 10, "s16 ba\n"); - state_out(name, sdc, sdc_offset, 11, "s8 rgba\n"); - } else { - struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); - } + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); } - drm_intel_bo_unmap(intel->batch.bo); } -static void dump_gen7_sampler_state(struct brw_context *brw) +static void dump_gen7_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; + struct gen7_sampler_state *samp = intel->batch.bo->virtual + offset; int i; assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct gen7_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - samp = (struct gen7_sampler_state *) - (intel->batch.bo->virtual + brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); - - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - struct brw_sampler_default_color *sdc = - intel->batch.bo->virtual + brw->wm.sdc_offset[i]; - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); } drm_intel_bo_unmap(intel->batch.bo); } -static void dump_sf_viewport_state(struct brw_context *brw) +static void dump_sf_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF VP"; - struct brw_sf_viewport *vp; - uint32_t vp_off; + struct brw_sf_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - - state_out(name, vp, vp_off, 6, "top left = %d,%d\n", + batch_out(brw, name, offset, 6, "top left = %d,%d\n", vp->scissor.xmin, vp->scissor.ymin); - state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", + batch_out(brw, name, offset, 7, "bottom right = %d,%d\n", vp->scissor.xmax, vp->scissor.ymax); - - drm_intel_bo_unmap(intel->batch.bo); } -static void dump_clip_viewport_state(struct brw_context *brw) +static void dump_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "CLIP VP"; - struct brw_clipper_viewport *vp; - uint32_t vp_off; + struct brw_clipper_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->clip.vp_offset; - vp_off = intel->batch.bo->offset + brw->clip.vp_offset; - - state_out(name, vp, vp_off, 0, "xmin = %f\n", vp->xmin); - state_out(name, vp, vp_off, 1, "xmax = %f\n", vp->xmax); - state_out(name, vp, vp_off, 2, "ymin = %f\n", vp->ymin); - state_out(name, vp, vp_off, 3, "ymax = %f\n", vp->ymax); - drm_intel_bo_unmap(intel->batch.bo); + batch_out(brw, name, offset, 0, "xmin = %f\n", vp->xmin); + batch_out(brw, name, offset, 1, "xmax = %f\n", vp->xmax); + batch_out(brw, name, offset, 2, "ymin = %f\n", vp->ymin); + batch_out(brw, name, offset, 3, "ymax = %f\n", vp->ymax); } -static void dump_sf_clip_viewport_state(struct brw_context *brw) +static void dump_sf_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF_CLIP VP"; - struct gen7_sf_clip_viewport *vp; - uint32_t vp_off; + struct gen7_sf_clip_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; - - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 6, "guardband xmin = %f\n", vp->guardband.xmin); - state_out(name, vp, vp_off, 7, "guardband xmax = %f\n", vp->guardband.xmax); - state_out(name, vp, vp_off, 8, "guardband ymin = %f\n", vp->guardband.ymin); - state_out(name, vp, vp_off, 9, "guardband ymax = %f\n", vp->guardband.ymax); - drm_intel_bo_unmap(intel->batch.bo); + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); + batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin); + batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax); + batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin); + batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax); } -static void dump_cc_viewport_state(struct brw_context *brw) +static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; const char *name = "CC VP"; - struct brw_cc_viewport *vp; - uint32_t vp_off; + struct brw_cc_viewport *vp = brw->intel.batch.bo->virtual + offset; - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->cc.vp_offset; - vp_off = intel->batch.bo->offset + brw->cc.vp_offset; + batch_out(brw, name, offset, 0, "min_depth = %f\n", vp->min_depth); + batch_out(brw, name, offset, 1, "max_depth = %f\n", vp->max_depth); +} - state_out(name, vp, vp_off, 0, "min_depth = %f\n", vp->min_depth); - state_out(name, vp, vp_off, 1, "max_depth = %f\n", vp->max_depth); - drm_intel_bo_unmap(intel->batch.bo); +static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "D_S"; + struct gen6_depth_stencil_state *ds = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "stencil %sable, func %d, write %sable\n", + ds->ds0.stencil_enable ? "en" : "dis", + ds->ds0.stencil_func, + ds->ds0.stencil_write_enable ? "en" : "dis"); + batch_out(brw, name, offset, 1, + "stencil test mask 0x%x, write mask 0x%x\n", + ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); + batch_out(brw, name, offset, 2, + "depth test %sable, func %d, write %sable\n", + ds->ds2.depth_test_enable ? "en" : "dis", + ds->ds2.depth_test_func, + ds->ds2.depth_write_enable ? "en" : "dis"); } -static void dump_depth_stencil_state(struct brw_context *brw) +static void dump_cc_state_gen4(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; - const char *name = "DEPTH STENCIL"; - struct gen6_depth_stencil_state *ds; - uint32_t ds_off; - - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - ds = intel->batch.bo->virtual + brw->cc.depth_stencil_state_offset; - ds_off = intel->batch.bo->offset + brw->cc.depth_stencil_state_offset; - - state_out(name, ds, ds_off, 0, "stencil %sable, func %d, write %sable\n", - ds->ds0.stencil_enable ? "en" : "dis", - ds->ds0.stencil_func, - ds->ds0.stencil_write_enable ? "en" : "dis"); - state_out(name, ds, ds_off, 1, "stencil test mask 0x%x, write mask 0x%x\n", - ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); - state_out(name, ds, ds_off, 2, "depth test %sable, func %d, write %sable\n", - ds->ds2.depth_test_enable ? "en" : "dis", - ds->ds2.depth_test_func, - ds->ds2.depth_write_enable ? "en" : "dis"); - drm_intel_bo_unmap(intel->batch.bo); + const char *name = "CC"; + + batch_out(brw, name, offset, 0, "cc0\n"); + batch_out(brw, name, offset, 1, "cc1\n"); + batch_out(brw, name, offset, 2, "cc2\n"); + batch_out(brw, name, offset, 3, "cc3\n"); + batch_out(brw, name, offset, 4, "cc4: viewport offset\n"); + batch_out(brw, name, offset, 5, "cc5\n"); + batch_out(brw, name, offset, 6, "cc6\n"); + batch_out(brw, name, offset, 7, "cc7\n"); } -static void dump_cc_state(struct brw_context *brw) +static void dump_cc_state_gen6(struct brw_context *brw, uint32_t offset) { const char *name = "CC"; - struct gen6_color_calc_state *cc; - uint32_t cc_off; - dri_bo *bo = brw->intel.batch.bo; - - if (brw->cc.state_offset == 0) - return; - - drm_intel_bo_map(bo, GL_FALSE); - cc = bo->virtual + brw->cc.state_offset; - cc_off = bo->offset + brw->cc.state_offset; - - state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d," - "bf stencil ref %d\n", - cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", - cc->cc0.round_disable, - cc->cc0.stencil_ref, - cc->cc0.bf_stencil_ref); - state_out(name, cc, cc_off, 1, "\n"); - state_out(name, cc, cc_off, 2, "constant red %f\n", cc->constant_r); - state_out(name, cc, cc_off, 3, "constant green %f\n", cc->constant_g); - state_out(name, cc, cc_off, 4, "constant blue %f\n", cc->constant_b); - state_out(name, cc, cc_off, 5, "constant alpha %f\n", cc->constant_a); - - drm_intel_bo_unmap(bo); + struct gen6_color_calc_state *cc = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "alpha test format %s, round disable %d, stencil ref %d, " + "bf stencil ref %d\n", + cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", + cc->cc0.round_disable, + cc->cc0.stencil_ref, + cc->cc0.bf_stencil_ref); + batch_out(brw, name, offset, 1, "\n"); + batch_out(brw, name, offset, 2, "constant red %f\n", cc->constant_r); + batch_out(brw, name, offset, 3, "constant green %f\n", cc->constant_g); + batch_out(brw, name, offset, 4, "constant blue %f\n", cc->constant_b); + batch_out(brw, name, offset, 5, "constant alpha %f\n", cc->constant_a); +} +static void dump_blend_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "BLEND"; + + batch_out(brw, name, offset, 0, "\n"); + batch_out(brw, name, offset, 1, "\n"); } -static void dump_blend_state(struct brw_context *brw) +static void +dump_scissor(struct brw_context *brw, uint32_t offset) { + const char *name = "SCISSOR"; struct intel_context *intel = &brw->intel; - const char *name = "BLEND"; - struct gen6_blend_state *blend; - uint32_t blend_off; + struct gen6_scissor_rect *scissor = intel->batch.bo->virtual + offset; - drm_intel_bo_map(intel->batch.bo, GL_FALSE); + batch_out(brw, name, offset, 0, "xmin %d, ymin %d\n", + scissor->xmin, scissor->ymin); + batch_out(brw, name, offset, 1, "xmax %d, ymax %d\n", + scissor->xmax, scissor->ymax); +} - blend = intel->batch.bo->virtual + brw->cc.blend_state_offset; - blend_off = intel->batch.bo->offset + brw->cc.blend_state_offset; +static void +dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "VS_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; - state_out(name, blend, blend_off, 0, "\n"); - state_out(name, blend, blend_off, 1, "\n"); + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} - drm_intel_bo_unmap(intel->batch.bo); +static void dump_binding_table(struct brw_context *brw, uint32_t offset, + uint32_t size) +{ + char name[20]; + int i; + uint32_t *data = brw->intel.batch.bo->virtual + offset; + + for (i = 0; i < size / 4; i++) { + if (data[i] == 0) + continue; + sprintf(name, "BIND%d", i); + batch_out(brw, name, offset, i, "surface state address\n"); + } } -static void brw_debug_prog(struct brw_context *brw, - const char *name, uint32_t prog_offset) +static void +dump_prog_cache(struct brw_context *brw) { - unsigned int i; + struct intel_context *intel = &brw->intel; + struct brw_cache *cache = &brw->cache; + unsigned int b, i; uint32_t *data; drm_intel_bo_map(brw->cache.bo, false); - data = brw->cache.bo->virtual + prog_offset; - - for (i = 0; i < brw->cache.bo->size / 4 / 4; i++) { - fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - name, (unsigned int)brw->cache.bo->offset + i * 4 * 4, - data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); - /* Stop at the end of the program. It'd be nice to keep track of the actual - * intended program size instead of guessing like this. - */ - if (data[i * 4 + 0] == 0 && - data[i * 4 + 1] == 0 && - data[i * 4 + 2] == 0 && - data[i * 4 + 3] == 0) - break; + for (b = 0; b < cache->size; b++) { + struct brw_cache_item *item; + + for (item = cache->items[b]; item; item = item->next) { + const char *name; + uint32_t offset = item->offset; + + data = brw->cache.bo->virtual + item->offset; + + switch (item->cache_id) { + case BRW_VS_PROG: + name = "VS kernel"; + break; + case BRW_GS_PROG: + name = "GS kernel"; + break; + case BRW_CLIP_PROG: + name = "CLIP kernel"; + break; + case BRW_SF_PROG: + name = "SF kernel"; + break; + case BRW_WM_PROG: + name = "WM kernel"; + break; + default: + name = "unknown"; + break; + } + + for (i = 0; i < item->size / 4 / 4; i++) { + fprintf(stderr, "0x%08x: %8s: 0x%08x 0x%08x 0x%08x 0x%08x ", + offset + i * 4 * 4, + name, + data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + + brw_disasm(stderr, (void *)(data + i * 4), intel->gen); + } + } } drm_intel_bo_unmap(brw->cache.bo); } +static void +dump_state_batch(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + int i; + + for (i = 0; i < brw->state_batch_count; i++) { + uint32_t offset = brw->state_batch_list[i].offset; + uint32_t size = brw->state_batch_list[i].size; + + switch (brw->state_batch_list[i].type) { + case AUB_TRACE_VS_STATE: + dump_vs_state(brw, offset); + break; + case AUB_TRACE_GS_STATE: + dump_gs_state(brw, offset); + break; + case AUB_TRACE_CLIP_STATE: + dump_clip_state(brw, offset); + break; + case AUB_TRACE_SF_STATE: + dump_sf_state(brw, offset); + break; + case AUB_TRACE_WM_STATE: + dump_wm_state(brw, offset); + break; + case AUB_TRACE_CLIP_VP_STATE: + dump_clip_viewport_state(brw, offset); + break; + case AUB_TRACE_SF_VP_STATE: + if (intel->gen >= 7) { + dump_sf_clip_viewport_state(brw, offset); + } else { + dump_sf_viewport_state(brw, offset); + } + break; + case AUB_TRACE_CC_VP_STATE: + dump_cc_viewport_state(brw, offset); + break; + case AUB_TRACE_DEPTH_STENCIL_STATE: + dump_depth_stencil_state(brw, offset); + break; + case AUB_TRACE_CC_STATE: + if (intel->gen >= 6) + dump_cc_state_gen6(brw, offset); + else + dump_cc_state_gen4(brw, offset); + break; + case AUB_TRACE_BLEND_STATE: + dump_blend_state(brw, offset); + break; + case AUB_TRACE_BINDING_TABLE: + dump_binding_table(brw, offset, size); + break; + case AUB_TRACE_SURFACE_STATE: + if (intel->gen < 7) { + dump_surface_state(brw, offset); + } else { + dump_gen7_surface_state(brw, offset); + } + break; + case AUB_TRACE_SAMPLER_STATE: + if (intel->gen < 7) { + dump_sampler_state(brw, offset, size); + } else { + dump_gen7_sampler_state(brw, offset, size); + } + break; + case AUB_TRACE_SAMPLER_DEFAULT_COLOR: + dump_sdc(brw, offset); + break; + case AUB_TRACE_SCISSOR_STATE: + dump_scissor(brw, offset); + break; + case AUB_TRACE_VS_CONSTANTS: + dump_vs_constants(brw, offset, size); + break; + default: + break; + } + } +} /** * Print additional debug information associated with the batchbuffer @@ -501,51 +622,10 @@ void brw_debug_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - state_struct_out("WM bind", - brw->intel.batch.bo, - brw->wm.bind_bo_offset, - 4 * brw->wm.nr_surfaces); - if (intel->gen < 7) { - dump_wm_surface_state(brw); - dump_wm_sampler_state(brw); - } else { - dump_gen7_surface_state(brw); - dump_gen7_sampler_state(brw); - } - - if (intel->gen < 6) - state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, - sizeof(struct brw_vs_unit_state)); - brw_debug_prog(brw, "VS prog", brw->vs.prog_offset); - - if (intel->gen < 6) - state_struct_out("GS", intel->batch.bo, brw->gs.state_offset, - sizeof(struct brw_gs_unit_state)); - if (brw->gs.prog_active) { - brw_debug_prog(brw, "GS prog", brw->gs.prog_offset); - } + drm_intel_bo_map(intel->batch.bo, false); + dump_state_batch(brw); + drm_intel_bo_unmap(intel->batch.bo); - if (intel->gen < 6) { - state_struct_out("SF", intel->batch.bo, brw->sf.state_offset, - sizeof(struct brw_sf_unit_state)); - brw_debug_prog(brw, "SF prog", brw->sf.prog_offset); - } - if (intel->gen >= 7) - dump_sf_clip_viewport_state(brw); - else - dump_sf_viewport_state(brw); - if (intel->gen == 6) - dump_clip_viewport_state(brw); - - if (intel->gen < 6) - state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, - sizeof(struct brw_wm_unit_state)); - brw_debug_prog(brw, "WM prog", brw->wm.prog_offset); - - if (intel->gen >= 6) { - dump_cc_viewport_state(brw); - dump_depth_stencil_state(brw); - dump_cc_state(brw); - dump_blend_state(brw); - } + if (0) + dump_prog_cache(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 179ca199b45..fc4373ab311 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -43,7 +43,8 @@ brw_prepare_vs_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_vs_unit_state *vs; - vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset); + vs = brw_state_batch(brw, AUB_TRACE_VS_STATE, + sizeof(*vs), 32, &brw->vs.state_offset); memset(vs, 0, sizeof(*vs)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 611f6333689..f9ee4d112a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -182,7 +182,8 @@ static void upload_vs_surfaces(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. (once we have vs samplers) */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(uint32_t) * BRW_VS_MAX_SURF, 32, &brw->vs.bind_bo_offset); for (i = 0; i < BRW_VS_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 8612e743265..6aeeda6e0fa 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -123,6 +123,8 @@ static void brw_new_batch( struct intel_context *intel ) */ intel->batch.need_workaround_flush = true; + brw->state_batch_count = 0; + brw->vb.nr_current_buffers = 0; /* Mark that the current program cache BO has been used by the GPU. diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 59dcda7b414..7cd3edad235 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -563,13 +563,14 @@ static void precalc_dst( struct brw_wm_compile *c, struct prog_src_register src0 = inst->SrcReg[0]; struct prog_src_register src1 = inst->SrcReg[1]; struct prog_dst_register dst = inst->DstReg; - + struct prog_dst_register temp = get_temp(c); + if (dst.WriteMask & WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op(c, OPCODE_MUL, - dst_mask(dst, WRITEMASK_Y), + dst_mask(temp, WRITEMASK_Y), inst->SaturateMode, src0, src1, @@ -584,7 +585,7 @@ static void precalc_dst( struct brw_wm_compile *c, */ swz = emit_op(c, OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XZ), + dst_mask(temp, WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), @@ -597,12 +598,26 @@ static void precalc_dst( struct brw_wm_compile *c, */ emit_op(c, OPCODE_MOV, - dst_mask(dst, WRITEMASK_W), + dst_mask(temp, WRITEMASK_W), inst->SaturateMode, src1, src_undef(), src_undef()); } + + /* This will get optimized out in general, but it ensures that we + * don't overwrite src operands in our channel-wise splitting + * above. See piglit fp-dst-aliasing-[12]. + */ + emit_op(c, + OPCODE_MOV, + dst, + 0, + src_reg_from_dst(temp), + src_undef(), + src_undef()); + + release_temp(c, temp); } @@ -611,7 +626,17 @@ static void precalc_lit( struct brw_wm_compile *c, { struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register dst = inst->DstReg; - + + if (dst.WriteMask & WRITEMASK_YZ) { + emit_op(c, + OPCODE_LIT, + dst_mask(dst, WRITEMASK_YZ), + inst->SaturateMode, + src0, + src_undef(), + src_undef()); + } + if (dst.WriteMask & WRITEMASK_XW) { struct prog_instruction *swz; @@ -627,16 +652,6 @@ static void precalc_lit( struct brw_wm_compile *c, /* Avoid letting the negation flag of src0 affect our 1 constant. */ swz->SrcReg[0].Negate = NEGATE_NONE; } - - if (dst.WriteMask & WRITEMASK_YZ) { - emit_op(c, - OPCODE_LIT, - dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, - src0, - src_undef(), - src_undef()); - } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 5de39aa4575..98146136703 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -108,7 +108,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, if (intel->gen == 5 || intel->gen == 6) { struct gen5_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); memset(sdc, 0, sizeof(*sdc)); @@ -144,7 +145,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, } else { struct brw_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); COPY_4V(sdc->color, color); } @@ -326,7 +328,8 @@ prepare_wm_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 506e2bdff5b..c820ce48c29 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -78,7 +78,8 @@ brw_prepare_wm_unit(struct brw_context *brw) const struct gl_fragment_program *fp = brw->fragment_program; struct brw_wm_unit_state *wm; - wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.state_offset); + wm = brw_state_batch(brw, AUB_TRACE_WM_STATE, + sizeof(*wm), 32, &brw->wm.state_offset); memset(wm, 0, sizeof(*wm)); if (brw->wm.prog_data->prog_offset_16) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 89fea9cc952..fb4fb146f8d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -226,7 +226,8 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[surf_index]); surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -272,7 +273,8 @@ brw_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, out_offset); surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -404,7 +406,8 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) struct intel_context *intel = &brw->intel; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); @@ -439,7 +442,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, uint32_t tile_x, tile_y; uint32_t format = 0; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); switch (irb->Base.Format) { case MESA_FORMAT_XRGB8888: @@ -637,7 +641,8 @@ brw_wm_upload_binding_table(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, + sizeof(uint32_t) * BRW_WM_MAX_SURF, 32, &brw->wm.bind_bo_offset); for (i = 0; i < BRW_WM_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 294d5a5e644..41d13ad2bf4 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -51,7 +51,8 @@ prepare_blend_state(struct brw_context *brw) nr_draw_buffers = 1; size = sizeof(*blend) * nr_draw_buffers; - blend = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset); + blend = brw_state_batch(brw, AUB_TRACE_BLEND_STATE, + size, 64, &brw->cc.blend_state_offset); memset(blend, 0, size); @@ -139,7 +140,8 @@ gen6_prepare_color_calc_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_color_calc_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_COLOR */ diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index 775e1ce2c9c..5d14147db3d 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -34,7 +34,8 @@ gen6_prepare_depth_stencil_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_depth_stencil_state *ds; - ds = brw_state_batch(brw, sizeof(*ds), 64, + ds = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*ds), 64, &brw->cc.depth_stencil_state_offset); memset(ds, 0, sizeof(*ds)); diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 7492e508864..dc73b10f4cd 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -39,7 +39,8 @@ gen6_upload_scissor_state(struct brw_context *brw) struct gen6_scissor_rect *scissor; uint32_t scissor_state_offset; - scissor = brw_state_batch(brw, sizeof(*scissor), 32, &scissor_state_offset); + scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE, + sizeof(*scissor), 32, &scissor_state_offset); /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index c6c55c926c7..a4bfa54837d 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -43,7 +43,8 @@ prepare_clip_vp(struct brw_context *brw) { struct brw_clipper_viewport *vp; - vp = brw_state_batch(brw, sizeof(*vp), 32, &brw->clip.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_CLIP_VP_STATE, + sizeof(*vp), 32, &brw->clip.vp_offset); vp->xmin = -1.0; vp->xmax = 1.0; @@ -72,7 +73,8 @@ prepare_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); /* _NEW_BUFFERS */ diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 022e23e12b0..fb4cdbaadf9 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -60,7 +60,7 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) float *param; int i; - param = brw_state_batch(brw, + param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS, (MAX_CLIP_PLANES + nr_params) * 4 * sizeof(float), 32, &brw->vs.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 9ef6133e2b9..185da9c355f 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -54,7 +54,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 95f6fbf7414..e787c21f4d1 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -183,7 +183,8 @@ gen7_prepare_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 99efe96a1fa..0f97cea652d 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -39,11 +39,12 @@ upload_sbe_state(struct brw_context *brw) uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); uint32_t dw1, dw10, dw11; int i; - int attr = 0; + int attr = 0, input_index = 0; /* _NEW_TRANSFORM */ int urb_start = ctx->Transform.ClipPlanesEnabled ? 2 : 1; /* _NEW_LIGHT */ int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + uint16_t attr_overrides[FRAG_ATTRIB_MAX]; /* FINISHME: Attribute Swizzle Control Mode? */ dw1 = @@ -57,12 +58,6 @@ upload_sbe_state(struct brw_context *brw) dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; dw10 = 0; - if (ctx->Point.PointSprite) { - for (i = 0; i < 8; i++) { - if (ctx->Point.CoordReplace[i]) - dw10 |= (1 << i); - } - } /* _NEW_LIGHT (flat shading) */ dw11 = 0; @@ -71,30 +66,43 @@ upload_sbe_state(struct brw_context *brw) ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); } + /* Create the mapping from the FS inputs we produce to the VS outputs + * they source from. + */ + for (; attr < FRAG_ATTRIB_MAX; attr++) { + if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) + continue; + + if (ctx->Point.PointSprite && + attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7 && + ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) { + dw10 |= (1 << input_index); + } + + if (attr == FRAG_ATTRIB_PNTC) + dw10 |= (1 << input_index); + + /* The hardware can only do the overrides on 16 overrides at a + * time, and the other up to 16 have to be lined up so that the + * input index = the output index. We'll need to do some + * tweaking to make sure that's the case. + */ + assert(input_index < 16 || attr == input_index); + + attr_overrides[input_index++] = get_attr_override(brw, attr, + two_side_color); + } + + for (; attr < FRAG_ATTRIB_MAX; attr++) + attr_overrides[input_index++] = 0; + BEGIN_BATCH(14); OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); OUT_BATCH(dw1); /* Output dwords 2 through 9 */ for (i = 0; i < 8; i++) { - uint32_t attr_overrides = 0; - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color); - attr++; - break; - } - } - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; - attr++; - break; - } - } - OUT_BATCH(attr_overrides); + OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); } OUT_BATCH(dw10); /* point sprite texcoord bitmask */ diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index 838ad3a3948..e9aacd56317 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -36,7 +36,8 @@ prepare_sf_clip_viewport(struct brw_context *brw) const GLfloat *v = ctx->Viewport._WindowMap.m; struct gen7_sf_clip_viewport *vp; - vp = brw_state_batch(brw, sizeof(vp), 64, &brw->sf.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(vp), 64, &brw->sf.vp_offset); /* Also assign to clip.vp_offset in case something uses it. */ brw->clip.vp_offset = brw->sf.vp_offset; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 0f5b06cbf48..a102ca772b3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -51,7 +51,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 9994b67bfc5..4add1a69f02 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -65,8 +65,8 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[surf_index]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = translate_tex_target(tObj->Target); @@ -135,7 +135,8 @@ gen7_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, out_offset); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_BUFFER; @@ -210,8 +211,8 @@ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit) { struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_NULL; @@ -235,8 +236,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, struct gen7_surface_state *surf; uint32_t tile_x, tile_y; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); switch (irb->Base.Format) { diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 292b7b034ee..2ba13632569 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -56,7 +56,7 @@ #include "drirenderbuffer.h" #include "utils.h" - +#include "../glsl/ralloc.h" #ifndef INTEL_DEBUG int INTEL_DEBUG = (0); @@ -924,7 +924,7 @@ intelDestroyContext(__DRIcontext * driContextPriv) _math_matrix_dtr(&intel->ViewportMatrix); - FREE(intel); + ralloc_free(intel); driContextPriv->driverPrivate = NULL; } } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 148fb0c2c9a..1727badb704 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -199,6 +199,7 @@ struct intel_context drm_intel_bo *first_post_swapbuffers_batch; GLboolean need_throttle; GLboolean no_batch_wrap; + bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */ struct { diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 269faefa1c0..1f8b885bbec 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -120,25 +120,6 @@ intel_miptree_create_for_teximage(struct intel_context *intel, expect_accelerated_upload); } - - - -static GLuint -target_to_face(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return ((GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); - default: - return 0; - } -} - /* There are actually quite a few combinations this will work for, * more than what I've listed here. */ @@ -426,7 +407,7 @@ intelTexImage(struct gl_context * ctx, DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; if (_mesa_is_format_compressed(texImage->TexFormat)) { @@ -835,7 +816,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->region->width, rb->region->height, 1, 0, internalFormat, texFormat); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; texImage->RowStride = rb->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); @@ -893,7 +874,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->region->width, image->region->height, 1, 0, image->internal_format, image->format); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = 0; texImage->RowStride = image->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); diff --git a/src/mesa/drivers/dri/nouveau/nv10_render.c b/src/mesa/drivers/dri/nouveau/nv10_render.c index 20fb4478426..6134650346d 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_render.c +++ b/src/mesa/drivers/dri/nouveau/nv10_render.c @@ -99,7 +99,7 @@ get_hw_format(int type) case GL_UNSIGNED_SHORT: return NV10_3D_VTXBUF_FMT_TYPE_V16_SNORM; case GL_UNSIGNED_BYTE: - return NV10_3D_VTXBUF_FMT_TYPE_B8G8R8A8_UNORM; + return NV10_3D_VTXBUF_FMT_TYPE_U8_UNORM; default: assert(0); } diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 48657b44be1..1f5fc33d775 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1097,12 +1097,16 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) parselist++; break; case GLX_FBCONFIG_ID: + case GLX_VISUAL_ID: if (!fbConfig) return NULL; parselist++; desiredVisualID = *parselist++; break; case GLX_X_RENDERABLE: + case GLX_MAX_PBUFFER_WIDTH: + case GLX_MAX_PBUFFER_HEIGHT: + case GLX_MAX_PBUFFER_PIXELS: if (!fbConfig) return NULL; parselist += 2; diff --git a/src/mesa/main/depthstencil.c b/src/mesa/main/depthstencil.c index 4d0600050ff..40d6c9612a2 100644 --- a/src/mesa/main/depthstencil.c +++ b/src/mesa/main/depthstencil.c @@ -63,7 +63,8 @@ static void delete_wrapper(struct gl_renderbuffer *rb) { ASSERT(rb->Format == MESA_FORMAT_S8 || - rb->Format == MESA_FORMAT_X8_Z24); + rb->Format == MESA_FORMAT_X8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT); _mesa_reference_renderbuffer(&rb->Wrapped, NULL); free(rb); } diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index f58b1975672..f9298d2d1e9 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -1485,7 +1485,9 @@ _mesa_test_formats(void) info->DataType == GL_SIGNED_NORMALIZED || info->DataType == GL_UNSIGNED_INT || info->DataType == GL_INT || - info->DataType == GL_FLOAT); + info->DataType == GL_FLOAT || + /* Z32_FLOAT_X24S8 has DataType of GL_NONE */ + info->DataType == GL_NONE); if (info->BaseFormat == GL_RGB) { assert(info->RedBits > 0); diff --git a/src/mesa/main/texfetch_tmp.h b/src/mesa/main/texfetch_tmp.h index 3b1eedf39bf..d170adf2e00 100644 --- a/src/mesa/main/texfetch_tmp.h +++ b/src/mesa/main/texfetch_tmp.h @@ -2287,7 +2287,8 @@ static void FETCH(f_z24_s8)( const struct gl_texture_image *texImage, const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1); const GLfloat scale = 1.0F / (GLfloat) 0xffffff; texel[0] = ((*src) >> 8) * scale; - ASSERT(texImage->TexFormat == MESA_FORMAT_Z24_S8); + ASSERT(texImage->TexFormat == MESA_FORMAT_Z24_S8 || + texImage->TexFormat == MESA_FORMAT_Z24_X8); ASSERT(texel[0] >= 0.0F); ASSERT(texel[0] <= 1.0F); } @@ -2314,7 +2315,8 @@ static void FETCH(f_s8_z24)( const struct gl_texture_image *texImage, const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1); const GLfloat scale = 1.0F / (GLfloat) 0xffffff; texel[0] = ((*src) & 0x00ffffff) * scale; - ASSERT(texImage->TexFormat == MESA_FORMAT_S8_Z24); + ASSERT(texImage->TexFormat == MESA_FORMAT_S8_Z24 || + texImage->TexFormat == MESA_FORMAT_X8_Z24); ASSERT(texel[0] >= 0.0F); ASSERT(texel[0] <= 1.0F); } diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 738e97ca55c..d8e5a3a9772 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1415,9 +1415,9 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir) case ir_var_in: case ir_var_inout: /* The linker assigns locations for varyings and attributes, - * including deprecated builtins (like gl_Color), user-assign - * generic attributes (glBindVertexLocation), and - * user-defined varyings. + * including deprecated builtins (like gl_Color), + * user-assigned generic attributes (glBindVertexLocation), + * and user-defined varyings. * * FINISHME: We would hit this path for function arguments. Fix! */ diff --git a/src/mesa/state_tracker/st_cb_condrender.c b/src/mesa/state_tracker/st_cb_condrender.c index 64c6c117fca..1ced560e160 100644 --- a/src/mesa/state_tracker/st_cb_condrender.c +++ b/src/mesa/state_tracker/st_cb_condrender.c @@ -41,6 +41,7 @@ #include "st_context.h" #include "st_cb_queryobj.h" #include "st_cb_condrender.h" +#include "st_cb_bitmap.h" /** @@ -55,6 +56,8 @@ st_BeginConditionalRender(struct gl_context *ctx, struct gl_query_object *q, struct pipe_context *pipe = st->pipe; uint m; + st_flush_bitmap_cache(st); + switch (mode) { case GL_QUERY_WAIT: m = PIPE_RENDER_COND_WAIT; @@ -90,6 +93,8 @@ st_EndConditionalRender(struct gl_context *ctx, struct gl_query_object *q) struct pipe_context *pipe = st->pipe; (void) q; + st_flush_bitmap_cache(st); + pipe->render_condition(pipe, NULL, 0); st->render_condition = NULL; } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index dca3324645c..1d908c0317a 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -273,26 +273,6 @@ make_passthrough_vertex_shader(struct st_context *st, /** - * Return a texture base format for drawing/copying an image - * of the given format. - */ -static GLenum -base_format(GLenum format) -{ - switch (format) { - case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; - case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; - case GL_STENCIL_INDEX: - return GL_STENCIL_INDEX; - default: - return GL_RGBA; - } -} - - -/** * Return a texture internalFormat for drawing/copying an image * of the given format and type. */ @@ -301,11 +281,36 @@ internal_format(struct gl_context *ctx, GLenum format, GLenum type) { switch (format) { case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; + switch (type) { + case GL_UNSIGNED_SHORT: + return GL_DEPTH_COMPONENT16; + + case GL_UNSIGNED_INT: + return GL_DEPTH_COMPONENT32; + + case GL_FLOAT: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_COMPONENT32F; + else + return GL_DEPTH_COMPONENT; + + default: + return GL_DEPTH_COMPONENT; + } + case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; + switch (type) { + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + return GL_DEPTH32F_STENCIL8; + + case GL_UNSIGNED_INT_24_8: + default: + return GL_DEPTH24_STENCIL8; + } + case GL_STENCIL_INDEX: return GL_STENCIL_INDEX; + default: if (_mesa_is_integer_format(format)) { switch (type) { @@ -999,7 +1004,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; struct pipe_sampler_view *sv[2]; int num_sampler_view = 1; - enum pipe_format stencil_format = PIPE_FORMAT_NONE; struct st_fp_variant *fpv; if (format == GL_DEPTH_STENCIL) @@ -1009,33 +1013,12 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, else if (format == GL_DEPTH_COMPONENT) write_depth = GL_TRUE; - if (write_stencil) { - enum pipe_format tex_format; - /* can we write to stencil if not fallback */ - if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) - goto stencil_fallback; - - tex_format = st_choose_format(st->pipe->screen, base_format(format), - GL_NONE, GL_NONE, - PIPE_TEXTURE_2D, - 0, PIPE_BIND_SAMPLER_VIEW); - - switch (tex_format) { - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - stencil_format = PIPE_FORMAT_X24S8_USCALED; - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - stencil_format = PIPE_FORMAT_S8X24_USCALED; - break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: - stencil_format = PIPE_FORMAT_X32_S8X24_USCALED; - break; - case PIPE_FORMAT_S8_USCALED: - stencil_format = PIPE_FORMAT_S8_USCALED; - break; - default: - goto stencil_fallback; - } + if (write_stencil && + !pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) { + /* software fallback */ + draw_stencil_pixels(ctx, x, y, width, height, format, type, + unpack, pixels); + return; } /* Mesa state should be up to date by now */ @@ -1080,7 +1063,32 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, sv[0] = st_create_texture_sampler_view(st->pipe, pt); if (sv[0]) { - if (write_stencil) { + /* Create a second sampler view to read stencil. + * The stencil is written using the shader stencil export + * functionality. */ + if (write_stencil) { + enum pipe_format stencil_format = PIPE_FORMAT_NONE; + + switch (pt->format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_X24S8_USCALED: + stencil_format = PIPE_FORMAT_X24S8_USCALED; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_S8X24_USCALED: + stencil_format = PIPE_FORMAT_S8X24_USCALED; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + case PIPE_FORMAT_X32_S8X24_USCALED: + stencil_format = PIPE_FORMAT_X32_S8X24_USCALED; + break; + case PIPE_FORMAT_S8_USCALED: + stencil_format = PIPE_FORMAT_S8_USCALED; + break; + default: + assert(0); + } + sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, stencil_format); num_sampler_view++; @@ -1101,11 +1109,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, pipe_resource_reference(&pt, NULL); } } - return; - -stencil_fallback: - draw_stencil_pixels(ctx, x, y, width, height, format, type, - unpack, pixels); } diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c index d0ac253bcec..057499615bf 100644 --- a/src/mesa/state_tracker/st_cb_queryobj.c +++ b/src/mesa/state_tracker/st_cb_queryobj.c @@ -41,6 +41,7 @@ #include "pipe/p_defines.h" #include "st_context.h" #include "st_cb_queryobj.h" +#include "st_cb_bitmap.h" #if FEATURE_queryobj @@ -83,6 +84,8 @@ st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q) struct st_query_object *stq = st_query_object(q); unsigned type; + st_flush_bitmap_cache(st_context(ctx)); + /* convert GL query type to Gallium query type */ switch (q->Target) { case GL_ANY_SAMPLES_PASSED: @@ -128,6 +131,8 @@ st_EndQuery(struct gl_context *ctx, struct gl_query_object *q) struct pipe_context *pipe = st_context(ctx)->pipe; struct st_query_object *stq = st_query_object(q); + st_flush_bitmap_cache(st_context(ctx)); + pipe->end_query(pipe, stq->pq); } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 6eddbfc88e4..6d4bc544d0c 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -179,6 +179,9 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, st_init_driver_functions(&funcs); ctx = _mesa_create_context(api, visual, shareCtx, &funcs, NULL); + if (!ctx) { + return NULL; + } /* XXX: need a capability bit in gallium to query if the pipe * driver prefers DP4 or MUL/MAD for vertex transformation. |