diff options
Diffstat (limited to 'src/mesa/drivers')
41 files changed, 683 insertions, 494 deletions
diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index abfb32be3ae..d22118beb0b 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -33,6 +33,7 @@ #include "tnl/t_pipeline.h" #include "intel_span.h" #include "intel_tris.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -53,7 +54,7 @@ i830CreateContext(const struct gl_config * mesaVis, void *sharedContextPrivate) { struct dd_function_table functions; - struct i830_context *i830 = CALLOC_STRUCT(i830_context); + struct i830_context *i830 = rzalloc(NULL, struct i830_context); struct intel_context *intel = &i830->intel; struct gl_context *ctx = &intel->ctx; if (!i830) diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index d4af5e51026..71ce44fd5c9 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -76,7 +76,8 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f02f2d78267..11bee140ab6 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -36,6 +36,7 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" +#include "../glsl/ralloc.h" #include "i915_reg.h" #include "i915_program.h" @@ -97,8 +98,7 @@ i915CreateContext(int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct i915_context *i915 = - (struct i915_context *) CALLOC_STRUCT(i915_context); + struct i915_context *i915 = rzalloc(NULL, struct i915_context); struct intel_context *intel = &i915->intel; struct gl_context *ctx = &intel->ctx; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index b67ebb9a1ec..e9e8078328a 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -210,6 +210,7 @@ get_result_vector(struct i915_fragment_program *p, case PROGRAM_OUTPUT: switch (inst->DstReg.Index) { case FRAG_RESULT_COLOR: + case FRAG_RESULT_DATA0: return UREG(REG_TYPE_OC, 0); case FRAG_RESULT_DEPTH: p->depth_written = 1; diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c index 6e4512129cd..e6a47116223 100644 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c @@ -219,9 +219,9 @@ i915_miptree_layout_2d(struct intel_context *intel, width, height, 1); if (mt->compressed) - img_height = MAX2(1, height / 4); + img_height = ALIGN(height, 4) / 4; else - img_height = (MAX2(2, height) + 1) & ~1; + img_height = ALIGN(height, 2); mt->total_height += img_height; diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index bcf42d59969..7cd6820cd51 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -82,6 +82,7 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); case MESA_FORMAT_S8_Z24: + case MESA_FORMAT_X8_Z24: if (DepthMode == GL_ALPHA) return (MAPSURF_32BIT | MT_32BIT_x8A24); else if (DepthMode == GL_INTENSITY) @@ -89,7 +90,8 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) else return (MAPSURF_32BIT | MT_32BIT_x8L24); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 7bcb72f42d0..941c4350ddd 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1078,6 +1078,13 @@ intelRunPipeline(struct gl_context * ctx) if (ctx->NewState) _mesa_update_state_locked(ctx); + /* We need to get this done before we start the pipeline, or a + * change in the INTEL_FALLBACK() of its intel_draw_buffers() call + * while the pipeline is running will result in mismatched swrast + * map/unmaps, and later assertion failures. + */ + intel_prepare_render(intel); + if (intel->NewGLState) { if (intel->NewGLState & _NEW_TEXTURE) { intel->vtbl.update_texture_state(intel); @@ -1092,7 +1099,9 @@ intelRunPipeline(struct gl_context * ctx) } intel_map_vertex_shader_textures(ctx); + intel->tnl_pipeline_running = true; _tnl_run_pipeline(ctx); + intel->tnl_pipeline_running = false; intel_unmap_vertex_shader_textures(ctx); _mesa_unlock_context_textures(ctx); @@ -1228,6 +1237,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) if (mode) { intel->Fallback |= bit; if (oldfallback == 0) { + assert(!intel->tnl_pipeline_running); + intel_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "ENTER FALLBACK %x: %s\n", @@ -1239,6 +1250,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) else { intel->Fallback &= ~bit; if (oldfallback == bit) { + assert(!intel->tnl_pipeline_running); + _swrast_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString(bit)); diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 94b8c20b019..9c26150d241 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -43,7 +43,8 @@ prepare_cc_vp(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_viewport *ccv; - ccv = brw_state_batch(brw, sizeof(*ccv), 32, &brw->cc.vp_offset); + ccv = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, + sizeof(*ccv), 32, &brw->cc.vp_offset); /* _NEW_TRANSOFORM */ if (ctx->Transform.DepthClamp) { @@ -98,7 +99,8 @@ static void upload_cc_unit(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_unit_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_STENCIL */ diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index b9efbb74c87..31fbadf5ef2 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -40,7 +40,8 @@ brw_prepare_clip_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_clip_unit_state *clip; - clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); + clip = brw_state_batch(brw, AUB_TRACE_CLIP_STATE, + sizeof(*clip), 32, &brw->clip.state_offset); memset(clip, 0, sizeof(*clip)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 636821839a1..ac683bd9960 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -40,6 +40,7 @@ #include "brw_state.h" #include "intel_span.h" #include "tnl/t_pipeline.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -59,7 +60,7 @@ GLboolean brwCreateContext( int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); + struct brw_context *brw = rzalloc(NULL, struct brw_context); struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; unsigned i; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a8e2b802803..471015cf9d0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -188,6 +188,31 @@ struct brw_state_flags { GLuint cache; }; +enum state_struct_type { + AUB_TRACE_VS_STATE = 1, + AUB_TRACE_GS_STATE = 2, + AUB_TRACE_CLIP_STATE = 3, + AUB_TRACE_SF_STATE = 4, + AUB_TRACE_WM_STATE = 5, + AUB_TRACE_CC_STATE = 6, + AUB_TRACE_CLIP_VP_STATE = 7, + AUB_TRACE_SF_VP_STATE = 8, + AUB_TRACE_CC_VP_STATE = 0x9, + AUB_TRACE_SAMPLER_STATE = 0xa, + AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb, + AUB_TRACE_SCRATCH_SPACE = 0xc, + AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd, + + AUB_TRACE_SCISSOR_STATE = 0x15, + AUB_TRACE_BLEND_STATE = 0x16, + AUB_TRACE_DEPTH_STENCIL_STATE = 0x17, + + /* Not written to .aub files the same way the structures above are. */ + AUB_TRACE_NO_TYPE = 0x100, + AUB_TRACE_BINDING_TABLE = 0x101, + AUB_TRACE_SURFACE_STATE = 0x102, + AUB_TRACE_VS_CONSTANTS = 0x103, +}; /** Subclass of Mesa vertex program */ struct brw_vertex_program { @@ -744,6 +769,14 @@ struct brw_context int num_prepare_atoms, num_emit_atoms; struct brw_tracked_state prepare_atoms[64], emit_atoms[64]; + + /* If (INTEL_DEBUG & DEBUG_BATCH) */ + struct { + uint32_t offset; + uint32_t size; + enum state_struct_type type; + } *state_batch_list; + int state_batch_count; }; diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index bbfefcd816a..e0309e71fc3 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -41,7 +41,8 @@ brw_prepare_gs_unit(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct brw_gs_unit_state *gs; - gs = brw_state_batch(brw, sizeof(*gs), 32, &brw->gs.state_offset); + gs = brw_state_batch(brw, AUB_TRACE_GS_STATE, + sizeof(*gs), 32, &brw->gs.state_offset); memset(gs, 0, sizeof(*gs)); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index eb3d103099b..9201be7caab 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -46,7 +46,8 @@ static void upload_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); if (render_to_fbo) { @@ -129,7 +130,8 @@ static void upload_sf_unit( struct brw_context *brw ) int chipset_max_threads; bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; - sf = brw_state_batch(brw, sizeof(*sf), 64, &brw->sf.state_offset); + sf = brw_state_batch(brw, AUB_TRACE_SF_STATE, + sizeof(*sf), 64, &brw->sf.state_offset); memset(sf, 0, sizeof(*sf)); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b384651d8d0..cede4e5c916 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -172,6 +172,7 @@ void brw_destroy_caches( struct brw_context *brw ); sizeof(*(s)), false) void *brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset); diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 213c7a38d8c..5a983c3d847 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -32,6 +32,29 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "main/imports.h" +#include "../glsl/ralloc.h" + +static void +brw_track_state_batch(struct brw_context *brw, + enum state_struct_type type, + uint32_t offset, + int size) +{ + struct intel_batchbuffer *batch = &brw->intel.batch; + + if (!brw->state_batch_list) { + /* Our structs are always aligned to at least 32 bytes, so + * our array doesn't need to be any larger + */ + brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) * + batch->bo->size / 32); + } + + brw->state_batch_list[brw->state_batch_count].offset = offset; + brw->state_batch_list[brw->state_batch_count].size = size; + brw->state_batch_list[brw->state_batch_count].type = type; + brw->state_batch_count++; +} /** * Allocates a block of space in the batchbuffer for indirect state. @@ -49,6 +72,7 @@ */ void * brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset) @@ -71,6 +95,9 @@ brw_state_batch(struct brw_context *brw, batch->state_batch_offset = offset; + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + brw_track_state_batch(brw, type, offset, size); + *out_offset = offset; return batch->map + (offset>>2); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 7a3a88f04f5..b9e5cc1a534 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -31,44 +31,23 @@ #include "brw_context.h" #include "brw_defines.h" -/** - * Prints out a header, the contents, and the message associated with - * the hardware state data given. - * - * \param name Name of the state object - * \param data Pointer to the base of the state object - * \param hw_offset Hardware offset of the base of the state data. - * \param index Index of the DWORD being output. - */ static void -state_out(const char *name, void *data, uint32_t hw_offset, int index, - char *fmt, ...) -{ - va_list va; +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) PRINTFLIKE(5, 6); - fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", - name, hw_offset + index * 4, ((uint32_t *)data)[index]); - va_start(va, fmt); - vfprintf(stderr, fmt, va); - va_end(va); -} - -/** Generic, undecoded state buffer debug printout */ static void -state_struct_out(const char *name, drm_intel_bo *buffer, - unsigned int offset, unsigned int size) +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) { - int i; - - if (buffer == NULL) - return; - - drm_intel_bo_map(buffer, GL_FALSE); - for (i = 0; i < size / 4; i++) { - state_out(name, buffer->virtual + offset, buffer->offset + offset, i, - "dword %d\n", i); - } - drm_intel_bo_unmap(buffer); + struct intel_context *intel = &brw->intel; + uint32_t *data = intel->batch.bo->virtual + offset; + va_list va; + + fprintf(stderr, "0x%08x: 0x%08x: %8s: ", + offset + index * 4, data[index], name); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); } static const char * @@ -98,394 +77,536 @@ get_965_surface_format(unsigned int surface_format) } } -static void dump_wm_surface_state(struct brw_context *brw) +static void dump_vs_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + struct intel_context *intel = &brw->intel; + const char *name = "VS_STATE"; + struct brw_vs_unit_state *vs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + vs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; +static void dump_gs_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "GS_STATE"; + struct brw_gs_unit_state *gs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + gs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - uint32_t *surf; - char name[20]; +static void dump_clip_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "CLIP_STATE"; + struct brw_clip_unit_state *clip = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + clip->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "clip5\n"); + batch_out(brw, name, offset, 6, "clip6\n"); + batch_out(brw, name, offset, 7, "vp xmin %f\n", clip->viewport_xmin); + batch_out(brw, name, offset, 8, "vp xmax %f\n", clip->viewport_xmax); + batch_out(brw, name, offset, 9, "vp ymin %f\n", clip->viewport_ymin); + batch_out(brw, name, offset, 10, "vp ymax %f\n", clip->viewport_ymax); +} - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (uint32_t *)(base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), - get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, - GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1); - state_out(name, surf, surfoff, 3, "pitch %d, %s tiled\n", - GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, - (surf[3] & BRW_SURFACE_TILED) ? - ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); - state_out(name, surf, surfoff, 4, "mip base %d\n", - GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), - GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); - } - drm_intel_bo_unmap(bo); +static void dump_sf_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "SF_STATE"; + struct brw_sf_unit_state *sf = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + sf->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "sf5: viewport offset\n"); + batch_out(brw, name, offset, 6, "sf6\n"); + batch_out(brw, name, offset, 7, "sf7\n"); } -static void dump_gen7_surface_state(struct brw_context *brw) +static void dump_wm_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + struct intel_context *intel = &brw->intel; + const char *name = "WM_STATE"; + struct brw_wm_unit_state *wm = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "wm4\n"); + batch_out(brw, name, offset, 5, "wm5: %s%s%s%s%s%s, %d threads\n", + wm->wm5.enable_8_pix ? "8pix" : "", + wm->wm5.enable_16_pix ? "16pix" : "", + wm->wm5.program_uses_depth ? ", uses depth" : "", + wm->wm5.program_computes_depth ? ", computes depth" : "", + wm->wm5.program_uses_killpixel ? ", kills" : "", + wm->wm5.thread_dispatch_enable ? "" : ", no dispatch", + wm->wm5.max_threads + 1); + batch_out(brw, name, offset, 6, "depth offset constant %f\n", + wm->global_depth_offset_constant); + batch_out(brw, name, offset, 7, "depth offset scale %f\n", + wm->global_depth_offset_scale); + batch_out(brw, name, offset, 8, "wm8: kernel 1 (gen5+)\n"); + batch_out(brw, name, offset, 9, "wm9: kernel 2 (gen5+)\n"); + batch_out(brw, name, offset, 10, "wm10: kernel 3 (gen5+)\n"); +} - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; +static void dump_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + uint32_t *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1, + GET_FIELD(surf[2], BRW_SURFACE_LOD)); + batch_out(brw, name, offset, 3, "pitch %d, %s tiled\n", + GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, + (surf[3] & BRW_SURFACE_TILED) ? + ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); + batch_out(brw, name, offset, 4, "mip base %d\n", + GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); +} - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - struct gen7_surface_state *surf; - char name[20]; +static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + struct gen7_surface_state *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); + batch_out(brw, name, offset, 3, "pitch %d, %stiled\n", + surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); + batch_out(brw, name, offset, 4, "mip base %d\n", + surf->ss5.min_lod); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); +} - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (struct gen7_surface_state *) (base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(surf->ss0.surface_type), - get_965_surface_format(surf->ss0.surface_format)); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); - state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", - surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); - state_out(name, surf, surfoff, 4, "mip base %d\n", - surf->ss5.min_lod); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - surf->ss5.x_offset, surf->ss5.y_offset); +static void +dump_sdc(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SDC"; + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5 && intel->gen <= 6) { + struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "unorm rgba\n"); + batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]); + batch_out(brw, name, offset, 3, "g %f\n", sdc->f[2]); + batch_out(brw, name, offset, 4, "a %f\n", sdc->f[3]); + batch_out(brw, name, offset, 5, "half float rg\n"); + batch_out(brw, name, offset, 6, "half float ba\n"); + batch_out(brw, name, offset, 7, "u16 rg\n"); + batch_out(brw, name, offset, 8, "u16 ba\n"); + batch_out(brw, name, offset, 9, "s16 rg\n"); + batch_out(brw, name, offset, 10, "s16 ba\n"); + batch_out(brw, name, offset, 11, "s8 rgba\n"); + } else { + struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "r %f\n", sdc->color[0]); + batch_out(brw, name, offset, 1, "g %f\n", sdc->color[1]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->color[2]); + batch_out(brw, name, offset, 3, "a %f\n", sdc->color[3]); } - drm_intel_bo_unmap(bo); } -static void dump_wm_sampler_state(struct brw_context *brw) +static void dump_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; int i; + struct brw_sampler_state *samp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct brw_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - samp = (struct brw_sampler_state *)(intel->batch.bo->virtual + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); - - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - if (intel->gen >= 5) { - struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "unorm rgba\n"); - state_out(name, sdc, sdc_offset, 1, "r %f\n", sdc->f[0]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->f[1]); - state_out(name, sdc, sdc_offset, 3, "g %f\n", sdc->f[2]); - state_out(name, sdc, sdc_offset, 4, "a %f\n", sdc->f[3]); - state_out(name, sdc, sdc_offset, 5, "half float rg\n"); - state_out(name, sdc, sdc_offset, 6, "half float ba\n"); - state_out(name, sdc, sdc_offset, 7, "u16 rg\n"); - state_out(name, sdc, sdc_offset, 8, "u16 ba\n"); - state_out(name, sdc, sdc_offset, 9, "s16 rg\n"); - state_out(name, sdc, sdc_offset, 10, "s16 ba\n"); - state_out(name, sdc, sdc_offset, 11, "s8 rgba\n"); - } else { - struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); - } + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); } - drm_intel_bo_unmap(intel->batch.bo); } -static void dump_gen7_sampler_state(struct brw_context *brw) +static void dump_gen7_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; + struct gen7_sampler_state *samp = intel->batch.bo->virtual + offset; int i; assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct gen7_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - samp = (struct gen7_sampler_state *) - (intel->batch.bo->virtual + brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); - - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - struct brw_sampler_default_color *sdc = - intel->batch.bo->virtual + brw->wm.sdc_offset[i]; - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); } drm_intel_bo_unmap(intel->batch.bo); } -static void dump_sf_viewport_state(struct brw_context *brw) +static void dump_sf_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF VP"; - struct brw_sf_viewport *vp; - uint32_t vp_off; + struct brw_sf_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - - state_out(name, vp, vp_off, 6, "top left = %d,%d\n", + batch_out(brw, name, offset, 6, "top left = %d,%d\n", vp->scissor.xmin, vp->scissor.ymin); - state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", + batch_out(brw, name, offset, 7, "bottom right = %d,%d\n", vp->scissor.xmax, vp->scissor.ymax); - - drm_intel_bo_unmap(intel->batch.bo); } -static void dump_clip_viewport_state(struct brw_context *brw) +static void dump_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "CLIP VP"; - struct brw_clipper_viewport *vp; - uint32_t vp_off; + struct brw_clipper_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->clip.vp_offset; - vp_off = intel->batch.bo->offset + brw->clip.vp_offset; - - state_out(name, vp, vp_off, 0, "xmin = %f\n", vp->xmin); - state_out(name, vp, vp_off, 1, "xmax = %f\n", vp->xmax); - state_out(name, vp, vp_off, 2, "ymin = %f\n", vp->ymin); - state_out(name, vp, vp_off, 3, "ymax = %f\n", vp->ymax); - drm_intel_bo_unmap(intel->batch.bo); + batch_out(brw, name, offset, 0, "xmin = %f\n", vp->xmin); + batch_out(brw, name, offset, 1, "xmax = %f\n", vp->xmax); + batch_out(brw, name, offset, 2, "ymin = %f\n", vp->ymin); + batch_out(brw, name, offset, 3, "ymax = %f\n", vp->ymax); } -static void dump_sf_clip_viewport_state(struct brw_context *brw) +static void dump_sf_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF_CLIP VP"; - struct gen7_sf_clip_viewport *vp; - uint32_t vp_off; + struct gen7_sf_clip_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; - - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 6, "guardband xmin = %f\n", vp->guardband.xmin); - state_out(name, vp, vp_off, 7, "guardband xmax = %f\n", vp->guardband.xmax); - state_out(name, vp, vp_off, 8, "guardband ymin = %f\n", vp->guardband.ymin); - state_out(name, vp, vp_off, 9, "guardband ymax = %f\n", vp->guardband.ymax); - drm_intel_bo_unmap(intel->batch.bo); + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); + batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin); + batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax); + batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin); + batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax); } -static void dump_cc_viewport_state(struct brw_context *brw) +static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; const char *name = "CC VP"; - struct brw_cc_viewport *vp; - uint32_t vp_off; + struct brw_cc_viewport *vp = brw->intel.batch.bo->virtual + offset; - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->cc.vp_offset; - vp_off = intel->batch.bo->offset + brw->cc.vp_offset; + batch_out(brw, name, offset, 0, "min_depth = %f\n", vp->min_depth); + batch_out(brw, name, offset, 1, "max_depth = %f\n", vp->max_depth); +} - state_out(name, vp, vp_off, 0, "min_depth = %f\n", vp->min_depth); - state_out(name, vp, vp_off, 1, "max_depth = %f\n", vp->max_depth); - drm_intel_bo_unmap(intel->batch.bo); +static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "D_S"; + struct gen6_depth_stencil_state *ds = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "stencil %sable, func %d, write %sable\n", + ds->ds0.stencil_enable ? "en" : "dis", + ds->ds0.stencil_func, + ds->ds0.stencil_write_enable ? "en" : "dis"); + batch_out(brw, name, offset, 1, + "stencil test mask 0x%x, write mask 0x%x\n", + ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); + batch_out(brw, name, offset, 2, + "depth test %sable, func %d, write %sable\n", + ds->ds2.depth_test_enable ? "en" : "dis", + ds->ds2.depth_test_func, + ds->ds2.depth_write_enable ? "en" : "dis"); } -static void dump_depth_stencil_state(struct brw_context *brw) +static void dump_cc_state_gen4(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; - const char *name = "DEPTH STENCIL"; - struct gen6_depth_stencil_state *ds; - uint32_t ds_off; - - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - ds = intel->batch.bo->virtual + brw->cc.depth_stencil_state_offset; - ds_off = intel->batch.bo->offset + brw->cc.depth_stencil_state_offset; - - state_out(name, ds, ds_off, 0, "stencil %sable, func %d, write %sable\n", - ds->ds0.stencil_enable ? "en" : "dis", - ds->ds0.stencil_func, - ds->ds0.stencil_write_enable ? "en" : "dis"); - state_out(name, ds, ds_off, 1, "stencil test mask 0x%x, write mask 0x%x\n", - ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); - state_out(name, ds, ds_off, 2, "depth test %sable, func %d, write %sable\n", - ds->ds2.depth_test_enable ? "en" : "dis", - ds->ds2.depth_test_func, - ds->ds2.depth_write_enable ? "en" : "dis"); - drm_intel_bo_unmap(intel->batch.bo); + const char *name = "CC"; + + batch_out(brw, name, offset, 0, "cc0\n"); + batch_out(brw, name, offset, 1, "cc1\n"); + batch_out(brw, name, offset, 2, "cc2\n"); + batch_out(brw, name, offset, 3, "cc3\n"); + batch_out(brw, name, offset, 4, "cc4: viewport offset\n"); + batch_out(brw, name, offset, 5, "cc5\n"); + batch_out(brw, name, offset, 6, "cc6\n"); + batch_out(brw, name, offset, 7, "cc7\n"); } -static void dump_cc_state(struct brw_context *brw) +static void dump_cc_state_gen6(struct brw_context *brw, uint32_t offset) { const char *name = "CC"; - struct gen6_color_calc_state *cc; - uint32_t cc_off; - dri_bo *bo = brw->intel.batch.bo; - - if (brw->cc.state_offset == 0) - return; - - drm_intel_bo_map(bo, GL_FALSE); - cc = bo->virtual + brw->cc.state_offset; - cc_off = bo->offset + brw->cc.state_offset; - - state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d," - "bf stencil ref %d\n", - cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", - cc->cc0.round_disable, - cc->cc0.stencil_ref, - cc->cc0.bf_stencil_ref); - state_out(name, cc, cc_off, 1, "\n"); - state_out(name, cc, cc_off, 2, "constant red %f\n", cc->constant_r); - state_out(name, cc, cc_off, 3, "constant green %f\n", cc->constant_g); - state_out(name, cc, cc_off, 4, "constant blue %f\n", cc->constant_b); - state_out(name, cc, cc_off, 5, "constant alpha %f\n", cc->constant_a); - - drm_intel_bo_unmap(bo); + struct gen6_color_calc_state *cc = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "alpha test format %s, round disable %d, stencil ref %d, " + "bf stencil ref %d\n", + cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", + cc->cc0.round_disable, + cc->cc0.stencil_ref, + cc->cc0.bf_stencil_ref); + batch_out(brw, name, offset, 1, "\n"); + batch_out(brw, name, offset, 2, "constant red %f\n", cc->constant_r); + batch_out(brw, name, offset, 3, "constant green %f\n", cc->constant_g); + batch_out(brw, name, offset, 4, "constant blue %f\n", cc->constant_b); + batch_out(brw, name, offset, 5, "constant alpha %f\n", cc->constant_a); +} +static void dump_blend_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "BLEND"; + + batch_out(brw, name, offset, 0, "\n"); + batch_out(brw, name, offset, 1, "\n"); } -static void dump_blend_state(struct brw_context *brw) +static void +dump_scissor(struct brw_context *brw, uint32_t offset) { + const char *name = "SCISSOR"; struct intel_context *intel = &brw->intel; - const char *name = "BLEND"; - struct gen6_blend_state *blend; - uint32_t blend_off; + struct gen6_scissor_rect *scissor = intel->batch.bo->virtual + offset; - drm_intel_bo_map(intel->batch.bo, GL_FALSE); + batch_out(brw, name, offset, 0, "xmin %d, ymin %d\n", + scissor->xmin, scissor->ymin); + batch_out(brw, name, offset, 1, "xmax %d, ymax %d\n", + scissor->xmax, scissor->ymax); +} - blend = intel->batch.bo->virtual + brw->cc.blend_state_offset; - blend_off = intel->batch.bo->offset + brw->cc.blend_state_offset; +static void +dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "VS_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; - state_out(name, blend, blend_off, 0, "\n"); - state_out(name, blend, blend_off, 1, "\n"); + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} - drm_intel_bo_unmap(intel->batch.bo); +static void dump_binding_table(struct brw_context *brw, uint32_t offset, + uint32_t size) +{ + char name[20]; + int i; + uint32_t *data = brw->intel.batch.bo->virtual + offset; + + for (i = 0; i < size / 4; i++) { + if (data[i] == 0) + continue; + sprintf(name, "BIND%d", i); + batch_out(brw, name, offset, i, "surface state address\n"); + } } -static void brw_debug_prog(struct brw_context *brw, - const char *name, uint32_t prog_offset) +static void +dump_prog_cache(struct brw_context *brw) { - unsigned int i; + struct intel_context *intel = &brw->intel; + struct brw_cache *cache = &brw->cache; + unsigned int b, i; uint32_t *data; drm_intel_bo_map(brw->cache.bo, false); - data = brw->cache.bo->virtual + prog_offset; - - for (i = 0; i < brw->cache.bo->size / 4 / 4; i++) { - fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - name, (unsigned int)brw->cache.bo->offset + i * 4 * 4, - data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); - /* Stop at the end of the program. It'd be nice to keep track of the actual - * intended program size instead of guessing like this. - */ - if (data[i * 4 + 0] == 0 && - data[i * 4 + 1] == 0 && - data[i * 4 + 2] == 0 && - data[i * 4 + 3] == 0) - break; + for (b = 0; b < cache->size; b++) { + struct brw_cache_item *item; + + for (item = cache->items[b]; item; item = item->next) { + const char *name; + uint32_t offset = item->offset; + + data = brw->cache.bo->virtual + item->offset; + + switch (item->cache_id) { + case BRW_VS_PROG: + name = "VS kernel"; + break; + case BRW_GS_PROG: + name = "GS kernel"; + break; + case BRW_CLIP_PROG: + name = "CLIP kernel"; + break; + case BRW_SF_PROG: + name = "SF kernel"; + break; + case BRW_WM_PROG: + name = "WM kernel"; + break; + default: + name = "unknown"; + break; + } + + for (i = 0; i < item->size / 4 / 4; i++) { + fprintf(stderr, "0x%08x: %8s: 0x%08x 0x%08x 0x%08x 0x%08x ", + offset + i * 4 * 4, + name, + data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + + brw_disasm(stderr, (void *)(data + i * 4), intel->gen); + } + } } drm_intel_bo_unmap(brw->cache.bo); } +static void +dump_state_batch(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + int i; + + for (i = 0; i < brw->state_batch_count; i++) { + uint32_t offset = brw->state_batch_list[i].offset; + uint32_t size = brw->state_batch_list[i].size; + + switch (brw->state_batch_list[i].type) { + case AUB_TRACE_VS_STATE: + dump_vs_state(brw, offset); + break; + case AUB_TRACE_GS_STATE: + dump_gs_state(brw, offset); + break; + case AUB_TRACE_CLIP_STATE: + dump_clip_state(brw, offset); + break; + case AUB_TRACE_SF_STATE: + dump_sf_state(brw, offset); + break; + case AUB_TRACE_WM_STATE: + dump_wm_state(brw, offset); + break; + case AUB_TRACE_CLIP_VP_STATE: + dump_clip_viewport_state(brw, offset); + break; + case AUB_TRACE_SF_VP_STATE: + if (intel->gen >= 7) { + dump_sf_clip_viewport_state(brw, offset); + } else { + dump_sf_viewport_state(brw, offset); + } + break; + case AUB_TRACE_CC_VP_STATE: + dump_cc_viewport_state(brw, offset); + break; + case AUB_TRACE_DEPTH_STENCIL_STATE: + dump_depth_stencil_state(brw, offset); + break; + case AUB_TRACE_CC_STATE: + if (intel->gen >= 6) + dump_cc_state_gen6(brw, offset); + else + dump_cc_state_gen4(brw, offset); + break; + case AUB_TRACE_BLEND_STATE: + dump_blend_state(brw, offset); + break; + case AUB_TRACE_BINDING_TABLE: + dump_binding_table(brw, offset, size); + break; + case AUB_TRACE_SURFACE_STATE: + if (intel->gen < 7) { + dump_surface_state(brw, offset); + } else { + dump_gen7_surface_state(brw, offset); + } + break; + case AUB_TRACE_SAMPLER_STATE: + if (intel->gen < 7) { + dump_sampler_state(brw, offset, size); + } else { + dump_gen7_sampler_state(brw, offset, size); + } + break; + case AUB_TRACE_SAMPLER_DEFAULT_COLOR: + dump_sdc(brw, offset); + break; + case AUB_TRACE_SCISSOR_STATE: + dump_scissor(brw, offset); + break; + case AUB_TRACE_VS_CONSTANTS: + dump_vs_constants(brw, offset, size); + break; + default: + break; + } + } +} /** * Print additional debug information associated with the batchbuffer @@ -501,51 +622,10 @@ void brw_debug_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - state_struct_out("WM bind", - brw->intel.batch.bo, - brw->wm.bind_bo_offset, - 4 * brw->wm.nr_surfaces); - if (intel->gen < 7) { - dump_wm_surface_state(brw); - dump_wm_sampler_state(brw); - } else { - dump_gen7_surface_state(brw); - dump_gen7_sampler_state(brw); - } - - if (intel->gen < 6) - state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, - sizeof(struct brw_vs_unit_state)); - brw_debug_prog(brw, "VS prog", brw->vs.prog_offset); - - if (intel->gen < 6) - state_struct_out("GS", intel->batch.bo, brw->gs.state_offset, - sizeof(struct brw_gs_unit_state)); - if (brw->gs.prog_active) { - brw_debug_prog(brw, "GS prog", brw->gs.prog_offset); - } + drm_intel_bo_map(intel->batch.bo, false); + dump_state_batch(brw); + drm_intel_bo_unmap(intel->batch.bo); - if (intel->gen < 6) { - state_struct_out("SF", intel->batch.bo, brw->sf.state_offset, - sizeof(struct brw_sf_unit_state)); - brw_debug_prog(brw, "SF prog", brw->sf.prog_offset); - } - if (intel->gen >= 7) - dump_sf_clip_viewport_state(brw); - else - dump_sf_viewport_state(brw); - if (intel->gen == 6) - dump_clip_viewport_state(brw); - - if (intel->gen < 6) - state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, - sizeof(struct brw_wm_unit_state)); - brw_debug_prog(brw, "WM prog", brw->wm.prog_offset); - - if (intel->gen >= 6) { - dump_cc_viewport_state(brw); - dump_depth_stencil_state(brw); - dump_cc_state(brw); - dump_blend_state(brw); - } + if (0) + dump_prog_cache(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index d5010a21e80..fc4373ab311 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -43,10 +43,12 @@ brw_prepare_vs_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_vs_unit_state *vs; - vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset); + vs = brw_state_batch(brw, AUB_TRACE_VS_STATE, + sizeof(*vs), 32, &brw->vs.state_offset); memset(vs, 0, sizeof(*vs)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ + vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs->thread0.kernel_start_pointer = brw_program_reloc(brw, brw->vs.state_offset + @@ -54,7 +56,6 @@ brw_prepare_vs_unit(struct brw_context *brw) brw->vs.prog_offset + (vs->thread0.grf_reg_count << 1)) >> 6; - vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, * which will have the channel mask for dwords 4-7 enabled in the thread, diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 611f6333689..f9ee4d112a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -182,7 +182,8 @@ static void upload_vs_surfaces(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. (once we have vs samplers) */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(uint32_t) * BRW_VS_MAX_SURF, 32, &brw->vs.bind_bo_offset); for (i = 0; i < BRW_VS_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 8612e743265..6aeeda6e0fa 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -123,6 +123,8 @@ static void brw_new_batch( struct intel_context *intel ) */ intel->batch.need_workaround_flush = true; + brw->state_batch_count = 0; + brw->vb.nr_current_buffers = 0; /* Mark that the current program cache BO has been used by the GPU. diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 59dcda7b414..7cd3edad235 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -563,13 +563,14 @@ static void precalc_dst( struct brw_wm_compile *c, struct prog_src_register src0 = inst->SrcReg[0]; struct prog_src_register src1 = inst->SrcReg[1]; struct prog_dst_register dst = inst->DstReg; - + struct prog_dst_register temp = get_temp(c); + if (dst.WriteMask & WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op(c, OPCODE_MUL, - dst_mask(dst, WRITEMASK_Y), + dst_mask(temp, WRITEMASK_Y), inst->SaturateMode, src0, src1, @@ -584,7 +585,7 @@ static void precalc_dst( struct brw_wm_compile *c, */ swz = emit_op(c, OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XZ), + dst_mask(temp, WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), @@ -597,12 +598,26 @@ static void precalc_dst( struct brw_wm_compile *c, */ emit_op(c, OPCODE_MOV, - dst_mask(dst, WRITEMASK_W), + dst_mask(temp, WRITEMASK_W), inst->SaturateMode, src1, src_undef(), src_undef()); } + + /* This will get optimized out in general, but it ensures that we + * don't overwrite src operands in our channel-wise splitting + * above. See piglit fp-dst-aliasing-[12]. + */ + emit_op(c, + OPCODE_MOV, + dst, + 0, + src_reg_from_dst(temp), + src_undef(), + src_undef()); + + release_temp(c, temp); } @@ -611,7 +626,17 @@ static void precalc_lit( struct brw_wm_compile *c, { struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register dst = inst->DstReg; - + + if (dst.WriteMask & WRITEMASK_YZ) { + emit_op(c, + OPCODE_LIT, + dst_mask(dst, WRITEMASK_YZ), + inst->SaturateMode, + src0, + src_undef(), + src_undef()); + } + if (dst.WriteMask & WRITEMASK_XW) { struct prog_instruction *swz; @@ -627,16 +652,6 @@ static void precalc_lit( struct brw_wm_compile *c, /* Avoid letting the negation flag of src0 affect our 1 constant. */ swz->SrcReg[0].Negate = NEGATE_NONE; } - - if (dst.WriteMask & WRITEMASK_YZ) { - emit_op(c, - OPCODE_LIT, - dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, - src0, - src_undef(), - src_undef()); - } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 5de39aa4575..98146136703 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -108,7 +108,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, if (intel->gen == 5 || intel->gen == 6) { struct gen5_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); memset(sdc, 0, sizeof(*sdc)); @@ -144,7 +145,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, } else { struct brw_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); COPY_4V(sdc->color, color); } @@ -326,7 +328,8 @@ prepare_wm_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 506e2bdff5b..c820ce48c29 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -78,7 +78,8 @@ brw_prepare_wm_unit(struct brw_context *brw) const struct gl_fragment_program *fp = brw->fragment_program; struct brw_wm_unit_state *wm; - wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.state_offset); + wm = brw_state_batch(brw, AUB_TRACE_WM_STATE, + sizeof(*wm), 32, &brw->wm.state_offset); memset(wm, 0, sizeof(*wm)); if (brw->wm.prog_data->prog_offset_16) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 89fea9cc952..fb4fb146f8d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -226,7 +226,8 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[surf_index]); surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -272,7 +273,8 @@ brw_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, out_offset); surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -404,7 +406,8 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) struct intel_context *intel = &brw->intel; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); @@ -439,7 +442,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, uint32_t tile_x, tile_y; uint32_t format = 0; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); switch (irb->Base.Format) { case MESA_FORMAT_XRGB8888: @@ -637,7 +641,8 @@ brw_wm_upload_binding_table(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, + sizeof(uint32_t) * BRW_WM_MAX_SURF, 32, &brw->wm.bind_bo_offset); for (i = 0; i < BRW_WM_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 294d5a5e644..41d13ad2bf4 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -51,7 +51,8 @@ prepare_blend_state(struct brw_context *brw) nr_draw_buffers = 1; size = sizeof(*blend) * nr_draw_buffers; - blend = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset); + blend = brw_state_batch(brw, AUB_TRACE_BLEND_STATE, + size, 64, &brw->cc.blend_state_offset); memset(blend, 0, size); @@ -139,7 +140,8 @@ gen6_prepare_color_calc_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_color_calc_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_COLOR */ diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index 775e1ce2c9c..5d14147db3d 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -34,7 +34,8 @@ gen6_prepare_depth_stencil_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_depth_stencil_state *ds; - ds = brw_state_batch(brw, sizeof(*ds), 64, + ds = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*ds), 64, &brw->cc.depth_stencil_state_offset); memset(ds, 0, sizeof(*ds)); diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 7492e508864..dc73b10f4cd 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -39,7 +39,8 @@ gen6_upload_scissor_state(struct brw_context *brw) struct gen6_scissor_rect *scissor; uint32_t scissor_state_offset; - scissor = brw_state_batch(brw, sizeof(*scissor), 32, &scissor_state_offset); + scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE, + sizeof(*scissor), 32, &scissor_state_offset); /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index c6c55c926c7..a4bfa54837d 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -43,7 +43,8 @@ prepare_clip_vp(struct brw_context *brw) { struct brw_clipper_viewport *vp; - vp = brw_state_batch(brw, sizeof(*vp), 32, &brw->clip.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_CLIP_VP_STATE, + sizeof(*vp), 32, &brw->clip.vp_offset); vp->xmin = -1.0; vp->xmax = 1.0; @@ -72,7 +73,8 @@ prepare_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); /* _NEW_BUFFERS */ diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 022e23e12b0..fb4cdbaadf9 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -60,7 +60,7 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) float *param; int i; - param = brw_state_batch(brw, + param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS, (MAX_CLIP_PLANES + nr_params) * 4 * sizeof(float), 32, &brw->vs.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 9ef6133e2b9..185da9c355f 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -54,7 +54,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 95f6fbf7414..e787c21f4d1 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -183,7 +183,8 @@ gen7_prepare_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 99efe96a1fa..0f97cea652d 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -39,11 +39,12 @@ upload_sbe_state(struct brw_context *brw) uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); uint32_t dw1, dw10, dw11; int i; - int attr = 0; + int attr = 0, input_index = 0; /* _NEW_TRANSFORM */ int urb_start = ctx->Transform.ClipPlanesEnabled ? 2 : 1; /* _NEW_LIGHT */ int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + uint16_t attr_overrides[FRAG_ATTRIB_MAX]; /* FINISHME: Attribute Swizzle Control Mode? */ dw1 = @@ -57,12 +58,6 @@ upload_sbe_state(struct brw_context *brw) dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; dw10 = 0; - if (ctx->Point.PointSprite) { - for (i = 0; i < 8; i++) { - if (ctx->Point.CoordReplace[i]) - dw10 |= (1 << i); - } - } /* _NEW_LIGHT (flat shading) */ dw11 = 0; @@ -71,30 +66,43 @@ upload_sbe_state(struct brw_context *brw) ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); } + /* Create the mapping from the FS inputs we produce to the VS outputs + * they source from. + */ + for (; attr < FRAG_ATTRIB_MAX; attr++) { + if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) + continue; + + if (ctx->Point.PointSprite && + attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7 && + ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) { + dw10 |= (1 << input_index); + } + + if (attr == FRAG_ATTRIB_PNTC) + dw10 |= (1 << input_index); + + /* The hardware can only do the overrides on 16 overrides at a + * time, and the other up to 16 have to be lined up so that the + * input index = the output index. We'll need to do some + * tweaking to make sure that's the case. + */ + assert(input_index < 16 || attr == input_index); + + attr_overrides[input_index++] = get_attr_override(brw, attr, + two_side_color); + } + + for (; attr < FRAG_ATTRIB_MAX; attr++) + attr_overrides[input_index++] = 0; + BEGIN_BATCH(14); OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); OUT_BATCH(dw1); /* Output dwords 2 through 9 */ for (i = 0; i < 8; i++) { - uint32_t attr_overrides = 0; - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color); - attr++; - break; - } - } - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; - attr++; - break; - } - } - OUT_BATCH(attr_overrides); + OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); } OUT_BATCH(dw10); /* point sprite texcoord bitmask */ diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index 838ad3a3948..e9aacd56317 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -36,7 +36,8 @@ prepare_sf_clip_viewport(struct brw_context *brw) const GLfloat *v = ctx->Viewport._WindowMap.m; struct gen7_sf_clip_viewport *vp; - vp = brw_state_batch(brw, sizeof(vp), 64, &brw->sf.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(vp), 64, &brw->sf.vp_offset); /* Also assign to clip.vp_offset in case something uses it. */ brw->clip.vp_offset = brw->sf.vp_offset; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 17f75354f1d..a102ca772b3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -51,7 +51,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); @@ -138,11 +138,9 @@ upload_wm_state(struct brw_context *brw) const struct brw_tracked_state gen7_wm_state = { .dirty = { - .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_POLYGONSTIPPLE | + .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_COLOR | _NEW_BUFFERS), - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_NR_WM_SURFACES | + .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), .cache = 0, @@ -240,10 +238,7 @@ upload_ps_state(struct brw_context *brw) const struct brw_tracked_state gen7_ps_state = { .dirty = { - .mesa = (_NEW_LINE | - _NEW_POLYGON | - _NEW_POLYGONSTIPPLE | - _NEW_PROGRAM_CONSTANTS), + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 9994b67bfc5..4add1a69f02 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -65,8 +65,8 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[surf_index]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = translate_tex_target(tObj->Target); @@ -135,7 +135,8 @@ gen7_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, out_offset); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_BUFFER; @@ -210,8 +211,8 @@ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit) { struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_NULL; @@ -235,8 +236,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, struct gen7_surface_state *surf; uint32_t tile_x, tile_y; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); switch (irb->Base.Format) { diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 292b7b034ee..2ba13632569 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -56,7 +56,7 @@ #include "drirenderbuffer.h" #include "utils.h" - +#include "../glsl/ralloc.h" #ifndef INTEL_DEBUG int INTEL_DEBUG = (0); @@ -924,7 +924,7 @@ intelDestroyContext(__DRIcontext * driContextPriv) _math_matrix_dtr(&intel->ViewportMatrix); - FREE(intel); + ralloc_free(intel); driContextPriv->driverPrivate = NULL; } } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 148fb0c2c9a..1727badb704 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -199,6 +199,7 @@ struct intel_context drm_intel_bo *first_post_swapbuffers_batch; GLboolean need_throttle; GLboolean no_batch_wrap; + bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */ struct { diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 6cf37c4c40c..4c4945c7941 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -271,7 +271,8 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src) if (*dst) intel_region_release(dst); - src->refcount++; + if (src) + src->refcount++; *dst = src; } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index eda07a43dee..1a3643da593 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -55,15 +55,11 @@ get_teximage_readbuffer(struct intel_context *intel, GLenum internalFormat) DBG("%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(internalFormat)); - switch (internalFormat) { - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH24_STENCIL8_EXT: - case GL_DEPTH_STENCIL_EXT: + if (_mesa_is_depth_format(internalFormat) || + _mesa_is_depthstencil_format(internalFormat)) return intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - default: - return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); - } + + return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 269faefa1c0..1f8b885bbec 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -120,25 +120,6 @@ intel_miptree_create_for_teximage(struct intel_context *intel, expect_accelerated_upload); } - - - -static GLuint -target_to_face(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return ((GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); - default: - return 0; - } -} - /* There are actually quite a few combinations this will work for, * more than what I've listed here. */ @@ -426,7 +407,7 @@ intelTexImage(struct gl_context * ctx, DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; if (_mesa_is_format_compressed(texImage->TexFormat)) { @@ -835,7 +816,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->region->width, rb->region->height, 1, 0, internalFormat, texFormat); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; texImage->RowStride = rb->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); @@ -893,7 +874,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->region->width, image->region->height, 1, 0, image->internal_format, image->format); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = 0; texImage->RowStride = image->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); diff --git a/src/mesa/drivers/dri/nouveau/nv10_render.c b/src/mesa/drivers/dri/nouveau/nv10_render.c index 20fb4478426..6134650346d 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_render.c +++ b/src/mesa/drivers/dri/nouveau/nv10_render.c @@ -99,7 +99,7 @@ get_hw_format(int type) case GL_UNSIGNED_SHORT: return NV10_3D_VTXBUF_FMT_TYPE_V16_SNORM; case GL_UNSIGNED_BYTE: - return NV10_3D_VTXBUF_FMT_TYPE_B8G8R8A8_UNORM; + return NV10_3D_VTXBUF_FMT_TYPE_U8_UNORM; default: assert(0); } diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 48657b44be1..1f5fc33d775 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1097,12 +1097,16 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) parselist++; break; case GLX_FBCONFIG_ID: + case GLX_VISUAL_ID: if (!fbConfig) return NULL; parselist++; desiredVisualID = *parselist++; break; case GLX_X_RENDERABLE: + case GLX_MAX_PBUFFER_WIDTH: + case GLX_MAX_PBUFFER_HEIGHT: + case GLX_MAX_PBUFFER_PIXELS: if (!fbConfig) return NULL; parselist += 2; |