diff options
author | Eric Anholt <[email protected]> | 2008-01-02 16:55:21 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2008-01-03 15:46:16 -0800 |
commit | 8abffada70fcd62e3c2dcbcdc6d00d258805326b (patch) | |
tree | d04daf5de22d6bbdb97beda35da6b33f4f1bc6c1 /src/mesa/drivers | |
parent | 0ff3eb637bac9e100aab0a109a46e81bea9203a3 (diff) |
[intel] Convert relocations to not be cleared out on buffer submit.
We have two consumers of relocations. One is static state buffers, which
want the same relocation every time. The other is the batchbuffer, which gets
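thrown out immediately after submit. Keeping relocations attached to their
buffer object for its lifetime, rather than clearing them on every submit, lets
us reduce repeated computation for static state buffers, and clean up the code
by moving relocations nearer to where the state buffer is computed.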
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/common/dri_bufmgr.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/common/dri_bufmgr_fake.c | 294 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cc.c | 31 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_clip_state.c | 36 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 9 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs_state.c | 36 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_sf_state.c | 46 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_state.c | 32 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 31 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_state.c | 69 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 87 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c | 125 |
13 files changed, 388 insertions, 417 deletions
diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index cdf27b903fb..36340d4d57d 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -136,7 +136,10 @@ struct _dri_bufmgr { void (*destroy)(dri_bufmgr *bufmgr); /** - * Add relocation entry in reloc_buf, to be set on command submission. + * Add relocation entry in reloc_buf, which will be updated with the + * target buffer's real offset on on command submission. + * + * Relocations remain in place for the lifetime of the buffer object. * * \param reloc_buf Buffer to write the relocation into. * \param flags BO flags to be used in validating the target buffer. diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c index 65b2c174d5c..ae7154daa20 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c @@ -62,12 +62,16 @@ struct fake_buffer_reloc { - dri_bo *reloc_buf; + /** Buffer object that the relocation points at. */ dri_bo *target_buf; + /** Offset of the relocation entry within reloc_buf. */ GLuint offset; + /** Cached value of the offset when we last performed this relocation. */ + GLuint last_target_offset; + /** Value added to target_buf's offset to get the relocation entry. */ GLuint delta; + /** Flags to validate the target buffer under. */ uint64_t validate_flags; - GLboolean relocated; }; struct block { @@ -128,25 +132,9 @@ typedef struct _bufmgr_fake { GLboolean debug; - /** fake relocation list */ - struct fake_buffer_reloc reloc[MAX_RELOCS]; - GLuint nr_relocs; GLboolean performed_rendering; - GLboolean in_relocation; } dri_bufmgr_fake; -#define RELOC_CACHE_COUNT 10 -/** - * Relocation cache entry. - * - * These are used in buffer relocation to avoid re-mapping (and therefore - * dirtying) a buffer to emit constant relocations. 
- */ -struct reloc_cache { - unsigned int offset; - uint32_t data; -}; - typedef struct _dri_bo_fake { dri_bo bo; @@ -163,19 +151,13 @@ typedef struct _dri_bo_fake { unsigned int alignment; GLboolean is_static, validated; unsigned int map_count; - /** - * Relocation count with this as reloc_buffer, to assist in determining the - * order to perform relocations. - */ - unsigned int nr_relocs; - struct reloc_cache reloc_cache[RELOC_CACHE_COUNT]; /* Flags for the buffer to be validated with in command submission */ uint64_t validate_flags; - /* Number of entries in the relocation data cache */ - unsigned int reloc_cache_count; - + /** relocation list */ + struct fake_buffer_reloc *relocs; + GLuint nr_relocs; struct block *block; void *backing_store; @@ -659,6 +641,7 @@ dri_fake_bo_unreference(dri_bo *bo) if (bo_fake->block) free_block(bufmgr_fake, bo_fake->block); free_backing_store(bo); + free(bo_fake->relocs); free(bo); DBG("drm_bo_unreference: free %s\n", bo_fake->name); return; @@ -713,11 +696,6 @@ dri_fake_bo_map(dri_bo *bo, GLboolean write_enable) if (bo_fake->map_count++ != 0) return 0; - /* Clear the relocation cache if unknown data is going to be written in. 
*/ - if (!bufmgr_fake->in_relocation && write_enable) { - bo_fake->reloc_cache_count = 0; - } - { DBG("drm_bo_map: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, bo_fake->bo.size / 1024); @@ -838,6 +816,7 @@ dri_fake_bo_validate(dri_bo *bo, uint64_t flags) bo_fake->block->on_hardware = 1; move_to_tail(&bufmgr_fake->on_hardware, bo_fake->block); + bo_fake->validated = GL_TRUE; bufmgr_fake->need_fence = 1; return 0; @@ -915,168 +894,164 @@ static void dri_fake_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, GLuint offset, dri_bo *target_buf) { - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)reloc_buf->bufmgr; - struct fake_buffer_reloc *r = &bufmgr_fake->reloc[bufmgr_fake->nr_relocs++]; - dri_bo_fake *target_fake = (dri_bo_fake *)target_buf; + struct fake_buffer_reloc *r; dri_bo_fake *reloc_fake = (dri_bo_fake *)reloc_buf; int i; - assert(bufmgr_fake->nr_relocs <= MAX_RELOCS); + if (reloc_fake->relocs == NULL) { + reloc_fake->relocs = malloc(sizeof(struct fake_buffer_reloc) * + MAX_RELOCS); + } - dri_bo_reference(target_buf); + r = &reloc_fake->relocs[reloc_fake->nr_relocs++]; - if (target_fake->flags == 0) { - target_fake->validate_flags = flags; - } else { - /* Mask the memory location to the intersection of all the memory - * locations the buffer is being validated to. - */ - target_fake->validate_flags = - (target_fake->validate_flags & ~DRM_BO_MASK_MEM) | - (flags & target_fake->validate_flags & DRM_BO_MASK_MEM); - /* All the other flags just accumulate. */ - target_fake->validate_flags |= flags & ~DRM_BO_MASK_MEM; - } - reloc_fake->nr_relocs++; + assert(reloc_fake->nr_relocs <= MAX_RELOCS); + + dri_bo_reference(target_buf); - r->reloc_buf = reloc_buf; r->target_buf = target_buf; r->offset = offset; + r->last_target_offset = target_buf->offset; r->delta = delta; r->validate_flags = flags; /* Check that a conflicting relocation hasn't already been emitted. 
*/ - for (i = 0; i < bufmgr_fake->nr_relocs - 1; i++) { - struct fake_buffer_reloc *r2 = &bufmgr_fake->reloc[i]; - - assert(r->reloc_buf != r2->reloc_buf || - r->offset != r2->offset || - (r->target_buf == r2->target_buf && - r->delta == r2->delta && - r->validate_flags == r2->validate_flags)); + for (i = 0; i < reloc_fake->nr_relocs - 1; i++) { + struct fake_buffer_reloc *r2 = &reloc_fake->relocs[i]; + + assert(r->offset != r2->offset); } return; } -static void * -dri_fake_process_relocs(dri_bo *batch_buf, GLuint *count_p) +/** + * Incorporates the validation flags associated with each relocation into + * the combined validation flags for the buffer on this batchbuffer submission. + */ +static void +dri_fake_calculate_validate_flags(dri_bo *bo) { - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; - GLuint i; - GLuint count = 0; - GLboolean cont; - int ret; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; - bufmgr_fake->performed_rendering = GL_FALSE; - bufmgr_fake->in_relocation = GL_TRUE; + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; - /* Loop over the relocation list validating and writing the relocation - * entries for target buffers that don't contain any remaining relocations. - * In the current examples we have, the depth of the tree of relocations - * is small (up to 3), so this loop shouldn't hurt too bad. - */ - do { - cont = GL_FALSE; - - for (i = 0; i < bufmgr_fake->nr_relocs; i++) { - struct fake_buffer_reloc *r = &bufmgr_fake->reloc[i]; - dri_bo_fake *reloc_fake = (dri_bo_fake *)r->reloc_buf; - dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; - uint32_t reloc_data; - int c; - GLboolean cached = GL_FALSE; - - if (r->relocated) - continue; - - /* If there are still relocations to be done in the buffer, don't - * validate it yet. 
+ /* Do the same for the tree of buffers we depend on */ + dri_fake_calculate_validate_flags(r->target_buf); + + if (target_fake->flags == 0) { + target_fake->validate_flags = r->validate_flags; + } else { + /* Mask the memory location to the intersection of all the memory + * locations the buffer is being validated to. */ - if (target_fake->nr_relocs != 0) - continue; - - /* Validate the target buffer if that hasn't been done. */ - if (!target_fake->validated) { - ret = dri_fake_bo_validate(r->target_buf, - target_fake->validate_flags); - if (ret != 0) { - dri_fence *fo; - - dri_bo_unmap(r->reloc_buf); - fo = dri_fake_fence_validated(batch_buf->bufmgr, - "batchbuffer failure fence", - GL_TRUE); - dri_fence_unreference(fo); - goto done; - } - if (target_fake->validate_flags & DRM_BO_FLAG_WRITE) - bufmgr_fake->performed_rendering = GL_TRUE; - count++; - } + target_fake->validate_flags = + (target_fake->validate_flags & ~DRM_BO_MASK_MEM) | + (r->validate_flags & target_fake->validate_flags & + DRM_BO_MASK_MEM); + /* All the other flags just accumulate. */ + target_fake->validate_flags |= r->validate_flags & ~DRM_BO_MASK_MEM; + } + } +} + + +static int +dri_fake_reloc_and_validate_buffer(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i, ret; + + assert(bo_fake->map_count == 0); + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + uint32_t reloc_data; - /* Calculate the value of the relocation entry. */ + /* Validate the target buffer if that hasn't been done. */ + if (!target_fake->validated) { + ret = dri_fake_reloc_and_validate_buffer(r->target_buf); + if (ret != 0) + return ret; + } + /* Calculate the value of the relocation entry. 
*/ + if (r->target_buf->offset != r->last_target_offset) { reloc_data = r->target_buf->offset + r->delta; - /* Check the relocation cache of the buffer to see if we don't need - * to bother writing this one. - */ - for (c = 0; c < reloc_fake->reloc_cache_count; c++) { - if (reloc_fake->reloc_cache[c].offset == r->offset && - reloc_fake->reloc_cache[c].data == reloc_data) { - cached = GL_TRUE; - } - } + if (bo->virtual == NULL) + dri_bo_map(bo, GL_TRUE); - if (!cached) { - /* Map and write in the relocation to reloc_buf */ - if (reloc_fake->map_count == 0) - dri_bo_map(r->reloc_buf, GL_TRUE); + *(uint32_t *)(bo->virtual + r->offset) = reloc_data; - *(uint32_t *)(r->reloc_buf->virtual + r->offset) = reloc_data; + r->last_target_offset = r->target_buf->offset; + } + } - /* Stick this new entry in the relocation cache if possible */ - if (reloc_fake->reloc_cache_count < RELOC_CACHE_COUNT) { - struct reloc_cache *entry; + if (bo->virtual != NULL) + dri_bo_unmap(bo); - entry = &reloc_fake->reloc_cache[reloc_fake->reloc_cache_count]; - entry->offset = r->offset; - entry->data = reloc_data; + if (bo_fake->validate_flags & DRM_BO_FLAG_WRITE) + bufmgr_fake->performed_rendering = GL_TRUE; - reloc_fake->reloc_cache_count++; - } - } + return dri_fake_bo_validate(bo, bo_fake->validate_flags); +} - /* Mark this relocation in reloc_buf as done. If it was the last - * reloc to be done to it, unmap the buffer so it can be validated - * next. 
- */ - reloc_fake->nr_relocs--; - if (reloc_fake->nr_relocs == 0 && reloc_fake->map_count != 0) - dri_bo_unmap(r->reloc_buf); +static void * +dri_fake_process_relocs(dri_bo *batch_buf, GLuint *count_p) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; + dri_bo_fake *batch_fake = (dri_bo_fake *)batch_buf; + int ret; - r->relocated = GL_TRUE; + bufmgr_fake->performed_rendering = GL_FALSE; - cont = GL_TRUE; - } - } while (cont); + dri_fake_calculate_validate_flags(batch_buf); - ret = dri_fake_bo_validate(batch_buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); + batch_fake->validate_flags = DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ; + ret = dri_fake_reloc_and_validate_buffer(batch_buf); assert(ret == 0); - *count_p = count; - bufmgr_fake->in_relocation = GL_FALSE; - done: + *count_p = 0; /* junk */ + return NULL; } static void +dri_bo_fake_post_submit(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + + if (target_fake->validated) + dri_bo_fake_post_submit(r->target_buf); + + DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n", + bo_fake->name, (uint32_t)bo->offset, r->offset, + target_fake->name, (uint32_t)r->target_buf->offset, r->delta); + } + + assert(bo_fake->map_count == 0); + bo_fake->validated = GL_FALSE; + bo_fake->validate_flags = 0; +} + + +static void dri_fake_post_submit(dri_bo *batch_buf, dri_fence **last_fence) { dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; dri_fence *fo; - int i; fo = dri_fake_fence_validated(batch_buf->bufmgr, "Batch fence", GL_TRUE); @@ -1087,24 +1062,7 @@ dri_fake_post_submit(dri_bo *batch_buf, dri_fence **last_fence) dri_fence_unreference(fo); } - /* Clean up the validation list. 
*/ - for (i = 0; i < bufmgr_fake->nr_relocs; i++) { - struct fake_buffer_reloc *r = &bufmgr_fake->reloc[i]; - dri_bo_fake *reloc_fake = (dri_bo_fake *)r->reloc_buf; - dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; - - assert(r->relocated); - assert(reloc_fake->map_count == 0); - DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n", - reloc_fake->name, (uint32_t)r->reloc_buf->offset, r->offset, - target_fake->name, (uint32_t)r->target_buf->offset, r->delta); - - reloc_fake->validate_flags = 0; - target_fake->validated = GL_FALSE; - r->relocated = GL_FALSE; - dri_bo_unreference(r->target_buf); - } - bufmgr_fake->nr_relocs = 0; + dri_bo_fake_post_submit(batch_buf); } dri_bufmgr * diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 02ebfca57d2..80aaebdb2f4 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -147,6 +147,7 @@ static dri_bo * cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) { struct brw_cc_unit_state cc; + dri_bo *bo; memset(&cc, 0, sizeof(cc)); @@ -248,11 +249,20 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) if (INTEL_DEBUG & DEBUG_STATS) cc.cc5.statistics_enable = 1; - return brw_upload_cache(&brw->cache, BRW_CC_UNIT, - key, sizeof(*key), - &brw->cc.vp_bo, 1, - &cc, sizeof(cc), - NULL, NULL); + bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, + key, sizeof(*key), + &brw->cc.vp_bo, 1, + &cc, sizeof(cc), + NULL, NULL); + + /* Emit CC viewport relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); + + return bo; } static void upload_cc_unit( struct brw_context *brw ) @@ -271,16 +281,6 @@ static void upload_cc_unit( struct brw_context *brw ) brw->cc.state_bo = cc_unit_create_from_key(brw, &key); } -static void emit_reloc_cc_unit(struct brw_context *brw) -{ - /* Emit CC viewport relocation */ - dri_emit_reloc(brw->cc.state_bo, 
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - 0, - offsetof(struct brw_cc_unit_state, cc4), - brw->cc.vp_bo); -} - const struct brw_tracked_state brw_cc_unit = { .dirty = { .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH, @@ -288,7 +288,6 @@ const struct brw_tracked_state brw_cc_unit = { .cache = CACHE_NEW_CC_VP }, .update = upload_cc_unit, - .emit_reloc = emit_reloc_cc_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index cec717ee26e..e6e9fceb14d 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -69,6 +69,7 @@ clip_unit_create_from_key(struct brw_context *brw, struct brw_clip_unit_key *key) { struct brw_clip_unit_state clip; + dri_bo *bo; memset(&clip, 0, sizeof(clip)); @@ -106,12 +107,22 @@ clip_unit_create_from_key(struct brw_context *brw, clip.viewport_ymin = -1; clip.viewport_ymax = 1; - brw->clip.thread0_delta = clip.thread0.grf_reg_count << 1; - return brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, - key, sizeof(*key), - &brw->clip.prog_bo, 1, - &clip, sizeof(clip), - NULL, NULL); + bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, + key, sizeof(*key), + &brw->clip.prog_bo, 1, + &clip, sizeof(clip), + NULL, NULL); + + if (!brw->metaops.active) { + /* Emit clip program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + clip.thread0.grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); + } + + return bo; } static void upload_clip_unit( struct brw_context *brw ) @@ -130,18 +141,6 @@ static void upload_clip_unit( struct brw_context *brw ) } } -static void emit_reloc_clip_unit(struct brw_context *brw) -{ - if (!brw->metaops.active) { - /* Emit clip program relocation */ - dri_emit_reloc(brw->clip.state_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - brw->clip.thread0_delta, - offsetof(struct brw_clip_unit_state, thread0), - brw->clip.prog_bo); - } -} - const struct brw_tracked_state 
brw_clip_unit = { .dirty = { .mesa = 0, @@ -150,5 +149,4 @@ const struct brw_tracked_state brw_clip_unit = { .cache = CACHE_NEW_CLIP_PROG }, .update = upload_clip_unit, - .emit_reloc = emit_reloc_clip_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 05111b351ab..5a6ef463ea0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -328,7 +328,6 @@ struct brw_state_pointers { struct brw_tracked_state { struct brw_state_flags dirty; void (*update)( struct brw_context *brw ); - void (*emit_reloc)( struct brw_context *brw ); GLboolean always_update; }; @@ -540,7 +539,6 @@ struct brw_context struct { struct brw_vs_prog_data *prog_data; - GLuint thread0_delta; dri_bo *prog_bo; dri_bo *state_bo; } vs; @@ -549,7 +547,6 @@ struct brw_context struct brw_gs_prog_data *prog_data; GLboolean prog_active; - GLuint thread0_delta; dri_bo *prog_bo; dri_bo *state_bo; } gs; @@ -557,7 +554,6 @@ struct brw_context struct { struct brw_clip_prog_data *prog_data; - GLuint thread0_delta; dri_bo *prog_bo; dri_bo *state_bo; dri_bo *vp_bo; @@ -567,8 +563,6 @@ struct brw_context struct { struct brw_sf_prog_data *prog_data; - GLuint thread0_delta; - GLuint sf5_delta; dri_bo *prog_bo; dri_bo *state_bo; dri_bo *vp_bo; @@ -598,9 +592,6 @@ struct brw_context dri_bo *bind_bo; dri_bo *surf_bo[BRW_WM_MAX_SURF]; - GLuint thread0_delta; - GLuint thread2_delta; - GLuint wm4_delta; dri_bo *prog_bo; dri_bo *state_bo; } wm; diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 5cff15a7a4b..bf38fd73851 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -73,6 +73,7 @@ static dri_bo * gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) { struct brw_gs_unit_state gs; + dri_bo *bo; memset(&gs, 0, sizeof(gs)); @@ -97,12 +98,22 @@ gs_unit_create_from_key(struct brw_context *brw, struct 
brw_gs_unit_key *key) if (INTEL_DEBUG & DEBUG_STATS) gs.thread4.stats_enable = 1; - brw->gs.thread0_delta = gs.thread0.grf_reg_count << 1; - return brw_upload_cache(&brw->cache, BRW_GS_UNIT, - key, sizeof(*key), - &brw->gs.prog_bo, 1, - &gs, sizeof(gs), - NULL, NULL); + bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, + key, sizeof(*key), + &brw->gs.prog_bo, 1, + &gs, sizeof(gs), + NULL, NULL); + + if (key->prog_active) { + /* Emit GS program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + gs.thread0.grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); + } + + return bo; } static void upload_gs_unit( struct brw_context *brw ) @@ -121,18 +132,6 @@ static void upload_gs_unit( struct brw_context *brw ) } } -static void emit_reloc_gs_unit(struct brw_context *brw) -{ - if (brw->gs.prog_active) { - /* Emit GS program relocation */ - dri_emit_reloc(brw->gs.state_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - brw->gs.thread0_delta, - offsetof(struct brw_gs_unit_state, thread0), - brw->gs.prog_bo); - } -} - const struct brw_tracked_state brw_gs_unit = { .dirty = { .mesa = 0, @@ -141,5 +140,4 @@ const struct brw_tracked_state brw_gs_unit = { .cache = CACHE_NEW_GS_PROG }, .update = upload_gs_unit, - .emit_reloc = emit_reloc_gs_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index ccea31d8dc1..05c64909495 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -157,6 +157,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, dri_bo **reloc_bufs) { struct brw_sf_unit_state sf; + dri_bo *bo; memset(&sf, 0, sizeof(sf)); @@ -242,14 +243,27 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf6.dest_org_vbias = 0x8; sf.sf6.dest_org_hbias = 0x8; - brw->sf.thread0_delta = sf.thread0.grf_reg_count << 1; - brw->sf.sf5_delta = sf.sf5.front_winding | 
(sf.sf5.viewport_transform << 1); + bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, + key, sizeof(*key), + reloc_bufs, 2, + &sf, sizeof(sf), + NULL, NULL); - return brw_upload_cache(&brw->cache, BRW_SF_UNIT, - key, sizeof(*key), - reloc_bufs, 2, - &sf, sizeof(sf), - NULL, NULL); + /* Emit SF program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + sf.thread0.grf_reg_count << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); + + /* Emit SF viewport relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); + + return bo; } static void upload_sf_unit( struct brw_context *brw ) @@ -272,23 +286,6 @@ static void upload_sf_unit( struct brw_context *brw ) } } -static void emit_reloc_sf_unit(struct brw_context *brw) -{ - /* Emit SF program relocation */ - dri_emit_reloc(brw->sf.state_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - brw->sf.thread0_delta, - offsetof(struct brw_sf_unit_state, thread0), - brw->sf.prog_bo); - - /* Emit SF viewport relocation */ - dri_emit_reloc(brw->sf.state_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - brw->sf.sf5_delta, - offsetof(struct brw_sf_unit_state, sf5), - brw->sf.vp_bo); -} - const struct brw_tracked_state brw_sf_unit = { .dirty = { .mesa = (_NEW_POLYGON | @@ -301,5 +298,4 @@ const struct brw_tracked_state brw_sf_unit = { CACHE_NEW_SF_PROG) }, .update = upload_sf_unit, - .emit_reloc = emit_reloc_sf_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 94165da8164..c8e3fb8ee8f 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -232,8 +232,6 @@ void brw_validate_state( struct brw_context *brw ) /* emit_foo(brw); */ } - if (atom->emit_reloc != NULL) - atom->emit_reloc(brw); accumulate_state(&examined, &atom->dirty); @@ -252,8 
+250,6 @@ void brw_validate_state( struct brw_context *brw ) if (check_state(state, &atom->dirty) || atom->always_update) atom->update( brw ); - if (atom->emit_reloc != NULL) - atom->emit_reloc(brw); } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 22e32fe6a7f..573be01a2bb 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -76,6 +76,7 @@ static dri_bo * vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) { struct brw_vs_unit_state vs; + dri_bo *bo; memset(&vs, 0, sizeof(vs)); @@ -107,12 +108,20 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) */ vs.vs6.vs_enable = 1; - brw->vs.thread0_delta = vs.thread0.grf_reg_count << 1; - return brw_upload_cache(&brw->cache, BRW_VS_UNIT, - key, sizeof(*key), - &brw->vs.prog_bo, 1, - &vs, sizeof(vs), - NULL, NULL); + bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, + key, sizeof(*key), + &brw->vs.prog_bo, 1, + &vs, sizeof(vs), + NULL, NULL); + + /* Emit VS program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + vs.thread0.grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); + + return bo; } static void upload_vs_unit( struct brw_context *brw ) @@ -131,16 +140,6 @@ static void upload_vs_unit( struct brw_context *brw ) } } -static void emit_reloc_vs_unit(struct brw_context *brw) -{ - /* Emit VS program relocation */ - dri_emit_reloc(brw->vs.state_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - brw->vs.thread0_delta, - offsetof(struct brw_vs_unit_state, thread0), - brw->vs.prog_bo); -} - const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, @@ -149,5 +148,4 @@ const struct brw_tracked_state brw_vs_unit = { .cache = CACHE_NEW_VS_PROG }, .update = upload_vs_unit, - .emit_reloc = emit_reloc_vs_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 0f59584c6e3..0c750213240 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -296,27 +296,19 @@ static void upload_wm_samplers( struct brw_context *brw ) brw->wm.sdc_bo, key.sampler_count, &sampler, sizeof(sampler), NULL, NULL); - } -} -static void emit_reloc_wm_samplers(struct brw_context *brw) -{ - GLuint unit; - - if (brw->wm.sampler_count == 0) - return; + /* Emit SDC relocations */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (!brw->attribs.Texture->Unit[i]._ReallyEnabled) + continue; - /* Emit SDC relocations */ - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - if (!brw->attribs.Texture->Unit[unit]._ReallyEnabled) - continue; - - dri_emit_reloc(brw->wm.sampler_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - 0, - unit * sizeof(struct brw_sampler_state) + - offsetof(struct brw_sampler_state, ss2), - brw->wm.sdc_bo[unit]); + dri_emit_reloc(brw->wm.sampler_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); + } } } @@ -327,7 +319,6 @@ const struct brw_tracked_state brw_wm_samplers = { .cache = 0 }, .update = upload_wm_samplers, - .emit_reloc = emit_reloc_wm_samplers, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index c2735251029..6e430645d62 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -112,6 +112,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, dri_bo **reloc_bufs) { struct brw_wm_unit_state wm; + dri_bo *bo; memset(&wm, 0, sizeof(wm)); @@ -186,15 +187,38 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm) wm.wm4.stats_enable = 1; - brw->wm.thread0_delta = wm.thread0.grf_reg_count << 1; - brw->wm.thread2_delta 
= wm.thread2.per_thread_scratch_space; - brw->wm.wm4_delta = wm.wm4.stats_enable | (wm.wm4.sampler_count << 2); + bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, + key, sizeof(*key), + reloc_bufs, 3, + &wm, sizeof(wm), + NULL, NULL); - return brw_upload_cache(&brw->cache, BRW_WM_UNIT, - key, sizeof(*key), - reloc_bufs, 3, - &wm, sizeof(wm), - NULL, NULL); + /* Emit WM program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (key->total_scratch != 0) { + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_buffer); + } + + /* Emit sampler state relocation */ + if (key->sampler_count != 0) { + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + } + + return bo; } @@ -240,34 +264,6 @@ static void upload_wm_unit( struct brw_context *brw ) } } -static void emit_reloc_wm_unit(struct brw_context *brw) -{ - /* Emit WM program relocation */ - dri_emit_reloc(brw->wm.state_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - brw->wm.thread0_delta, - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo); - - /* Emit scratch space relocation */ - if (brw->wm.scratch_buffer != NULL) { - dri_emit_reloc(brw->wm.state_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - brw->wm.thread2_delta, - offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_buffer); - } - - /* Emit sampler state relocation */ - if (brw->wm.sampler_bo != NULL) { - dri_emit_reloc(brw->wm.state_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - brw->wm.wm4_delta, - offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo); - } -} - const struct 
brw_tracked_state brw_wm_unit = { .dirty = { .mesa = (_NEW_POLYGON | @@ -284,6 +280,5 @@ const struct brw_tracked_state brw_wm_unit = { CACHE_NEW_SAMPLER) }, .update = upload_wm_unit, - .emit_reloc = emit_reloc_wm_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 0984068d352..cf253391b50 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -154,6 +154,7 @@ brw_create_texture_surface( struct brw_context *brw, struct brw_wm_surface_key *key ) { struct brw_surface_state surf; + dri_bo *bo; memset(&surf, 0, sizeof(surf)); @@ -187,11 +188,20 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - return brw_upload_cache( &brw->cache, BRW_SS_SURFACE, - key, sizeof(*key), - &key->bo, 1, - &surf, sizeof(surf), - NULL, NULL ); + bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, 1, + &surf, sizeof(surf), + NULL, NULL); + + /* Emit relocation to surface contents */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + + return bo; } static void @@ -307,7 +317,15 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, &surf, sizeof(surf), NULL, NULL); - brw->wm.nr_surfaces = 1; + if (region_bo != NULL) { + dri_emit_reloc(brw->wm.surf_bo[unit], + DRM_BO_FLAG_MEM_TT | + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE, + 0, + offsetof(struct brw_surface_state, ss1), + region_bo); + } } } @@ -343,6 +361,19 @@ brw_wm_get_binding_table(struct brw_context *brw) data, data_size, NULL, NULL); + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_WM_MAX_SURF; i++) { + if (brw->wm.surf_bo[i] != NULL) { + dri_emit_reloc(bind_bo, + DRM_BO_FLAG_MEM_TT | + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE, + 0, + i * 4, + brw->wm.surf_bo[i]); + } + } + free(data); } @@ -356,6 +387,7 @@ 
static void upload_wm_surfaces(struct brw_context *brw ) GLuint i; brw_update_region_surface(brw, brw->state.draw_region, 0); + brw->wm.nr_surfaces = 1; for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; @@ -385,48 +417,6 @@ static void upload_wm_surfaces(struct brw_context *brw ) brw->wm.bind_bo = brw_wm_get_binding_table(brw); } -static void emit_reloc_wm_surfaces(struct brw_context *brw) -{ - int unit, i; - - /* Emit SS framebuffer relocation */ - if (brw->state.draw_region != NULL) { - dri_emit_reloc(brw->wm.surf_bo[0], - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - 0, - offsetof(struct brw_surface_state, ss1), - brw->state.draw_region->buffer); - } - - /* Emit SS relocations for texture buffers */ - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - - if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { - dri_emit_reloc(brw->wm.surf_bo[unit + 1], - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - 0, - offsetof(struct brw_surface_state, ss1), - intelObj->mt->region->buffer); - } - } - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_WM_MAX_SURF; i++) { - if (brw->wm.surf_bo[i] != NULL) { - dri_emit_reloc(brw->wm.bind_bo, - DRM_BO_FLAG_MEM_TT | - DRM_BO_FLAG_READ | - DRM_BO_FLAG_WRITE, - 0, - i * 4, - brw->wm.surf_bo[i]); - } - } -} - const struct brw_tracked_state brw_wm_surfaces = { .dirty = { .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, @@ -434,7 +424,6 @@ const struct brw_tracked_state brw_wm_surfaces = { .cache = 0 }, .update = upload_wm_surfaces, - .emit_reloc = emit_reloc_wm_surfaces, }; diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c index b9eabb42c01..a5307f1939b 100644 --- 
a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c @@ -81,6 +81,15 @@ typedef struct _dri_bufmgr_ttm { struct intel_bo_list list; /* list of buffers to be validated */ } dri_bufmgr_ttm; +/** + * Private information associated with a relocation that isn't already stored + * in the relocation buffer to be passed to the kernel. + */ +struct dri_ttm_reloc { + dri_bo *target_buf; + uint64_t validate_flags; +}; + typedef struct _dri_bo_ttm { dri_bo bo; @@ -88,9 +97,15 @@ typedef struct _dri_bo_ttm { drmBO drm_bo; const char *name; + /* Index of the buffer within the validation list while preparing a + * batchbuffer execution. + */ + int validate_index; + /** DRM buffer object containing relocation list */ drmBO *reloc_buf; - uint32_t *relocs; + uint32_t *reloc_buf_data; + struct dri_ttm_reloc *relocs; } dri_bo_ttm; typedef struct _dri_fence_ttm @@ -130,14 +145,9 @@ intel_free_validate_list(dri_bufmgr_ttm *bufmgr_ttm) for (l = list->list.next; l != &list->list; l = list->list.next) { struct intel_bo_node *node = DRMLISTENTRY(struct intel_bo_node, l, head); - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)node->bo; DRMLISTDEL(l); - /* Clear relocation list */ - if (bo_ttm->relocs != NULL) - bo_ttm->relocs[0] = bo_ttm->relocs[0] & ~0xffff; - dri_bo_unreference(node->bo); drmFree(node); @@ -157,9 +167,10 @@ static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) DRMLISTENTRY(struct intel_bo_node, l, head); dri_bo_ttm *bo_ttm = (dri_bo_ttm *)node->bo; - if (bo_ttm->relocs != NULL) { - for (j = 0; j < (bo_ttm->relocs[0] & 0xffff); j++) { - uint32_t *reloc_entry = bo_ttm->relocs + I915_RELOC_HEADER + + if (bo_ttm->reloc_buf_data != NULL) { + for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) { + uint32_t *reloc_entry = bo_ttm->reloc_buf_data + + I915_RELOC_HEADER + j * I915_RELOC0_STRIDE; DBG("%2d: %s@0x%08x -> %d + 0x%08x\n", @@ -234,11 +245,10 @@ intel_setup_validate_list(dri_bufmgr_ttm *bufmgr_ttm, GLuint *count_p) * 
access flags. */ static struct intel_bo_node * -intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm, - dri_bo *buf, - uint64_t flags, - int *itemLoc) +intel_add_validate_buffer(dri_bo *buf, + uint64_t flags) { + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; struct intel_bo_list *list = &bufmgr_ttm->list; struct intel_bo_node *cur; dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; @@ -289,7 +299,8 @@ intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm, } cur->flags = memFlags | modeFlags; } - *itemLoc = count; + + ttm_buf->validate_index = count; return cur; } @@ -312,6 +323,8 @@ intel_setup_reloc_list(dri_bo *bo) return 0; bo_ttm->reloc_buf = malloc(sizeof(bo_ttm->drm_bo)); + bo_ttm->relocs = malloc(sizeof(struct dri_ttm_reloc) * + bufmgr_ttm->max_relocs); ret = drmBOCreate(bufmgr_ttm->fd, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs), 0, @@ -330,7 +343,7 @@ intel_setup_reloc_list(dri_bo *bo) ret = drmBOMap(bufmgr_ttm->fd, bo_ttm->reloc_buf, DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - 0, (void **)&bo_ttm->relocs); + 0, (void **)&bo_ttm->reloc_buf_data); if (ret) { fprintf(stderr, "Failed to map relocation BO: %s\n", strerror(-ret)); @@ -343,10 +356,10 @@ intel_setup_reloc_list(dri_bo *bo) * DWORD 2: unused * DWORD 3: unused */ - bo_ttm->relocs[0] = I915_RELOC_TYPE_0 << 16; - bo_ttm->relocs[1] = 0; - bo_ttm->relocs[2] = 0; - bo_ttm->relocs[3] = 0; + bo_ttm->reloc_buf_data[0] = I915_RELOC_TYPE_0 << 16; + bo_ttm->reloc_buf_data[1] = 0; + bo_ttm->reloc_buf_data[2] = 0; + bo_ttm->reloc_buf_data[3] = 0; return 0; } @@ -404,6 +417,7 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, ttm_buf->name = name; ttm_buf->refcount = 1; ttm_buf->reloc_buf = NULL; + ttm_buf->reloc_buf_data = NULL; ttm_buf->relocs = NULL; DBG("bo_create: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); @@ -455,6 +469,7 @@ intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, ttm_buf->name = name; ttm_buf->refcount = 1; ttm_buf->reloc_buf = NULL; + ttm_buf->reloc_buf_data = NULL; 
ttm_buf->relocs = NULL; DBG("bo_create_from_handle: %p %08x (%s)\n", @@ -484,6 +499,14 @@ dri_ttm_bo_unreference(dri_bo *buf) int ret; if (ttm_buf->reloc_buf) { + int i; + + /* Unreference all the target buffers */ + for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++) + dri_bo_unreference(ttm_buf->relocs[i].target_buf); + free(ttm_buf->relocs); + + /* Free the kernel BO containing relocation entries */ drmBOUnmap(bufmgr_ttm->fd, ttm_buf->reloc_buf); drmBOUnreference(bufmgr_ttm->fd, ttm_buf->reloc_buf); free(ttm_buf->reloc_buf); @@ -653,47 +676,83 @@ dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, { dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr; dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf; - struct intel_bo_node *node; - int index; int num_relocs; uint32_t *this_reloc; - node = intel_add_validate_buffer(bufmgr_ttm, target_buf, flags, &index); - intel_setup_reloc_list(reloc_buf); - num_relocs = (reloc_buf_ttm->relocs[0] & 0xffff); + num_relocs = (reloc_buf_ttm->reloc_buf_data[0] & 0xffff); /* Check overflow */ - assert((reloc_buf_ttm->relocs[0] & 0xffff) < bufmgr_ttm->max_relocs); + assert((reloc_buf_ttm->reloc_buf_data[0] & 0xffff) < + bufmgr_ttm->max_relocs); - this_reloc = reloc_buf_ttm->relocs + I915_RELOC_HEADER + + this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER + num_relocs * I915_RELOC0_STRIDE; this_reloc[0] = offset; this_reloc[1] = delta; - this_reloc[2] = index; + this_reloc[2] = -1; /* To be filled in at exec time */ this_reloc[3] = 0; - reloc_buf_ttm->relocs[0]++; /* Increment relocation count */ + reloc_buf_ttm->relocs[num_relocs].validate_flags = flags; + reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf; + dri_bo_reference(target_buf); + + reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */ /* Check wraparound */ - assert((reloc_buf_ttm->relocs[0] & 0xffff) != 0); + assert((reloc_buf_ttm->reloc_buf_data[0] & 0xffff) != 0); } +/** + * Walk the tree of 
relocations rooted at BO and accumulate the list of + * validations to be performed and update the relocation buffers with + * index values into the validation list. + */ +static void +dri_ttm_bo_process_reloc(dri_bo *bo) +{ + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + unsigned int nr_relocs; + int i; + + if (bo_ttm->reloc_buf_data == NULL) + return; + + nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; + + for (i = 0; i < nr_relocs; i++) { + struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; + dri_bo_ttm *target_ttm = (dri_bo_ttm *)r->target_buf; + uint32_t *reloc_entry; + + /* Continue walking the tree depth-first. */ + dri_ttm_bo_process_reloc(r->target_buf); + + /* Add the target to the validate list */ + intel_add_validate_buffer(r->target_buf, r->validate_flags); + + /* Update the index of the target in the relocation entry */ + reloc_entry = bo_ttm->reloc_buf_data + I915_RELOC_HEADER + + i * I915_RELOC0_STRIDE; + reloc_entry[2] = target_ttm->validate_index; + } +} static void * dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count) { dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; void *ptr; - int index; + + /* Update indices and set up the validate list. */ + dri_ttm_bo_process_reloc(batch_buf); /* Add the batch buffer to the validation list. There are no relocations * pointing to it. */ - intel_add_validate_buffer(bufmgr_ttm, batch_buf, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, - &index); + intel_add_validate_buffer(batch_buf, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); ptr = intel_setup_validate_list(bufmgr_ttm, count); |