diff options
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_bufmgr.h | 53 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compute.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 12 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_blorp_exec.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_batchbuffer.c | 213 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_batchbuffer.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_blit.c | 30 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_bufmgr_gem.c | 718 |
9 files changed, 225 insertions, 810 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h index 237f39bb078..d3db6a3967b 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h @@ -88,6 +88,15 @@ struct _drm_bacon_bo { * entries when calling drm_bacon_bo_emit_reloc() */ uint64_t offset64; + + /** + * Boolean of whether the GPU is definitely not accessing the buffer. + * + * This is only valid when reusable, since non-reusable + * buffers are those that have been shared with other + * processes, so we don't know their state. + */ + bool idle; }; #define BO_ALLOC_FOR_RENDER (1<<0) @@ -178,37 +187,6 @@ void drm_bacon_bo_wait_rendering(drm_bacon_bo *bo); */ void drm_bacon_bufmgr_destroy(drm_bacon_bufmgr *bufmgr); -/** Executes the command buffer pointed to by bo. */ -int drm_bacon_bo_exec(drm_bacon_bo *bo, int used); - -/** Executes the command buffer pointed to by bo on the selected ring buffer */ -int drm_bacon_bo_mrb_exec(drm_bacon_bo *bo, int used, unsigned int flags); -int drm_bacon_bufmgr_check_aperture_space(drm_bacon_bo ** bo_array, int count); - -/** - * Add relocation entry in reloc_buf, which will be updated with the - * target buffer's real offset on on command submission. - * - * Relocations remain in place for the lifetime of the buffer object. - * - * \param bo Buffer to write the relocation into. - * \param offset Byte offset within reloc_bo of the pointer to - * target_bo. - * \param target_bo Buffer whose offset should be written into the - * relocation entry. - * \param target_offset Constant value to be added to target_bo's - * offset in relocation entry. - * \param read_domains GEM read domains which the buffer will be - * read into by the command that this relocation - * is part of. - * \param write_domains GEM read domains which the buffer will be - * dirtied in by the command that this - * relocation is part of. - */ -int drm_bacon_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset, - drm_bacon_bo *target_bo, uint32_t target_offset, - uint32_t read_domains, uint32_t write_domain); - /** * Ask that the buffer be placed in tiling mode * @@ -271,9 +249,6 @@ int drm_bacon_bo_disable_reuse(drm_bacon_bo *bo); */ int drm_bacon_bo_is_reusable(drm_bacon_bo *bo); -/** Returns true if target_bo is in the relocation tree rooted at bo. */ -int drm_bacon_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo); - /* drm_bacon_bufmgr_gem.c */ drm_bacon_bufmgr *drm_bacon_bufmgr_gem_init(struct gen_device_info *devinfo, int fd, int batch_size); @@ -290,8 +265,6 @@ void *drm_bacon_gem_bo_map__cpu(drm_bacon_bo *bo); void *drm_bacon_gem_bo_map__gtt(drm_bacon_bo *bo); void *drm_bacon_gem_bo_map__wc(drm_bacon_bo *bo); -int drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo); -void drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start); void drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable); int drm_bacon_gem_bo_wait(drm_bacon_bo *bo, int64_t timeout_ns); @@ -300,14 +273,6 @@ drm_bacon_context *drm_bacon_gem_context_create(drm_bacon_bufmgr *bufmgr); int drm_bacon_gem_context_get_id(drm_bacon_context *ctx, uint32_t *ctx_id); void drm_bacon_gem_context_destroy(drm_bacon_context *ctx); -int drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx, - int used, unsigned int flags); -int drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo, - drm_bacon_context *ctx, - int used, - int in_fence, - int *out_fence, - unsigned int flags); int drm_bacon_bo_gem_export_to_prime(drm_bacon_bo *bo, int *prime_fd); drm_bacon_bo *drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index d816d056aad..e924401c3af 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -212,7 +212,7 @@ brw_dispatch_compute_common(struct gl_context *ctx) brw->no_batch_wrap = false; - if (drm_bacon_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { + if (!brw_batch_has_aperture_space(brw, 0)) { if (!fail_next) { intel_batchbuffer_reset_to_saved(brw); intel_batchbuffer_flush(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 00e9224d7d7..186ce826801 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -477,9 +477,21 @@ struct intel_batchbuffer { bool needs_sol_reset; bool state_base_address_emitted; + struct drm_i915_gem_relocation_entry *relocs; + int reloc_count; + int reloc_array_size; + /** The validation list */ + struct drm_i915_gem_exec_object2 *exec_objects; + drm_bacon_bo **exec_bos; + int exec_count; + int exec_array_size; + /** The amount of aperture space (in bytes) used by all exec_bos */ + int aperture_space; + struct { uint32_t *map_next; int reloc_count; + int exec_count; } saved; /** Map from batch offset to brw_state_batch data (with DEBUG_BATCH) */ diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a6e229f2210..bf09915d0c3 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -601,7 +601,7 @@ retry: brw->no_batch_wrap = false; - if (drm_bacon_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { + if (!brw_batch_has_aperture_space(brw, 0)) { if (!fail_next) { intel_batchbuffer_reset_to_saved(brw); intel_batchbuffer_flush(brw); diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index 145adf84f6d..5c99841568e 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -243,7 +243,7 @@ retry: * map all the BOs into the GPU at batch exec time later. If so, flush the * batch and try again with nothing else in the batch. */ - if (drm_bacon_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { + if (!brw_batch_has_aperture_space(brw, 0)) { if (!check_aperture_failed_once) { check_aperture_failed_once = true; intel_batchbuffer_reset_to_saved(brw); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index ab2ae961b9c..8bc42e27d20 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -38,6 +38,8 @@ #include <xf86drm.h> #include <i915_drm.h> +#define FILE_DEBUG_FLAG DEBUG_BUFMGR + static void intel_batchbuffer_reset(struct intel_batchbuffer *batch, drm_bacon_bufmgr *bufmgr, @@ -68,6 +70,17 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch, batch->map_next = batch->cpu_map; } + batch->reloc_count = 0; + batch->reloc_array_size = 250; + batch->relocs = malloc(batch->reloc_array_size * + sizeof(struct drm_i915_gem_relocation_entry)); + batch->exec_count = 0; + batch->exec_array_size = 100; + batch->exec_bos = + malloc(batch->exec_array_size * sizeof(batch->exec_bos[0])); + batch->exec_objects = + malloc(batch->exec_array_size * sizeof(batch->exec_objects[0])); + if (INTEL_DEBUG & DEBUG_BATCH) { batch->state_batch_sizes = _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare); @@ -117,14 +130,21 @@ void intel_batchbuffer_save_state(struct brw_context *brw) { brw->batch.saved.map_next = brw->batch.map_next; - brw->batch.saved.reloc_count = - drm_bacon_gem_bo_get_reloc_count(brw->batch.bo); + brw->batch.saved.reloc_count = brw->batch.reloc_count; + brw->batch.saved.exec_count = brw->batch.exec_count; } void intel_batchbuffer_reset_to_saved(struct brw_context *brw) { - drm_bacon_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count); + for (int i = brw->batch.saved.exec_count; + i < brw->batch.exec_count; i++) { + if (brw->batch.exec_bos[i] != brw->batch.bo) { + drm_bacon_bo_unreference(brw->batch.exec_bos[i]); + } + } + brw->batch.reloc_count = brw->batch.saved.reloc_count; + brw->batch.exec_count = brw->batch.saved.exec_count; brw->batch.map_next = brw->batch.saved.map_next; if (USED_BATCH(brw->batch) == 0) @@ -135,6 +155,16 @@ void intel_batchbuffer_free(struct intel_batchbuffer *batch) { free(batch->cpu_map); + + for (int i = 0; i < batch->exec_count; i++) { + if (batch->exec_bos[i] != batch->bo) { + drm_bacon_bo_unreference(batch->exec_bos[i]); + } + } + free(batch->relocs); + free(batch->exec_bos); + free(batch->exec_objects); + drm_bacon_bo_unreference(batch->last_bo); drm_bacon_bo_unreference(batch->bo); if (batch->state_batch_sizes) @@ -334,8 +364,18 @@ static void do_batch_dump(struct brw_context *brw) { } static void brw_new_batch(struct brw_context *brw) { + /* Unreference any BOs held by the previous batch, and reset counts. */ + for (int i = 0; i < brw->batch.exec_count; i++) { + if (brw->batch.exec_bos[i] != brw->batch.bo) { + drm_bacon_bo_unreference(brw->batch.exec_bos[i]); + } + brw->batch.exec_bos[i] = NULL; + } + brw->batch.reloc_count = 0; + brw->batch.exec_count = 0; + brw->batch.aperture_space = BATCH_SZ; + /* Create a new batchbuffer and reset the associated state: */ - drm_bacon_gem_bo_clear_relocs(brw->batch.bo, 0); intel_batchbuffer_reset_and_clear_render_cache(brw); /* If the kernel supports hardware contexts, then most hardware state is @@ -452,11 +492,110 @@ throttle(struct brw_context *brw) } } -/* TODO: Push this whole function into bufmgr. - */ +static void +add_exec_bo(struct intel_batchbuffer *batch, drm_bacon_bo *bo) +{ + if (bo != batch->bo) { + for (int i = 0; i < batch->exec_count; i++) { + if (batch->exec_bos[i] == bo) + return; + } + + drm_bacon_bo_reference(bo); + } + + if (batch->exec_count == batch->exec_array_size) { + batch->exec_array_size *= 2; + batch->exec_bos = + realloc(batch->exec_bos, + batch->exec_array_size * sizeof(batch->exec_bos[0])); + batch->exec_objects = + realloc(batch->exec_objects, + batch->exec_array_size * sizeof(batch->exec_objects[0])); + } + + struct drm_i915_gem_exec_object2 *validation_entry = + &batch->exec_objects[batch->exec_count]; + validation_entry->handle = bo->handle; + if (bo == batch->bo) { + validation_entry->relocation_count = batch->reloc_count; + validation_entry->relocs_ptr = (uintptr_t) batch->relocs; + } else { + validation_entry->relocation_count = 0; + validation_entry->relocs_ptr = 0; + } + validation_entry->alignment = bo->align; + validation_entry->offset = bo->offset64; + validation_entry->flags = 0; + validation_entry->rsvd1 = 0; + validation_entry->rsvd2 = 0; + + batch->exec_bos[batch->exec_count] = bo; + batch->exec_count++; + batch->aperture_space += bo->size; +} + +static int +execbuffer(int fd, + struct intel_batchbuffer *batch, + drm_bacon_context *ctx, + int used, + int in_fence, + int *out_fence, + int flags) +{ + uint32_t ctx_id = 0; + drm_bacon_gem_context_get_id(ctx, &ctx_id); + + struct drm_i915_gem_execbuffer2 execbuf = { + .buffers_ptr = (uintptr_t) batch->exec_objects, + .buffer_count = batch->exec_count, + .batch_start_offset = 0, + .batch_len = used, + .flags = flags, + .rsvd1 = ctx_id, /* rsvd1 is actually the context ID */ + }; + + unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2; + + if (in_fence != -1) { + execbuf.rsvd2 = in_fence; + execbuf.flags |= I915_EXEC_FENCE_IN; + } + + if (out_fence != NULL) { + cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2_WR; + *out_fence = -1; + execbuf.flags |= I915_EXEC_FENCE_OUT; + } + + int ret = drmIoctl(fd, cmd, &execbuf); + if (ret != 0) + ret = -errno; + + for (int i = 0; i < batch->exec_count; i++) { + drm_bacon_bo *bo = batch->exec_bos[i]; + + bo->idle = false; + + /* Update drm_bacon_bo::offset64 */ + if (batch->exec_objects[i].offset != bo->offset64) { + DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%llx\n", + bo->handle, bo->offset64, batch->exec_objects[i].offset); + bo->offset64 = batch->exec_objects[i].offset; + } + } + + if (ret == 0 && out_fence != NULL) + *out_fence = execbuf.rsvd2 >> 32; + + return ret; +} + static int do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd) { + __DRIscreen *dri_screen = brw->screen->driScrnPriv; struct intel_batchbuffer *batch = &brw->batch; int ret = 0; @@ -484,17 +623,14 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd) flags |= I915_EXEC_GEN7_SOL_RESET; if (ret == 0) { - if (brw->hw_ctx == NULL || batch->ring != RENDER_RING) { - assert(in_fence_fd == -1); - assert(out_fence_fd == NULL); - ret = drm_bacon_bo_mrb_exec(batch->bo, 4 * USED_BATCH(*batch), - flags); - } else { - ret = drm_bacon_gem_bo_fence_exec(batch->bo, brw->hw_ctx, - 4 * USED_BATCH(*batch), - in_fence_fd, out_fence_fd, - flags); - } + void *hw_ctx = batch->ring != RENDER_RING ? NULL : brw->hw_ctx; + + /* Add the batch itself to the end of the validation list */ + add_exec_bo(batch, batch->bo); + + ret = execbuffer(dri_screen->fd, batch, hw_ctx, + 4 * USED_BATCH(*batch), + in_fence_fd, out_fence_fd, flags); } throttle(brw); @@ -577,9 +713,20 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw, } bool +brw_batch_has_aperture_space(struct brw_context *brw, unsigned extra_space) +{ + return brw->batch.aperture_space + extra_space <= + brw->screen->aperture_threshold; +} + +bool brw_batch_references(struct intel_batchbuffer *batch, drm_bacon_bo *bo) { - return drm_bacon_bo_references(batch->bo, bo); + for (int i = 0; i < batch->exec_count; i++) { + if (batch->exec_bos[i] == bo) + return true; + } + return false; } /* This is the only way buffers get added to the validate list. @@ -589,13 +736,31 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset, drm_bacon_bo *target, uint32_t target_offset, uint32_t read_domains, uint32_t write_domain) { - int ret; + if (batch->reloc_count == batch->reloc_array_size) { + batch->reloc_array_size *= 2; + batch->relocs = realloc(batch->relocs, + batch->reloc_array_size * + sizeof(struct drm_i915_gem_relocation_entry)); + } + + /* Check args */ + assert(batch_offset <= BATCH_SZ - sizeof(uint32_t)); + assert(_mesa_bitcount(write_domain) <= 1); + + if (target != batch->bo) + add_exec_bo(batch, target); + + struct drm_i915_gem_relocation_entry *reloc = + &batch->relocs[batch->reloc_count]; + + batch->reloc_count++; - ret = drm_bacon_bo_emit_reloc(batch->bo, batch_offset, - target, target_offset, - read_domains, write_domain); - assert(ret == 0); - (void)ret; + reloc->offset = batch_offset; + reloc->delta = target_offset; + reloc->target_handle = target->handle; + reloc->read_domains = read_domains; + reloc->write_domain = write_domain; + reloc->presumed_offset = target->offset64; /* Using the old buffer offset, write in what the right data would be, in * case the buffer doesn't move and we can short-circuit the relocation diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index e67b18200f4..efee863cee2 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -65,6 +65,9 @@ void intel_batchbuffer_data(struct brw_context *brw, const void *data, GLuint bytes, enum brw_gpu_ring ring); +bool brw_batch_has_aperture_space(struct brw_context *brw, + unsigned extra_space_in_bytes); + bool brw_batch_references(struct intel_batchbuffer *batch, drm_bacon_bo *bo); uint64_t brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset, diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 29aa777e814..ebd4c529e9f 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -498,10 +498,9 @@ intelEmitCopyBlit(struct brw_context *brw, GLshort w, GLshort h, GLenum logic_op) { - GLuint CMD, BR13, pass = 0; + GLuint CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; - drm_bacon_bo *aper_array[3]; bool dst_y_tiled = dst_tiling == I915_TILING_Y; bool src_y_tiled = src_tiling == I915_TILING_Y; uint32_t src_tile_w, src_tile_h; @@ -510,20 +509,13 @@ intelEmitCopyBlit(struct brw_context *brw, if ((dst_y_tiled || src_y_tiled) && brw->gen < 6) return false; + const unsigned bo_sizes = dst_buffer->size + src_buffer->size; + /* do space check before going any further */ - do { - aper_array[0] = brw->batch.bo; - aper_array[1] = dst_buffer; - aper_array[2] = src_buffer; - - if (drm_bacon_bufmgr_check_aperture_space(aper_array, 3) != 0) { - intel_batchbuffer_flush(brw); - pass++; - } else - break; - } while (pass < 2); - - if (pass >= 2) + if (!brw_batch_has_aperture_space(brw, bo_sizes)) + intel_batchbuffer_flush(brw); + + if (!brw_batch_has_aperture_space(brw, bo_sizes)) return false; unsigned length = brw->gen >= 8 ? 10 : 8; @@ -780,7 +772,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, { uint32_t BR13, CMD; int pitch, cpp; - drm_bacon_bo *aper_array[2]; pitch = mt->pitch; cpp = mt->cpp; @@ -799,13 +790,8 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, BR13 |= pitch; /* do space check before going any further */ - aper_array[0] = brw->batch.bo; - aper_array[1] = mt->bo; - - if (drm_bacon_bufmgr_check_aperture_space(aper_array, - ARRAY_SIZE(aper_array)) != 0) { + if (!brw_batch_has_aperture_space(brw, mt->bo->size)) intel_batchbuffer_flush(brw); - } unsigned length = brw->gen >= 8 ? 7 : 6; bool dst_y_tiled = mt->tiling == I915_TILING_Y; diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c b/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c index 39489e6d32b..5c5ffdca232 100644 --- a/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c @@ -90,22 +90,6 @@ atomic_add_unless(int *v, int add, int unless) return c == unless; } -/** - * upper_32_bits - return bits 32-63 of a number - * @n: the number we're accessing - * - * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress - * the "right shift count >= width of type" warning when that quantity is - * 32-bits. - */ -#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16)) - -/** - * lower_32_bits - return bits 0-31 of a number - * @n: the number we're accessing - */ -#define lower_32_bits(n) ((__u32)(n)) - struct _drm_bacon_context { unsigned int ctx_id; struct _drm_bacon_bufmgr *bufmgr; @@ -121,15 +105,8 @@ struct drm_bacon_gem_bo_bucket { typedef struct _drm_bacon_bufmgr { int fd; - int max_relocs; - pthread_mutex_t lock; - struct drm_i915_gem_exec_object2 *exec2_objects; - drm_bacon_bo **exec_bos; - int exec_size; - int exec_count; - /** Array of lists of cached gem objects of power-of-two sizes */ struct drm_bacon_gem_bo_bucket cache_bucket[14 * 4]; int num_buckets; @@ -141,10 +118,8 @@ typedef struct _drm_bacon_bufmgr { struct list_head vma_cache; int vma_count, vma_open, vma_max; - uint64_t gtt_size; unsigned int has_llc : 1; unsigned int bo_reuse : 1; - unsigned int no_exec : 1; } drm_bacon_bufmgr; struct _drm_bacon_bo_gem { @@ -162,12 +137,6 @@ struct _drm_bacon_bo_gem { unsigned int global_name; /** - * Index of the buffer within the validation list while preparing a - * batchbuffer execution. - */ - int validate_index; - - /** * Current tiling mode */ uint32_t tiling_mode; @@ -176,14 +145,6 @@ struct _drm_bacon_bo_gem { time_t free_time; - /** Array passed to the DRM containing relocation information. */ - struct drm_i915_gem_relocation_entry *relocs; - /** - * Array of info structs corresponding to relocs[i].target_handle etc - */ - drm_bacon_bo **reloc_bos; - /** Number of entries in relocs */ - int reloc_count; /** Mapped address for the buffer, saved across map/unmap cycles */ void *mem_virtual; /** GTT virtual address for the buffer, saved across map/unmap cycles */ @@ -197,60 +158,16 @@ struct _drm_bacon_bo_gem { struct list_head head; /** - * Boolean of whether this BO and its children have been included in - * the current drm_bacon_bufmgr_check_aperture_space() total. - */ - bool included_in_check_aperture; - - /** - * Boolean of whether this buffer has been used as a relocation - * target and had its size accounted for, and thus can't have any - * further relocations added to it. - */ - bool used_as_reloc_target; - - /** - * Boolean of whether we have encountered an error whilst building the relocation tree. - */ - bool has_error; - - /** * Boolean of whether this buffer can be re-used */ bool reusable; - - /** - * Boolean of whether the GPU is definitely not accessing the buffer. - * - * This is only valid when reusable, since non-reusable - * buffers are those that have been shared with other - * processes, so we don't know their state. - */ - bool idle; - - /** - * Size in bytes of this buffer and its relocation descendents. - * - * Used to avoid costly tree walking in - * drm_bacon_bufmgr_check_aperture in the common case. - */ - int reloc_tree_size; }; -static unsigned int -drm_bacon_gem_estimate_batch_space(drm_bacon_bo ** bo_array, int count); - -static unsigned int -drm_bacon_gem_compute_batch_space(drm_bacon_bo ** bo_array, int count); - static int drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo, uint32_t tiling_mode, uint32_t stride); -static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo, - time_t time); - static void drm_bacon_gem_bo_free(drm_bacon_bo *bo); static inline drm_bacon_bo_gem *to_bo_gem(drm_bacon_bo *bo) @@ -331,42 +248,6 @@ drm_bacon_gem_bo_bucket_for_size(drm_bacon_bufmgr *bufmgr, return NULL; } -static void -drm_bacon_gem_dump_validation_list(drm_bacon_bufmgr *bufmgr) -{ - int i, j; - - for (i = 0; i < bufmgr->exec_count; i++) { - drm_bacon_bo *bo = bufmgr->exec_bos[i]; - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; - - if (bo_gem->relocs == NULL) { - DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, - bo_gem->name); - continue; - } - - for (j = 0; j < bo_gem->reloc_count; j++) { - drm_bacon_bo *target_bo = bo_gem->reloc_bos[j]; - drm_bacon_bo_gem *target_gem = - (drm_bacon_bo_gem *) target_bo; - - DBG("%2d: %d (%s)@0x%08x %08x -> " - "%d (%s)@0x%08x %08x + 0x%08x\n", - i, - bo_gem->gem_handle, - bo_gem->name, - upper_32_bits(bo_gem->relocs[j].offset), - lower_32_bits(bo_gem->relocs[j].offset), - target_gem->gem_handle, - target_gem->name, - upper_32_bits(target_bo->offset64), - lower_32_bits(target_bo->offset64), - bo_gem->relocs[j].delta); - } - } -} - inline void drm_bacon_bo_reference(drm_bacon_bo *bo) { @@ -375,95 +256,6 @@ drm_bacon_bo_reference(drm_bacon_bo *bo) p_atomic_inc(&bo_gem->refcount); } -static void -drm_bacon_add_validate_buffer2(drm_bacon_bo *bo) -{ - drm_bacon_bufmgr *bufmgr = bo->bufmgr; - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo; - int index; - - if (bo_gem->validate_index != -1) - return; - - /* Extend the array of validation entries as necessary. */ - if (bufmgr->exec_count == bufmgr->exec_size) { - int new_size = bufmgr->exec_size * 2; - - if (new_size == 0) - new_size = 5; - - bufmgr->exec2_objects = - realloc(bufmgr->exec2_objects, - sizeof(*bufmgr->exec2_objects) * new_size); - bufmgr->exec_bos = - realloc(bufmgr->exec_bos, - sizeof(*bufmgr->exec_bos) * new_size); - bufmgr->exec_size = new_size; - } - - index = bufmgr->exec_count; - bo_gem->validate_index = index; - /* Fill in array entry */ - bufmgr->exec2_objects[index].handle = bo_gem->gem_handle; - bufmgr->exec2_objects[index].relocation_count = bo_gem->reloc_count; - bufmgr->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; - bufmgr->exec2_objects[index].alignment = bo->align; - bufmgr->exec2_objects[index].offset = bo->offset64; - bufmgr->exec2_objects[index].flags = 0; - bufmgr->exec2_objects[index].rsvd1 = 0; - bufmgr->exec2_objects[index].rsvd2 = 0; - bufmgr->exec_bos[index] = bo; - bufmgr->exec_count++; -} - -static void -drm_bacon_bo_gem_set_in_aperture_size(drm_bacon_bufmgr *bufmgr, - drm_bacon_bo_gem *bo_gem, - unsigned int alignment) -{ - unsigned int size; - - assert(!bo_gem->used_as_reloc_target); - - /* The older chipsets are far-less flexible in terms of tiling, - * and require tiled buffer to be size aligned in the aperture. - * This means that in the worst possible case we will need a hole - * twice as large as the object in order for it to fit into the - * aperture. Optimal packing is for wimps. - */ - size = bo_gem->bo.size; - - bo_gem->reloc_tree_size = size + alignment; -} - -static int -drm_bacon_setup_reloc_list(drm_bacon_bo *bo) -{ - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; - drm_bacon_bufmgr *bufmgr = bo->bufmgr; - unsigned int max_relocs = bufmgr->max_relocs; - - if (bo->size / 4 < max_relocs) - max_relocs = bo->size / 4; - - bo_gem->relocs = malloc(max_relocs * - sizeof(struct drm_i915_gem_relocation_entry)); - bo_gem->reloc_bos = malloc(max_relocs * sizeof(drm_bacon_bo *)); - if (bo_gem->relocs == NULL || bo_gem->reloc_bos == NULL) { - bo_gem->has_error = true; - - free (bo_gem->relocs); - bo_gem->relocs = NULL; - - free (bo_gem->reloc_bos); - bo_gem->reloc_bos = NULL; - - return 1; - } - - return 0; -} - int drm_bacon_bo_busy(drm_bacon_bo *bo) { @@ -472,15 +264,12 @@ drm_bacon_bo_busy(drm_bacon_bo *bo) struct drm_i915_gem_busy busy; int ret; - if (bo_gem->reusable && bo_gem->idle) - return false; - memclear(busy); busy.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); if (ret == 0) { - bo_gem->idle = !busy.busy; + bo->idle = !busy.busy; return busy.busy; } else { return false; @@ -658,12 +447,8 @@ retry: bo_gem->name = name; p_atomic_set(&bo_gem->refcount, 1); - bo_gem->validate_index = -1; - bo_gem->used_as_reloc_target = false; - bo_gem->has_error = false; bo_gem->reusable = true; - drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, alignment); pthread_mutex_unlock(&bufmgr->lock); DBG("bo_create: buf %d (%s) %ldb\n", @@ -809,7 +594,6 @@ drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr, bo_gem->bo.virtual = NULL; bo_gem->bo.bufmgr = bufmgr; bo_gem->name = name; - bo_gem->validate_index = -1; bo_gem->gem_handle = open_arg.handle; bo_gem->bo.handle = open_arg.handle; bo_gem->global_name = handle; @@ -831,7 +615,6 @@ drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr, bo_gem->tiling_mode = get_tiling.tiling_mode; bo_gem->swizzle_mode = get_tiling.swizzle_mode; /* XXX stride is unknown */ - drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0); DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); out: @@ -1012,32 +795,10 @@ drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time) drm_bacon_bufmgr *bufmgr = bo->bufmgr; drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; struct drm_bacon_gem_bo_bucket *bucket; - int i; - - /* Unreference all the target buffers */ - for (i = 0; i < bo_gem->reloc_count; i++) { - if (bo_gem->reloc_bos[i] != bo) { - drm_bacon_gem_bo_unreference_locked_timed(bo_gem-> - reloc_bos[i], - time); - } - } - bo_gem->reloc_count = 0; - bo_gem->used_as_reloc_target = false; DBG("bo_unreference final: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); - /* release memory associated with this object */ - if (bo_gem->reloc_bos) { - free(bo_gem->reloc_bos); - bo_gem->reloc_bos = NULL; - } - if (bo_gem->relocs) { - free(bo_gem->relocs); - bo_gem->relocs = NULL; - } - /* Clear any left-over mappings */ if (bo_gem->map_count) { DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); @@ -1054,7 +815,6 @@ drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time) bo_gem->free_time = time; bo_gem->name = NULL; - bo_gem->validate_index = -1; list_addtail(&bo_gem->head, &bucket->head); } else { @@ -1062,16 +822,6 @@ drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time) } } -static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo, - time_t time) -{ - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; - - assert(p_atomic_read(&bo_gem->refcount) > 0); - if (p_atomic_dec_zero(&bo_gem->refcount)) - drm_bacon_gem_bo_unreference_final(bo, time); -} - void drm_bacon_bo_unreference(drm_bacon_bo *bo) { @@ -1489,9 +1239,6 @@ drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable) void drm_bacon_bufmgr_destroy(drm_bacon_bufmgr *bufmgr) { - free(bufmgr->exec2_objects); - free(bufmgr->exec_bos); - pthread_mutex_destroy(&bufmgr->lock); /* Free any cached buffer objects we were going to reuse */ @@ -1515,282 +1262,6 @@ drm_bacon_bufmgr_destroy(drm_bacon_bufmgr *bufmgr) free(bufmgr); } -/** - * Adds the target buffer to the validation list and adds the relocation - * to the reloc_buffer's relocation list. - * - * The relocation entry at the given offset must already contain the - * precomputed relocation value, because the kernel will optimize out - * the relocation entry write when the buffer hasn't moved from the - * last known offset in target_bo. - */ -int -drm_bacon_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset, - drm_bacon_bo *target_bo, uint32_t target_offset, - uint32_t read_domains, uint32_t write_domain) -{ - drm_bacon_bufmgr *bufmgr = bo->bufmgr; - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; - drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo; - - if (bo_gem->has_error) - return -ENOMEM; - - if (target_bo_gem->has_error) { - bo_gem->has_error = true; - return -ENOMEM; - } - - /* Create a new relocation list if needed */ - if (bo_gem->relocs == NULL && drm_bacon_setup_reloc_list(bo)) - return -ENOMEM; - - /* Check overflow */ - assert(bo_gem->reloc_count < bufmgr->max_relocs); - - /* Check args */ - assert(offset <= bo->size - 4); - assert((write_domain & (write_domain - 1)) == 0); - - /* Make sure that we're not adding a reloc to something whose size has - * already been accounted for. - */ - assert(!bo_gem->used_as_reloc_target); - if (target_bo_gem != bo_gem) { - target_bo_gem->used_as_reloc_target = true; - bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; - } - - bo_gem->reloc_bos[bo_gem->reloc_count] = target_bo; - if (target_bo != bo) - drm_bacon_bo_reference(target_bo); - - bo_gem->relocs[bo_gem->reloc_count].offset = offset; - bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; - bo_gem->relocs[bo_gem->reloc_count].target_handle = - target_bo_gem->gem_handle; - bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; - bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; - bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; - bo_gem->reloc_count++; - - return 0; -} - -int -drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo) -{ - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; - - return bo_gem->reloc_count; -} - -/** - * Removes existing relocation entries in the BO after "start". - * - * This allows a user to avoid a two-step process for state setup with - * counting up all the buffer objects and doing a - * drm_bacon_bufmgr_check_aperture_space() before emitting any of the - * relocations for the state setup. Instead, save the state of the - * batchbuffer including drm_bacon_gem_get_reloc_count(), emit all the - * state, and then check if it still fits in the aperture. - * - * Any further drm_bacon_bufmgr_check_aperture_space() queries - * involving this buffer in the tree are undefined after this call. - */ -void -drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start) -{ - drm_bacon_bufmgr *bufmgr = bo->bufmgr; - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; - int i; - struct timespec time; - - clock_gettime(CLOCK_MONOTONIC, &time); - - assert(bo_gem->reloc_count >= start); - - /* Unreference the cleared target buffers */ - pthread_mutex_lock(&bufmgr->lock); - - for (i = start; i < bo_gem->reloc_count; i++) { - drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->reloc_bos[i]; - if (&target_bo_gem->bo != bo) { - drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo, - time.tv_sec); - } - } - bo_gem->reloc_count = start; - - pthread_mutex_unlock(&bufmgr->lock); - -} - -static void -drm_bacon_gem_bo_process_reloc2(drm_bacon_bo *bo) -{ - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo; - int i; - - if (bo_gem->relocs == NULL) - return; - - for (i = 0; i < bo_gem->reloc_count; i++) { - drm_bacon_bo *target_bo = bo_gem->reloc_bos[i]; - - if (target_bo == bo) - continue; - - drm_bacon_gem_bo_mark_mmaps_incoherent(bo); - - /* Continue walking the tree depth-first. */ - drm_bacon_gem_bo_process_reloc2(target_bo); - - /* Add the target to the validate list */ - drm_bacon_add_validate_buffer2(target_bo); - } -} - -static void -drm_bacon_update_buffer_offsets2 (drm_bacon_bufmgr *bufmgr) -{ - int i; - - for (i = 0; i < bufmgr->exec_count; i++) { - drm_bacon_bo *bo = bufmgr->exec_bos[i]; - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo; - - /* Update the buffer offset */ - if (bufmgr->exec2_objects[i].offset != bo->offset64) { - DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", - bo_gem->gem_handle, bo_gem->name, - upper_32_bits(bo->offset64), - lower_32_bits(bo->offset64), - upper_32_bits(bufmgr->exec2_objects[i].offset), - lower_32_bits(bufmgr->exec2_objects[i].offset)); - bo->offset64 = bufmgr->exec2_objects[i].offset; - } - } -} - -static int -do_exec2(drm_bacon_bo *bo, int used, drm_bacon_context *ctx, - int in_fence, int *out_fence, - unsigned int flags) -{ - drm_bacon_bufmgr *bufmgr = bo->bufmgr; - struct drm_i915_gem_execbuffer2 execbuf; - int ret = 0; - int i; - - if (to_bo_gem(bo)->has_error) - return -ENOMEM; - - pthread_mutex_lock(&bufmgr->lock); - /* Update indices and set up the validate list. */ - drm_bacon_gem_bo_process_reloc2(bo); - - /* Add the batch buffer to the validation list. There are no relocations - * pointing to it. - */ - drm_bacon_add_validate_buffer2(bo); - - memclear(execbuf); - execbuf.buffers_ptr = (uintptr_t)bufmgr->exec2_objects; - execbuf.buffer_count = bufmgr->exec_count; - execbuf.batch_start_offset = 0; - execbuf.batch_len = used; - execbuf.cliprects_ptr = 0; - execbuf.num_cliprects = 0; - execbuf.DR1 = 0; - execbuf.DR4 = 0; - execbuf.flags = flags; - if (ctx == NULL) - i915_execbuffer2_set_context_id(execbuf, 0); - else - i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); - execbuf.rsvd2 = 0; - if (in_fence != -1) { - execbuf.rsvd2 = in_fence; - execbuf.flags |= I915_EXEC_FENCE_IN; - } - if (out_fence != NULL) { - *out_fence = -1; - execbuf.flags |= I915_EXEC_FENCE_OUT; - } - - if (bufmgr->no_exec) - goto skip_execution; - - ret = drmIoctl(bufmgr->fd, - DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, - &execbuf); - if (ret != 0) { - ret = -errno; - if (ret == -ENOSPC) { - DBG("Execbuffer fails to pin. " - "Estimate: %u. Actual: %u. Available: %u\n", - drm_bacon_gem_estimate_batch_space(bufmgr->exec_bos, - bufmgr->exec_count), - drm_bacon_gem_compute_batch_space(bufmgr->exec_bos, - bufmgr->exec_count), - (unsigned int) bufmgr->gtt_size); - } - } - drm_bacon_update_buffer_offsets2(bufmgr); - - if (ret == 0 && out_fence != NULL) - *out_fence = execbuf.rsvd2 >> 32; - -skip_execution: - if (INTEL_DEBUG & DEBUG_BUFMGR) - drm_bacon_gem_dump_validation_list(bufmgr); - - for (i = 0; i < bufmgr->exec_count; i++) { - drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr->exec_bos[i]); - - bo_gem->idle = false; - - /* Disconnect the buffer from the validate list */ - bo_gem->validate_index = -1; - bufmgr->exec_bos[i] = NULL; - } - bufmgr->exec_count = 0; - pthread_mutex_unlock(&bufmgr->lock); - - return ret; -} - -int -drm_bacon_bo_exec(drm_bacon_bo *bo, int used) -{ - return do_exec2(bo, used, NULL, -1, NULL, I915_EXEC_RENDER); -} - -int -drm_bacon_bo_mrb_exec(drm_bacon_bo *bo, int used, unsigned int flags) -{ - return do_exec2(bo, used, NULL, -1, NULL, flags); -} - -int -drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx, - int used, unsigned int flags) -{ - return do_exec2(bo, used, ctx, -1, NULL, flags); -} - -int -drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo, - drm_bacon_context *ctx, - int used, - int in_fence, - int *out_fence, - unsigned int flags) -{ - return do_exec2(bo, used, ctx, in_fence, out_fence, flags); -} - static int drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo, uint32_t tiling_mode, @@ -1833,7 +1304,6 @@ int drm_bacon_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode, uint32_t stride) { - drm_bacon_bufmgr *bufmgr = bo->bufmgr; drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; int ret; @@ -1844,8 +1314,6 @@ drm_bacon_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode, stride = 0; ret = drm_bacon_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); - if (ret == 0) - drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0); *tiling_mode = bo_gem->tiling_mode; return ret; @@ -1915,9 +1383,6 @@ drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int s &bo_gem->gem_handle, bo_gem); bo_gem->name = "prime"; - bo_gem->validate_index = -1; - bo_gem->used_as_reloc_target = false; - bo_gem->has_error = false; bo_gem->reusable = false; memclear(get_tiling); @@ -1930,7 +1395,6 @@ drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int s bo_gem->tiling_mode = get_tiling.tiling_mode; bo_gem->swizzle_mode = get_tiling.swizzle_mode; /* XXX stride is unknown */ - drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0); out: pthread_mutex_unlock(&bufmgr->lock); @@ -1999,141 +1463,6 @@ drm_bacon_bufmgr_gem_enable_reuse(drm_bacon_bufmgr *bufmgr) bufmgr->bo_reuse = true; } -/** - * Return the additional aperture space required by the tree of buffer objects - * rooted at bo. - */ -static int -drm_bacon_gem_bo_get_aperture_space(drm_bacon_bo *bo) -{ - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; - int i; - int total = 0; - - if (bo == NULL || bo_gem->included_in_check_aperture) - return 0; - - total += bo->size; - bo_gem->included_in_check_aperture = true; - - for (i = 0; i < bo_gem->reloc_count; i++) - total += - drm_bacon_gem_bo_get_aperture_space(bo_gem->reloc_bos[i]); - - return total; -} - -/** - * Clear the flag set by drm_bacon_gem_bo_get_aperture_space() so we're ready - * for the next drm_bacon_bufmgr_check_aperture_space() call. - */ -static void -drm_bacon_gem_bo_clear_aperture_space_flag(drm_bacon_bo *bo) -{ - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; - int i; - - if (bo == NULL || !bo_gem->included_in_check_aperture) - return; - - bo_gem->included_in_check_aperture = false; - - for (i = 0; i < bo_gem->reloc_count; i++) - drm_bacon_gem_bo_clear_aperture_space_flag(bo_gem->reloc_bos[i]); -} - -/** - * Return a conservative estimate for the amount of aperture required - * for a collection of buffers. This may double-count some buffers. - */ -static unsigned int -drm_bacon_gem_estimate_batch_space(drm_bacon_bo **bo_array, int count) -{ - int i; - unsigned int total = 0; - - for (i = 0; i < count; i++) { - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i]; - if (bo_gem != NULL) - total += bo_gem->reloc_tree_size; - } - return total; -} - -/** - * Return the amount of aperture needed for a collection of buffers. - * This avoids double counting any buffers, at the cost of looking - * at every buffer in the set. - */ -static unsigned int -drm_bacon_gem_compute_batch_space(drm_bacon_bo **bo_array, int count) -{ - int i; - unsigned int total = 0; - - for (i = 0; i < count; i++) { - total += drm_bacon_gem_bo_get_aperture_space(bo_array[i]); - /* For the first buffer object in the array, we get an - * accurate count back for its reloc_tree size (since nothing - * had been flagged as being counted yet). We can save that - * value out as a more conservative reloc_tree_size that - * avoids double-counting target buffers. Since the first - * buffer happens to usually be the batch buffer in our - * callers, this can pull us back from doing the tree - * walk on every new batch emit. - */ - if (i == 0) { - drm_bacon_bo_gem *bo_gem = - (drm_bacon_bo_gem *) bo_array[i]; - bo_gem->reloc_tree_size = total; - } - } - - for (i = 0; i < count; i++) - drm_bacon_gem_bo_clear_aperture_space_flag(bo_array[i]); - return total; -} - -/** - * Return -1 if the batchbuffer should be flushed before attempting to - * emit rendering referencing the buffers pointed to by bo_array. - * - * This is required because if we try to emit a batchbuffer with relocations - * to a tree of buffers that won't simultaneously fit in the aperture, - * the rendering will return an error at a point where the software is not - * prepared to recover from it. - * - * However, we also want to emit the batchbuffer significantly before we reach - * the limit, as a series of batchbuffers each of which references buffers - * covering almost all of the aperture means that at each emit we end up - * waiting to evict a buffer from the last rendering, and we get synchronous - * performance. By emitting smaller batchbuffers, we eat some CPU overhead to - * get better parallelism. - */ -int -drm_bacon_bufmgr_check_aperture_space(drm_bacon_bo **bo_array, int count) -{ - drm_bacon_bufmgr *bufmgr = bo_array[0]->bufmgr; - unsigned int total = 0; - unsigned int threshold = bufmgr->gtt_size * 3 / 4; - - total = drm_bacon_gem_estimate_batch_space(bo_array, count); - - if (total > threshold) - total = drm_bacon_gem_compute_batch_space(bo_array, count); - - if (total > threshold) { - DBG("check_space: overflowed available aperture, " - "%dkb vs %dkb\n", - total / 1024, (int)bufmgr->gtt_size / 1024); - return -ENOSPC; - } else { - DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, - (int)bufmgr->gtt_size / 1024); - return 0; - } -} - /* * Disable buffer reuse for objects which are shared with the kernel * as scanout buffers @@ -2155,38 +1484,6 @@ drm_bacon_bo_is_reusable(drm_bacon_bo *bo) return bo_gem->reusable; } -static int -_drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo) -{ - drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo; - int i; - - for (i = 0; i < bo_gem->reloc_count; i++) { - if (bo_gem->reloc_bos[i] == target_bo) - return 1; - if (bo == bo_gem->reloc_bos[i]) - continue; - if (_drm_bacon_gem_bo_references(bo_gem->reloc_bos[i], - target_bo)) - return 1; - } - - return 0; -} - -/** Return true if target_bo is referenced by bo's relocation tree. */ -int -drm_bacon_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo) -{ - drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo; - - if (bo == NULL || target_bo == NULL) - return 0; - if (target_bo_gem->used_as_reloc_target) - return _drm_bacon_gem_bo_references(bo, target_bo); - return 0; -} - static void add_bucket(drm_bacon_bufmgr *bufmgr, int size) { @@ -2476,7 +1773,6 @@ drm_bacon_bufmgr_gem_init(struct gen_device_info *devinfo, int fd, int batch_size) { drm_bacon_bufmgr *bufmgr; - struct drm_i915_gem_get_aperture aperture; bufmgr = calloc(1, sizeof(*bufmgr)); if (bufmgr == NULL) @@ -2498,20 +1794,8 @@ drm_bacon_bufmgr_gem_init(struct gen_device_info *devinfo, return NULL; } - memclear(aperture); - drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); - bufmgr->gtt_size = aperture.aper_available_size; - bufmgr->has_llc = devinfo->has_llc; - /* Let's go with one relocation per every 2 dwords (but round down a bit - * since a power of two will mean an extra page allocation for the reloc - * buffer). - * - * Every 4 was too few for the blender benchmark. - */ - bufmgr->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; - init_cache_buckets(bufmgr); list_inithead(&bufmgr->vma_cache); |