diff options
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c | 188 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_context.c | 17 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_context.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_screen.c | 11 |
5 files changed, 209 insertions, 15 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c index fb65e66555a..13455e685d9 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c @@ -72,6 +72,28 @@ struct intel_validate_entry { struct drm_i915_op_arg bo_arg; }; +struct dri_ttm_bo_bucket_entry { + drmBO drm_bo; + struct dri_ttm_bo_bucket_entry *next; +}; + +struct dri_ttm_bo_bucket { + struct dri_ttm_bo_bucket_entry *head; + struct dri_ttm_bo_bucket_entry **tail; + /** + * Limit on the number of entries in this bucket. + * + * 0 means that this caching at this bucket size is disabled. + * -1 means that there is no limit to caching at this size. + */ + int max_entries; + int num_entries; +}; + +/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse + * is 1 << 16 pages, or 256MB. + */ +#define INTEL_TTM_BO_BUCKETS 16 typedef struct _dri_bufmgr_ttm { dri_bufmgr bufmgr; @@ -84,6 +106,9 @@ typedef struct _dri_bufmgr_ttm { struct intel_validate_entry *validate_array; int validate_array_size; int validate_count; + + /** Array of lists of cached drmBOs of power-of-two sizes */ + struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS]; } dri_bufmgr_ttm; /** @@ -137,6 +162,41 @@ typedef struct _dri_fence_ttm drmFence drm_fence; } dri_fence_ttm; +static int +logbase2(int n) +{ + GLint i = 1; + GLint log2 = 0; + + while (n > i) { + i *= 2; + log2++; + } + + return log2; +} + +static struct dri_ttm_bo_bucket * +dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size) +{ + int i; + + /* We only do buckets in power of two increments */ + if ((size & (size - 1)) != 0) + return NULL; + + /* We should only see sizes rounded to pages. */ + assert((size % 4096) == 0); + + /* We always allocate in units of pages */ + i = ffs(size / 4096) - 1; + if (i >= INTEL_TTM_BO_BUCKETS) + return NULL; + + return &bufmgr_ttm->cache_bucket[i]; +} + + static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) { int i, j; @@ -338,6 +398,9 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, int ret; uint64_t flags; unsigned int hint; + unsigned long alloc_size; + struct dri_ttm_bo_bucket *bucket; + GLboolean alloc_from_cache = GL_FALSE; ttm_buf = calloc(1, sizeof(*ttm_buf)); if (!ttm_buf) @@ -352,13 +415,48 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, /* No hints we want to use. */ hint = 0; - ret = drmBOCreate(bufmgr_ttm->fd, size, alignment / pageSize, - NULL, flags, hint, &ttm_buf->drm_bo); - if (ret != 0) { - free(ttm_buf); - return NULL; + /* Round the allocated size up to a power of two number of pages. */ + alloc_size = 1 << logbase2(size); + if (alloc_size < pageSize) + alloc_size = pageSize; + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size); + + /* If we don't have caching at this size, don't actually round the + * allocation up. + */ + if (bucket == NULL || bucket->max_entries == 0) + alloc_size = size; + + /* Get a buffer out of the cache if available */ + if (bucket != NULL && bucket->num_entries > 0) { + struct dri_ttm_bo_bucket_entry *entry = bucket->head; + int busy; + + /* Check if the buffer is still in flight. If not, reuse it. */ + ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy); + alloc_from_cache = (ret == 0 && busy == 0); + + if (alloc_from_cache) { + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + ttm_buf->drm_bo = entry->drm_bo; + free(entry); + } } - ttm_buf->bo.size = ttm_buf->drm_bo.size; + + if (!alloc_from_cache) { + ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize, + NULL, flags, hint, &ttm_buf->drm_bo); + if (ret != 0) { + free(ttm_buf); + return NULL; + } + } + + ttm_buf->bo.size = size; ttm_buf->bo.offset = ttm_buf->drm_bo.offset; ttm_buf->bo.virtual = NULL; ttm_buf->bo.bufmgr = bufmgr; @@ -450,6 +548,7 @@ dri_ttm_bo_unreference(dri_bo *buf) return; if (--ttm_buf->refcount == 0) { + struct dri_ttm_bo_bucket *bucket; int ret; assert(ttm_buf->map_count == 0); @@ -476,11 +575,32 @@ dri_ttm_bo_unreference(dri_bo *buf) } } - ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "drmBOUnreference failed (%s): %s\n", - ttm_buf->name, strerror(-ret)); + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size); + /* Put the buffer into our internal cache for reuse if we can. */ + if (!ttm_buf->shared && + bucket != NULL && + (bucket->max_entries == -1 || + (bucket->max_entries > 0 && + bucket->num_entries < bucket->max_entries))) + { + struct dri_ttm_bo_bucket_entry *entry; + + entry = calloc(1, sizeof(*entry)); + entry->drm_bo = ttm_buf->drm_bo; + + entry->next = NULL; + *bucket->tail = entry; + bucket->tail = &entry->next; + bucket->num_entries++; + } else { + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed (%s): %s\n", + ttm_buf->name, strerror(-ret)); + } } + DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); free(buf); @@ -657,9 +777,34 @@ static void dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) { dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; free(bufmgr_ttm->validate_array); + /* Free any cached buffer objects we were going to reuse */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i]; + struct dri_ttm_bo_bucket_entry *entry; + + while ((entry = bucket->head) != NULL) { + int ret; + + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed: %s\n", + strerror(-ret)); + } + + free(entry); + } + } + free(bufmgr); } @@ -877,6 +1022,24 @@ dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence) } /** + * Enables unlimited caching of buffer objects for reuse. + * + * This is potentially very memory expensive, as the cache at each bucket + * size is only bounded by how many buffers of that size we've managed to have + * in flight at once. + */ +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; + + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + bufmgr_ttm->cache_bucket[i].max_entries = -1; + } +} + +/** * Initializes the TTM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. * @@ -890,6 +1053,7 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type, unsigned int fence_type_flush, int batch_size) { dri_bufmgr_ttm *bufmgr_ttm; + int i; bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm)); bufmgr_ttm->fd = fd; @@ -919,6 +1083,10 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type, bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; bufmgr_ttm->bufmgr.debug = GL_FALSE; + /* Initialize the linked lists for BO reuse cache. */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) + bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head; + return &bufmgr_ttm->bufmgr; } diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h index 0738839cefb..d267a168cd4 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h @@ -14,4 +14,7 @@ dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type, unsigned int fence_type_flush, int batch_size); +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr); + #endif diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index d3f0681807e..6c8ab1fa1e2 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -456,6 +456,7 @@ intel_init_bufmgr(struct intel_context *intel) ttm_supported = GL_FALSE; if (!ttm_disable && ttm_supported) { + int bo_reuse_mode; intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd, DRM_FENCE_TYPE_EXE, DRM_FENCE_TYPE_EXE | @@ -463,6 +464,15 @@ intel_init_bufmgr(struct intel_context *intel) BATCH_SZ); if (intel->bufmgr != NULL) intel->ttm = GL_TRUE; + + bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse"); + switch (bo_reuse_mode) { + case DRI_CONF_BO_REUSE_DISABLED: + break; + case DRI_CONF_BO_REUSE_ALL: + intel_ttm_enable_bo_reuse(intel->bufmgr); + break; + } } /* Otherwise, use the classic buffer manager. */ if (intel->bufmgr == NULL) { @@ -548,6 +558,9 @@ intelInitContext(struct intel_context *intel, intel->width = intelScreen->width; intel->height = intelScreen->height; + driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, + intel->driScreen->myNum, + IS_965(intelScreen->deviceID) ? "i965" : "i915"); if (intelScreen->deviceID == PCI_CHIP_I865_G) intel->maxBatchSize = 4096; else @@ -556,10 +569,6 @@ intelInitContext(struct intel_context *intel, if (!intel_init_bufmgr(intel)) return GL_FALSE; - driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, - intel->driScreen->myNum, - IS_965(intelScreen->deviceID) ? "i965" : "i915"); - ctx->Const.MaxTextureMaxAnisotropy = 2.0; /* This doesn't yet catch all non-conformant rendering, but it's a diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 6c97955b145..809cb6ea578 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -476,6 +476,11 @@ extern void intelInitStateFuncs(struct dd_function_table *functions); #define BLENDFACT_INV_CONST_ALPHA 0x0f #define BLENDFACT_MASK 0x0f +enum { + DRI_CONF_BO_REUSE_DISABLED, + DRI_CONF_BO_REUSE_ALL +}; + extern int intel_translate_shadow_compare_func(GLenum func); extern int intel_translate_compare_func(GLenum func); extern int intel_translate_stencil_op(GLenum op); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 8b8eeb77aa3..1c79cf2cff4 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -56,6 +56,15 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_PERFORMANCE DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, + * DRI_CONF_BO_REUSE_ALL + */ + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1") + DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") + DRI_CONF_ENUM(0, "Disable buffer object reuse") + DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") + DRI_CONF_DESC_END + DRI_CONF_OPT_END DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) @@ -66,7 +75,7 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 5; +const GLuint __driNConfigOptions = 6; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; |