From fe91c05b5494b889c8adda77ff562712116d2e59 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 5 Mar 2008 14:14:54 -0800 Subject: [intel] Add a driconf option to cache freed buffer objects for reuse. This is defaulted off as it has potentially large memory costs for a modest performance gain. Ideally we will improve DRM performance to the point where this optimization is not worth the memory cost in any case, or find some middle ground in caching only limited numbers of certain buffers. For now, this provides a modest 4% improvement in openarena on GM965 and 10% in openarena on GM945. --- src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c | 188 ++++++++++++++++++++++++-- src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h | 3 + src/mesa/drivers/dri/intel/intel_context.c | 17 ++- src/mesa/drivers/dri/intel/intel_context.h | 5 + src/mesa/drivers/dri/intel/intel_screen.c | 11 +- 5 files changed, 209 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c index fb65e66555a..13455e685d9 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c @@ -72,6 +72,28 @@ struct intel_validate_entry { struct drm_i915_op_arg bo_arg; }; +struct dri_ttm_bo_bucket_entry { + drmBO drm_bo; + struct dri_ttm_bo_bucket_entry *next; +}; + +struct dri_ttm_bo_bucket { + struct dri_ttm_bo_bucket_entry *head; + struct dri_ttm_bo_bucket_entry **tail; + /** + * Limit on the number of entries in this bucket. + * + * 0 means that this caching at this bucket size is disabled. + * -1 means that there is no limit to caching at this size. + */ + int max_entries; + int num_entries; +}; + +/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse + * is 1 << 16 pages, or 256MB. + */ +#define INTEL_TTM_BO_BUCKETS 16 typedef struct _dri_bufmgr_ttm { dri_bufmgr bufmgr; @@ -84,6 +106,9 @@ typedef struct _dri_bufmgr_ttm { struct intel_validate_entry *validate_array; int validate_array_size; int validate_count; + + /** Array of lists of cached drmBOs of power-of-two sizes */ + struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS]; } dri_bufmgr_ttm; /** @@ -137,6 +162,41 @@ typedef struct _dri_fence_ttm drmFence drm_fence; } dri_fence_ttm; +static int +logbase2(int n) +{ + GLint i = 1; + GLint log2 = 0; + + while (n > i) { + i *= 2; + log2++; + } + + return log2; +} + +static struct dri_ttm_bo_bucket * +dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size) +{ + int i; + + /* We only do buckets in power of two increments */ + if ((size & (size - 1)) != 0) + return NULL; + + /* We should only see sizes rounded to pages. */ + assert((size % 4096) == 0); + + /* We always allocate in units of pages */ + i = ffs(size / 4096) - 1; + if (i >= INTEL_TTM_BO_BUCKETS) + return NULL; + + return &bufmgr_ttm->cache_bucket[i]; +} + + static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) { int i, j; @@ -338,6 +398,9 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, int ret; uint64_t flags; unsigned int hint; + unsigned long alloc_size; + struct dri_ttm_bo_bucket *bucket; + GLboolean alloc_from_cache = GL_FALSE; ttm_buf = calloc(1, sizeof(*ttm_buf)); if (!ttm_buf) @@ -352,13 +415,48 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, /* No hints we want to use. */ hint = 0; - ret = drmBOCreate(bufmgr_ttm->fd, size, alignment / pageSize, - NULL, flags, hint, &ttm_buf->drm_bo); - if (ret != 0) { - free(ttm_buf); - return NULL; + /* Round the allocated size up to a power of two number of pages. */ + alloc_size = 1 << logbase2(size); + if (alloc_size < pageSize) + alloc_size = pageSize; + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size); + + /* If we don't have caching at this size, don't actually round the + * allocation up. + */ + if (bucket == NULL || bucket->max_entries == 0) + alloc_size = size; + + /* Get a buffer out of the cache if available */ + if (bucket != NULL && bucket->num_entries > 0) { + struct dri_ttm_bo_bucket_entry *entry = bucket->head; + int busy; + + /* Check if the buffer is still in flight. If not, reuse it. */ + ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy); + alloc_from_cache = (ret == 0 && busy == 0); + + if (alloc_from_cache) { + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + ttm_buf->drm_bo = entry->drm_bo; + free(entry); + } } - ttm_buf->bo.size = ttm_buf->drm_bo.size; + + if (!alloc_from_cache) { + ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize, + NULL, flags, hint, &ttm_buf->drm_bo); + if (ret != 0) { + free(ttm_buf); + return NULL; + } + } + + ttm_buf->bo.size = size; ttm_buf->bo.offset = ttm_buf->drm_bo.offset; ttm_buf->bo.virtual = NULL; ttm_buf->bo.bufmgr = bufmgr; @@ -450,6 +548,7 @@ dri_ttm_bo_unreference(dri_bo *buf) return; if (--ttm_buf->refcount == 0) { + struct dri_ttm_bo_bucket *bucket; int ret; assert(ttm_buf->map_count == 0); @@ -476,11 +575,32 @@ dri_ttm_bo_unreference(dri_bo *buf) } } - ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "drmBOUnreference failed (%s): %s\n", - ttm_buf->name, strerror(-ret)); + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size); + /* Put the buffer into our internal cache for reuse if we can. */ + if (!ttm_buf->shared && + bucket != NULL && + (bucket->max_entries == -1 || + (bucket->max_entries > 0 && + bucket->num_entries < bucket->max_entries))) + { + struct dri_ttm_bo_bucket_entry *entry; + + entry = calloc(1, sizeof(*entry)); + entry->drm_bo = ttm_buf->drm_bo; + + entry->next = NULL; + *bucket->tail = entry; + bucket->tail = &entry->next; + bucket->num_entries++; + } else { + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed (%s): %s\n", + ttm_buf->name, strerror(-ret)); + } } + DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); free(buf); @@ -657,9 +777,34 @@ static void dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) { dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; free(bufmgr_ttm->validate_array); + /* Free any cached buffer objects we were going to reuse */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i]; + struct dri_ttm_bo_bucket_entry *entry; + + while ((entry = bucket->head) != NULL) { + int ret; + + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed: %s\n", + strerror(-ret)); + } + + free(entry); + } + } + free(bufmgr); } @@ -876,6 +1021,24 @@ dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence) bufmgr_ttm->validate_count = 0; } +/** + * Enables unlimited caching of buffer objects for reuse. + * + * This is potentially very memory expensive, as the cache at each bucket + * size is only bounded by how many buffers of that size we've managed to have + * in flight at once. + */ +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; + + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + bufmgr_ttm->cache_bucket[i].max_entries = -1; + } +} + /** * Initializes the TTM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. @@ -890,6 +1053,7 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type, unsigned int fence_type_flush, int batch_size) { dri_bufmgr_ttm *bufmgr_ttm; + int i; bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm)); bufmgr_ttm->fd = fd; @@ -919,6 +1083,10 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type, bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; bufmgr_ttm->bufmgr.debug = GL_FALSE; + /* Initialize the linked lists for BO reuse cache. */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) + bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head; + return &bufmgr_ttm->bufmgr; } diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h index 0738839cefb..d267a168cd4 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h @@ -14,4 +14,7 @@ dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type, unsigned int fence_type_flush, int batch_size); +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr); + #endif diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index d3f0681807e..6c8ab1fa1e2 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -456,6 +456,7 @@ intel_init_bufmgr(struct intel_context *intel) ttm_supported = GL_FALSE; if (!ttm_disable && ttm_supported) { + int bo_reuse_mode; intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd, DRM_FENCE_TYPE_EXE, DRM_FENCE_TYPE_EXE | @@ -463,6 +464,15 @@ intel_init_bufmgr(struct intel_context *intel) BATCH_SZ); if (intel->bufmgr != NULL) intel->ttm = GL_TRUE; + + bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse"); + switch (bo_reuse_mode) { + case DRI_CONF_BO_REUSE_DISABLED: + break; + case DRI_CONF_BO_REUSE_ALL: + intel_ttm_enable_bo_reuse(intel->bufmgr); + break; + } } /* Otherwise, use the classic buffer manager. */ if (intel->bufmgr == NULL) { @@ -548,6 +558,9 @@ intelInitContext(struct intel_context *intel, intel->width = intelScreen->width; intel->height = intelScreen->height; + driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, + intel->driScreen->myNum, + IS_965(intelScreen->deviceID) ? "i965" : "i915"); if (intelScreen->deviceID == PCI_CHIP_I865_G) intel->maxBatchSize = 4096; else @@ -556,10 +569,6 @@ intelInitContext(struct intel_context *intel, if (!intel_init_bufmgr(intel)) return GL_FALSE; - driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, - intel->driScreen->myNum, - IS_965(intelScreen->deviceID) ? "i965" : "i915"); - ctx->Const.MaxTextureMaxAnisotropy = 2.0; /* This doesn't yet catch all non-conformant rendering, but it's a diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 6c97955b145..809cb6ea578 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -476,6 +476,11 @@ extern void intelInitStateFuncs(struct dd_function_table *functions); #define BLENDFACT_INV_CONST_ALPHA 0x0f #define BLENDFACT_MASK 0x0f +enum { + DRI_CONF_BO_REUSE_DISABLED, + DRI_CONF_BO_REUSE_ALL +}; + extern int intel_translate_shadow_compare_func(GLenum func); extern int intel_translate_compare_func(GLenum func); extern int intel_translate_stencil_op(GLenum op); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 8b8eeb77aa3..1c79cf2cff4 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -56,6 +56,15 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_PERFORMANCE DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, + * DRI_CONF_BO_REUSE_ALL + */ + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1") + DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") + DRI_CONF_ENUM(0, "Disable buffer object reuse") + DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") + DRI_CONF_DESC_END + DRI_CONF_OPT_END DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) @@ -66,7 +75,7 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 5; +const GLuint __driNConfigOptions = 6; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; -- cgit v1.2.3