summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c188
-rw-r--r--src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h3
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c17
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h5
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c11
5 files changed, 209 insertions, 15 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
index fb65e66555a..13455e685d9 100644
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
@@ -72,6 +72,28 @@ struct intel_validate_entry {
struct drm_i915_op_arg bo_arg;
};
+struct dri_ttm_bo_bucket_entry {
+ drmBO drm_bo;
+ struct dri_ttm_bo_bucket_entry *next;
+};
+
+struct dri_ttm_bo_bucket {
+ struct dri_ttm_bo_bucket_entry *head;
+ struct dri_ttm_bo_bucket_entry **tail;
+ /**
+ * Limit on the number of entries in this bucket.
+ *
+ * 0 means that this caching at this bucket size is disabled.
+ * -1 means that there is no limit to caching at this size.
+ */
+ int max_entries;
+ int num_entries;
+};
+
+/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse
+ * is 1 << 16 pages, or 256MB.
+ */
+#define INTEL_TTM_BO_BUCKETS 16
typedef struct _dri_bufmgr_ttm {
dri_bufmgr bufmgr;
@@ -84,6 +106,9 @@ typedef struct _dri_bufmgr_ttm {
struct intel_validate_entry *validate_array;
int validate_array_size;
int validate_count;
+
+ /** Array of lists of cached drmBOs of power-of-two sizes */
+ struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS];
} dri_bufmgr_ttm;
/**
@@ -137,6 +162,41 @@ typedef struct _dri_fence_ttm
drmFence drm_fence;
} dri_fence_ttm;
+static int
+logbase2(int n)
+{
+ GLint i = 1;
+ GLint log2 = 0;
+
+ while (n > i) {
+ i *= 2;
+ log2++;
+ }
+
+ return log2;
+}
+
+static struct dri_ttm_bo_bucket *
+dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size)
+{
+ int i;
+
+ /* We only do buckets in power of two increments */
+ if ((size & (size - 1)) != 0)
+ return NULL;
+
+ /* We should only see sizes rounded to pages. */
+ assert((size % 4096) == 0);
+
+ /* We always allocate in units of pages */
+ i = ffs(size / 4096) - 1;
+ if (i >= INTEL_TTM_BO_BUCKETS)
+ return NULL;
+
+ return &bufmgr_ttm->cache_bucket[i];
+}
+
+
static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
{
int i, j;
@@ -338,6 +398,9 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
int ret;
uint64_t flags;
unsigned int hint;
+ unsigned long alloc_size;
+ struct dri_ttm_bo_bucket *bucket;
+ GLboolean alloc_from_cache = GL_FALSE;
ttm_buf = calloc(1, sizeof(*ttm_buf));
if (!ttm_buf)
@@ -352,13 +415,48 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
/* No hints we want to use. */
hint = 0;
- ret = drmBOCreate(bufmgr_ttm->fd, size, alignment / pageSize,
- NULL, flags, hint, &ttm_buf->drm_bo);
- if (ret != 0) {
- free(ttm_buf);
- return NULL;
+ /* Round the allocated size up to a power of two number of pages. */
+ alloc_size = 1 << logbase2(size);
+ if (alloc_size < pageSize)
+ alloc_size = pageSize;
+ bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size);
+
+ /* If we don't have caching at this size, don't actually round the
+ * allocation up.
+ */
+ if (bucket == NULL || bucket->max_entries == 0)
+ alloc_size = size;
+
+ /* Get a buffer out of the cache if available */
+ if (bucket != NULL && bucket->num_entries > 0) {
+ struct dri_ttm_bo_bucket_entry *entry = bucket->head;
+ int busy;
+
+ /* Check if the buffer is still in flight. If not, reuse it. */
+ ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy);
+ alloc_from_cache = (ret == 0 && busy == 0);
+
+ if (alloc_from_cache) {
+ bucket->head = entry->next;
+ if (entry->next == NULL)
+ bucket->tail = &bucket->head;
+ bucket->num_entries--;
+
+ ttm_buf->drm_bo = entry->drm_bo;
+ free(entry);
+ }
}
- ttm_buf->bo.size = ttm_buf->drm_bo.size;
+
+ if (!alloc_from_cache) {
+ ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize,
+ NULL, flags, hint, &ttm_buf->drm_bo);
+ if (ret != 0) {
+ free(ttm_buf);
+ return NULL;
+ }
+ }
+
+ ttm_buf->bo.size = size;
ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
ttm_buf->bo.virtual = NULL;
ttm_buf->bo.bufmgr = bufmgr;
@@ -450,6 +548,7 @@ dri_ttm_bo_unreference(dri_bo *buf)
return;
if (--ttm_buf->refcount == 0) {
+ struct dri_ttm_bo_bucket *bucket;
int ret;
assert(ttm_buf->map_count == 0);
@@ -476,11 +575,32 @@ dri_ttm_bo_unreference(dri_bo *buf)
}
}
- ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
- if (ret != 0) {
- fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
- ttm_buf->name, strerror(-ret));
+ bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size);
+ /* Put the buffer into our internal cache for reuse if we can. */
+ if (!ttm_buf->shared &&
+ bucket != NULL &&
+ (bucket->max_entries == -1 ||
+ (bucket->max_entries > 0 &&
+ bucket->num_entries < bucket->max_entries)))
+ {
+ struct dri_ttm_bo_bucket_entry *entry;
+
+ entry = calloc(1, sizeof(*entry));
+ entry->drm_bo = ttm_buf->drm_bo;
+
+ entry->next = NULL;
+ *bucket->tail = entry;
+ bucket->tail = &entry->next;
+ bucket->num_entries++;
+ } else {
+ /* Decrement the kernel refcount for the buffer. */
+ ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+ if (ret != 0) {
+ fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
+ ttm_buf->name, strerror(-ret));
+ }
}
+
DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
free(buf);
@@ -657,9 +777,34 @@ static void
dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
{
dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+ int i;
free(bufmgr_ttm->validate_array);
+ /* Free any cached buffer objects we were going to reuse */
+ for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+ struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i];
+ struct dri_ttm_bo_bucket_entry *entry;
+
+ while ((entry = bucket->head) != NULL) {
+ int ret;
+
+ bucket->head = entry->next;
+ if (entry->next == NULL)
+ bucket->tail = &bucket->head;
+ bucket->num_entries--;
+
+ /* Decrement the kernel refcount for the buffer. */
+ ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo);
+ if (ret != 0) {
+ fprintf(stderr, "drmBOUnreference failed: %s\n",
+ strerror(-ret));
+ }
+
+ free(entry);
+ }
+ }
+
free(bufmgr);
}
@@ -877,6 +1022,24 @@ dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
}
/**
+ * Enables unlimited caching of buffer objects for reuse.
+ *
+ * This is potentially very memory expensive, as the cache at each bucket
+ * size is only bounded by how many buffers of that size we've managed to have
+ * in flight at once.
+ */
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
+{
+ dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+ int i;
+
+ for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+ bufmgr_ttm->cache_bucket[i].max_entries = -1;
+ }
+}
+
+/**
* Initializes the TTM buffer manager, which uses the kernel to allocate, map,
* and manage map buffer objections.
*
@@ -890,6 +1053,7 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
unsigned int fence_type_flush, int batch_size)
{
dri_bufmgr_ttm *bufmgr_ttm;
+ int i;
bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm));
bufmgr_ttm->fd = fd;
@@ -919,6 +1083,10 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
bufmgr_ttm->bufmgr.debug = GL_FALSE;
+ /* Initialize the linked lists for BO reuse cache. */
+ for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++)
+ bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head;
+
return &bufmgr_ttm->bufmgr;
}
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h
index 0738839cefb..d267a168cd4 100644
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h
@@ -14,4 +14,7 @@ dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
unsigned int fence_type_flush, int batch_size);
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr);
+
#endif
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index d3f0681807e..6c8ab1fa1e2 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -456,6 +456,7 @@ intel_init_bufmgr(struct intel_context *intel)
ttm_supported = GL_FALSE;
if (!ttm_disable && ttm_supported) {
+ int bo_reuse_mode;
intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
DRM_FENCE_TYPE_EXE,
DRM_FENCE_TYPE_EXE |
@@ -463,6 +464,15 @@ intel_init_bufmgr(struct intel_context *intel)
BATCH_SZ);
if (intel->bufmgr != NULL)
intel->ttm = GL_TRUE;
+
+ bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
+ switch (bo_reuse_mode) {
+ case DRI_CONF_BO_REUSE_DISABLED:
+ break;
+ case DRI_CONF_BO_REUSE_ALL:
+ intel_ttm_enable_bo_reuse(intel->bufmgr);
+ break;
+ }
}
/* Otherwise, use the classic buffer manager. */
if (intel->bufmgr == NULL) {
@@ -548,6 +558,9 @@ intelInitContext(struct intel_context *intel,
intel->width = intelScreen->width;
intel->height = intelScreen->height;
+ driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
+ intel->driScreen->myNum,
+ IS_965(intelScreen->deviceID) ? "i965" : "i915");
if (intelScreen->deviceID == PCI_CHIP_I865_G)
intel->maxBatchSize = 4096;
else
@@ -556,10 +569,6 @@ intelInitContext(struct intel_context *intel,
if (!intel_init_bufmgr(intel))
return GL_FALSE;
- driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
- intel->driScreen->myNum,
- IS_965(intelScreen->deviceID) ? "i965" : "i915");
-
ctx->Const.MaxTextureMaxAnisotropy = 2.0;
/* This doesn't yet catch all non-conformant rendering, but it's a
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 6c97955b145..809cb6ea578 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -476,6 +476,11 @@ extern void intelInitStateFuncs(struct dd_function_table *functions);
#define BLENDFACT_INV_CONST_ALPHA 0x0f
#define BLENDFACT_MASK 0x0f
+enum {
+ DRI_CONF_BO_REUSE_DISABLED,
+ DRI_CONF_BO_REUSE_ALL
+};
+
extern int intel_translate_shadow_compare_func(GLenum func);
extern int intel_translate_compare_func(GLenum func);
extern int intel_translate_stencil_op(GLenum op);
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 8b8eeb77aa3..1c79cf2cff4 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -56,6 +56,15 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+ /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
+ * DRI_CONF_BO_REUSE_ALL
+ */
+ DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1")
+ DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
+ DRI_CONF_ENUM(0, "Disable buffer object reuse")
+ DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
+ DRI_CONF_DESC_END
+ DRI_CONF_OPT_END
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_FORCE_S3TC_ENABLE(false)
@@ -66,7 +75,7 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_SECTION_END
DRI_CONF_END;
-const GLuint __driNConfigOptions = 5;
+const GLuint __driNConfigOptions = 6;
#ifdef USE_NEW_INTERFACE
static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;