aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4/vc4_bufmgr.c
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2014-12-13 15:27:39 -0800
committerEric Anholt <[email protected]>2014-12-17 16:07:01 -0800
commit06890c444a893a96b6ec1cfb36f77915ec9acda8 (patch)
tree779fd6b1c624a69cbebae7ad4ab118d29889015a /src/gallium/drivers/vc4/vc4_bufmgr.c
parent39bc9360116e1c944c1d0d04f67a6ec5f010371f (diff)
vc4: Add a userspace BO cache.
Since our kernel BOs require CMA allocation, and the use of them requires new mmaps, it's pretty expensive and we should avoid it if possible. Copying my original design for Intel, make a userspace cache that reuses BOs that haven't been shared to other processes but frees BOs that have sat in the cache for over a second. Improves glxgears framerate on RPi by around 30%.
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_bufmgr.c')
-rw-r--r--src/gallium/drivers/vc4/vc4_bufmgr.c131
1 files changed, 130 insertions, 1 deletions
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 64fe2e40e42..34596be537d 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -29,14 +29,49 @@
#include <xf86drmMode.h>
#include "util/u_memory.h"
+#include "util/ralloc.h"
#include "vc4_context.h"
#include "vc4_screen.h"
+#define container_of(ptr, type, field) \
+ (type*)((char*)ptr - offsetof(type, field))
+
+static struct vc4_bo *
+vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
+{
+ struct vc4_bo_cache *cache = &screen->bo_cache;
+ uint32_t page_index = size / 4096 - 1;
+
+ if (cache->size_list_size <= page_index)
+ return NULL;
+
+ struct vc4_bo *bo = NULL;
+ pipe_mutex_lock(cache->lock);
+ if (!is_empty_list(&cache->size_list[page_index])) {
+ struct simple_node *node = last_elem(&cache->size_list[page_index]);
+ bo = container_of(node, struct vc4_bo, size_list);
+ pipe_reference_init(&bo->reference, 1);
+ remove_from_list(&bo->time_list);
+ remove_from_list(&bo->size_list);
+
+ bo->name = name;
+ }
+ pipe_mutex_unlock(cache->lock);
+ return bo;
+}
+
struct vc4_bo *
vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
{
- struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo);
+ struct vc4_bo *bo;
+ size = align(size, 4096);
+
+ bo = vc4_bo_from_cache(screen, size, name);
+ if (bo)
+ return bo;
+
+ bo = CALLOC_STRUCT(vc4_bo);
if (!bo)
return NULL;
@@ -44,6 +79,7 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
bo->screen = screen;
bo->size = size;
bo->name = name;
+ bo->private = true;
struct drm_mode_create_dumb create;
memset(&create, 0, sizeof(create));
@@ -65,6 +101,18 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
}
void
+vc4_bo_last_unreference(struct vc4_bo *bo)
+{
+ struct vc4_screen *screen = bo->screen;
+
+ struct timespec time;
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ pipe_mutex_lock(screen->bo_cache.lock);
+ vc4_bo_last_unreference_locked_timed(bo, time.tv_sec);
+ pipe_mutex_unlock(screen->bo_cache.lock);
+}
+
+static void
vc4_bo_free(struct vc4_bo *bo)
{
struct vc4_screen *screen = bo->screen;
@@ -89,6 +137,69 @@ vc4_bo_free(struct vc4_bo *bo)
free(bo);
}
+static void
+free_stale_bos(struct vc4_screen *screen, time_t time)
+{
+ while (!is_empty_list(&screen->bo_cache.time_list)) {
+ struct simple_node *node =
+ first_elem(&screen->bo_cache.time_list);
+ struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list);
+
+ /* If it's more than a second old, free it. */
+ if (time - bo->free_time > 2) {
+ remove_from_list(&bo->time_list);
+ remove_from_list(&bo->size_list);
+ vc4_bo_free(bo);
+ } else {
+ break;
+ }
+ }
+}
+
+void
+vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
+{
+ struct vc4_screen *screen = bo->screen;
+ struct vc4_bo_cache *cache = &screen->bo_cache;
+ uint32_t page_index = bo->size / 4096 - 1;
+
+ if (!bo->private) {
+ vc4_bo_free(bo);
+ return;
+ }
+
+ if (cache->size_list_size <= page_index) {
+ struct simple_node *new_list =
+ ralloc_array(screen, struct simple_node, page_index + 1);
+
+ /* Move old list contents over (since the array has moved, and
+ * therefore the pointers to the list heads have to change.
+ */
+ for (int i = 0; i < cache->size_list_size; i++) {
+ struct simple_node *old_head = &cache->size_list[i];
+ if (is_empty_list(old_head))
+ make_empty_list(&new_list[i]);
+ else {
+ new_list[i].next = old_head->next;
+ new_list[i].prev = old_head->prev;
+ new_list[i].next->prev = &new_list[i];
+ new_list[i].prev->next = &new_list[i];
+ }
+ }
+ for (int i = cache->size_list_size; i < page_index + 1; i++)
+ make_empty_list(&new_list[i]);
+
+ cache->size_list = new_list;
+ cache->size_list_size = page_index + 1;
+ }
+
+ bo->free_time = time;
+ insert_at_tail(&cache->size_list[page_index], &bo->size_list);
+ insert_at_tail(&cache->time_list, &bo->time_list);
+
+ free_stale_bos(screen, time);
+}
+
static struct vc4_bo *
vc4_bo_open_handle(struct vc4_screen *screen,
uint32_t winsys_stride,
@@ -103,6 +214,7 @@ vc4_bo_open_handle(struct vc4_screen *screen,
bo->handle = handle;
bo->size = size;
bo->name = "winsys";
+ bo->private = false;
#ifdef USE_VC4_SIMULATOR
vc4_bo_map(bo);
@@ -194,6 +306,7 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
return false;
}
+ bo->private = false;
*name = flink.name;
return true;
@@ -289,3 +402,19 @@ vc4_bo_map(struct vc4_bo *bo)
return map;
}
+
+void
+vc4_bufmgr_destroy(struct pipe_screen *pscreen)
+{
+ struct vc4_screen *screen = vc4_screen(pscreen);
+ struct vc4_bo_cache *cache = &screen->bo_cache;
+
+ while (!is_empty_list(&cache->time_list)) {
+ struct simple_node *node = first_elem(&cache->time_list);
+ struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list);
+
+ remove_from_list(&bo->time_list);
+ remove_from_list(&bo->size_list);
+ vc4_bo_free(bo);
+ }
+}