summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVasily Khoruzhick <[email protected]>2019-09-07 19:33:07 -0700
committerVasily Khoruzhick <[email protected]>2019-09-22 19:20:59 -0700
commitd2147787534de87cd11015266293211b5188442f (patch)
tree823202fabe8f330fc6360a2a02dbc881e2280181
parent9f897a2b4cf2c0e222487470053a69de843f2084 (diff)
lima: implement BO cache
Allocating BOs is expensive, so we should avoid doing that by caching freed BOs. The BO cache is modelled after the one in the v3d driver and works as follows: - in lima_bo_create(), check whether there is a matching BO in the cache and return it if there is one; otherwise allocate a new BO. - in lima_bo_unreference() (renamed from lima_bo_free()), put the BO into the cache instead of freeing it, and remove all stale BOs from the cache. Reviewed-by: Qiang Yu <[email protected]> Signed-off-by: Vasily Khoruzhick <[email protected]>
-rw-r--r--src/gallium/drivers/lima/lima_bo.c184
-rw-r--r--src/gallium/drivers/lima/lima_bo.h10
-rw-r--r--src/gallium/drivers/lima/lima_context.c6
-rw-r--r--src/gallium/drivers/lima/lima_program.c4
-rw-r--r--src/gallium/drivers/lima/lima_resource.c4
-rw-r--r--src/gallium/drivers/lima/lima_screen.c23
-rw-r--r--src/gallium/drivers/lima/lima_screen.h9
-rw-r--r--src/gallium/drivers/lima/lima_submit.c2
8 files changed, 212 insertions, 30 deletions
diff --git a/src/gallium/drivers/lima/lima_bo.c b/src/gallium/drivers/lima/lima_bo.c
index b082d32afd8..22263ea0054 100644
--- a/src/gallium/drivers/lima/lima_bo.c
+++ b/src/gallium/drivers/lima/lima_bo.c
@@ -30,6 +30,7 @@
#include "drm-uapi/lima_drm.h"
#include "util/u_hash_table.h"
+#include "util/u_math.h"
#include "util/os_time.h"
#include "os/os_mman.h"
@@ -37,6 +38,7 @@
#include "lima_screen.h"
#include "lima_bo.h"
+#include "lima_util.h"
#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
@@ -68,6 +70,16 @@ err_out0:
return false;
}
+/* Set up the BO cache of a screen: initialise the cache lock, every
+ * per-size bucket list and the list that links cached BOs by time.
+ * Always returns true. */
+bool lima_bo_cache_init(struct lima_screen *screen)
+{
+   mtx_init(&screen->bo_cache_lock, mtx_plain);
+
+   for (int bucket = 0; bucket < NR_BO_CACHE_BUCKETS; bucket++)
+      list_inithead(&screen->bo_cache_buckets[bucket]);
+   list_inithead(&screen->bo_cache_time);
+
+   return true;
+}
+
void lima_bo_table_fini(struct lima_screen *screen)
{
mtx_destroy(&screen->bo_table_lock);
@@ -75,6 +87,13 @@ void lima_bo_table_fini(struct lima_screen *screen)
util_hash_table_destroy(screen->bo_flink_names);
}
+/* Unlink a BO from both cache lists (the time list and its size bucket).
+ * Takes no lock itself. */
+static void
+lima_bo_cache_remove(struct lima_bo *bo)
+{
+   list_del(&bo->time_list);
+   list_del(&bo->size_list);
+}
+
static void lima_close_kms_handle(struct lima_screen *screen, uint32_t handle)
{
struct drm_gem_close args = {
@@ -84,6 +103,36 @@ static void lima_close_kms_handle(struct lima_screen *screen, uint32_t handle)
drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &args);
}
+/* Destroy a BO: drop its entries from the screen's handle and flink-name
+ * tables, unmap it if it is mapped, close its GEM handle and free the
+ * struct. The reference count is not consulted here. */
+static void
+lima_bo_free(struct lima_bo *bo)
+{
+   struct lima_screen *screen = bo->screen;
+   void *handle_key = (void *)(uintptr_t)bo->handle;
+
+   /* Both tables are edited with bo_table_lock held. */
+   mtx_lock(&screen->bo_table_lock);
+   util_hash_table_remove(screen->bo_handles, handle_key);
+   if (bo->flink_name) {
+      void *name_key = (void *)(uintptr_t)bo->flink_name;
+
+      util_hash_table_remove(screen->bo_flink_names, name_key);
+   }
+   mtx_unlock(&screen->bo_table_lock);
+
+   if (bo->map)
+      lima_bo_unmap(bo);
+
+   lima_close_kms_handle(screen, bo->handle);
+   free(bo);
+}
+
+/* Tear down the BO cache: destroy the cache lock and free every BO that is
+ * still linked on the screen's time list. */
+void lima_bo_cache_fini(struct lima_screen *screen)
+{
+   struct list_head *cached = &screen->bo_cache_time;
+
+   mtx_destroy(&screen->bo_cache_lock);
+
+   /* The _safe iterator is needed: each entry is unlinked and freed
+    * inside the loop body. */
+   list_for_each_entry_safe(struct lima_bo, bo, cached, time_list) {
+      lima_bo_cache_remove(bo);
+      lima_bo_free(bo);
+   }
+}
+
static bool lima_bo_get_info(struct lima_bo *bo)
{
struct drm_lima_gem_info req = {
@@ -98,10 +147,112 @@ static bool lima_bo_get_info(struct lima_bo *bo)
return true;
}
+/* Map a BO size in bytes to an index into the cache bucket array, in the
+ * range [0, MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET]. */
+static unsigned
+lima_bucket_index(unsigned size)
+{
+   /* Round down to a power of two; its exponent selects the bucket */
+   unsigned exponent = util_logbase2(size);
+
+   /* Keep the exponent inside the bucket range: anything smaller lands in
+    * the first bucket, all huge allocations land in the largest one */
+   if (exponent < MIN_BO_CACHE_BUCKET)
+      exponent = MIN_BO_CACHE_BUCKET;
+   else if (exponent > MAX_BO_CACHE_BUCKET)
+      exponent = MAX_BO_CACHE_BUCKET;
+
+   /* Rebase so that the smallest bucket has index 0 */
+   return exponent - MIN_BO_CACHE_BUCKET;
+}
+
+/* Return the bucket list of the screen's BO cache that corresponds to the
+ * given size. The result points into screen->bo_cache_buckets. */
+static struct list_head *
+lima_bo_cache_get_bucket(struct lima_screen *screen, unsigned size)
+{
+   unsigned idx = lima_bucket_index(size);
+
+   return &screen->bo_cache_buckets[idx];
+}
+
+/* Free cached BOs that have been sitting idle in the cache for too long.
+ *
+ * `time` is the current timestamp in whole seconds; it is compared against
+ * the free_time recorded on each cached BO. No locking is done here. */
+static void
+lima_bo_cache_free_stale_bos(struct lima_screen *screen, time_t time)
+{
+   /* A cached BO is stale once more than this many seconds have passed
+    * since it was put into the cache. */
+   const time_t max_idle_secs = 6;
+
+   list_for_each_entry_safe(struct lima_bo, entry,
+                            &screen->bo_cache_time, time_list) {
+      /* lima_bo_cache_put() appends to the tail of this list, so entries
+       * are ordered oldest first: the first fresh BO ends the scan. */
+      if (time - entry->free_time <= max_idle_secs)
+         break;
+
+      lima_bo_cache_remove(entry);
+      lima_bo_free(entry);
+   }
+}
+
+/* Try to park an unreferenced BO in the screen's BO cache.
+ *
+ * Returns false when the BO is not cacheable; the caller is then expected
+ * to free it. Returns true when the BO has been linked into the cache, in
+ * which case the caller must not free it. Stale cached BOs are freed as a
+ * side effect. */
+static bool
+lima_bo_cache_put(struct lima_bo *bo)
+{
+   if (!bo->cacheable)
+      return false;
+
+   struct lima_screen *screen = bo->screen;
+   struct timespec now;
+
+   mtx_lock(&screen->bo_cache_lock);
+
+   /* The bucket is the address of an element of screen->bo_cache_buckets,
+    * so it can never be NULL and needs no check. */
+   struct list_head *bucket = lima_bo_cache_get_bucket(screen, bo->size);
+
+   clock_gettime(CLOCK_MONOTONIC, &now);
+   bo->free_time = now.tv_sec;
+   list_addtail(&bo->size_list, bucket);
+   list_addtail(&bo->time_list, &screen->bo_cache_time);
+
+   /* Reuse the same timestamp to evict BOs that were cached too long ago */
+   lima_bo_cache_free_stale_bos(screen, now.tv_sec);
+   mtx_unlock(&screen->bo_cache_lock);
+
+   return true;
+}
+
+/* Look for a reusable BO in the screen's BO cache.
+ *
+ * Returns a cached BO whose size is at least `size` (rounded up to the page
+ * size) and whose flags equal `flags`, unlinked from the cache and with its
+ * refcount reset to 1. Returns NULL when the bucket has no matching BO or
+ * when the first matching BO is still busy. */
+static struct lima_bo *
+lima_bo_cache_get(struct lima_screen *screen, uint32_t size, uint32_t flags)
+{
+   struct lima_bo *bo = NULL;
+
+   /* lima_bo_create() allocates page-aligned sizes and BOs are bucketed by
+    * their real size, so the lookup has to use the aligned size as well.
+    * With the raw size, a request that is not a multiple of the page size
+    * searches a bucket that cannot hold a large enough BO. */
+   size = align(size, LIMA_PAGE_SIZE);
+
+   mtx_lock(&screen->bo_cache_lock);
+
+   /* The bucket points into screen->bo_cache_buckets and is never NULL */
+   struct list_head *bucket = lima_bo_cache_get_bucket(screen, size);
+
+   list_for_each_entry_safe(struct lima_bo, entry, bucket, size_list) {
+      if (entry->size < size || entry->flags != flags)
+         continue;
+
+      /* Only the first match is considered. Check if the BO is idle; if it
+       * is not, it is better to allocate a new one than to keep looking. */
+      if (lima_bo_wait(entry, LIMA_GEM_WAIT_WRITE, 0)) {
+         lima_bo_cache_remove(entry);
+         p_atomic_set(&entry->refcnt, 1);
+         bo = entry;
+      }
+      break;
+   }
+
+   mtx_unlock(&screen->bo_cache_lock);
+
+   return bo;
+}
+
struct lima_bo *lima_bo_create(struct lima_screen *screen,
uint32_t size, uint32_t flags)
{
struct lima_bo *bo;
+
+ /* Try to get bo from cache first */
+ bo = lima_bo_cache_get(screen, size, flags);
+ if (bo)
+ return bo;
+
+ size = align(size, LIMA_PAGE_SIZE);
+
struct drm_lima_gem_create req = {
.size = size,
.flags = flags,
@@ -110,12 +261,17 @@ struct lima_bo *lima_bo_create(struct lima_screen *screen,
if (!(bo = calloc(1, sizeof(*bo))))
return NULL;
+ list_inithead(&bo->time_list);
+ list_inithead(&bo->size_list);
+
if (drmIoctl(screen->fd, DRM_IOCTL_LIMA_GEM_CREATE, &req))
goto err_out0;
bo->screen = screen;
bo->size = req.size;
+ bo->flags = req.flags;
bo->handle = req.handle;
+ bo->cacheable = !(lima_debug & LIMA_DEBUG_NO_BO_CACHE);
p_atomic_set(&bo->refcnt, 1);
if (!lima_bo_get_info(bo))
@@ -130,25 +286,16 @@ err_out0:
return NULL;
}
-void lima_bo_free(struct lima_bo *bo)
+void lima_bo_unreference(struct lima_bo *bo)
{
if (!p_atomic_dec_zero(&bo->refcnt))
return;
- struct lima_screen *screen = bo->screen;
- mtx_lock(&screen->bo_table_lock);
- util_hash_table_remove(screen->bo_handles,
- (void *)(uintptr_t)bo->handle);
- if (bo->flink_name)
- util_hash_table_remove(screen->bo_flink_names,
- (void *)(uintptr_t)bo->flink_name);
- mtx_unlock(&screen->bo_table_lock);
-
- if (bo->map)
- lima_bo_unmap(bo);
+ /* Try to put it into cache */
+ if (lima_bo_cache_put(bo))
+ return;
- lima_close_kms_handle(screen, bo->handle);
- free(bo);
+ lima_bo_free(bo);
}
void *lima_bo_map(struct lima_bo *bo)
@@ -175,6 +322,9 @@ bool lima_bo_export(struct lima_bo *bo, struct winsys_handle *handle)
{
struct lima_screen *screen = bo->screen;
+ /* Don't cache exported BOs */
+ bo->cacheable = false;
+
switch (handle->type) {
case WINSYS_HANDLE_TYPE_SHARED:
if (!bo->flink_name) {
@@ -271,6 +421,8 @@ struct lima_bo *lima_bo_import(struct lima_screen *screen,
if (bo) {
p_atomic_inc(&bo->refcnt);
+ /* Don't cache imported BOs */
+ bo->cacheable = false;
mtx_unlock(&screen->bo_table_lock);
return bo;
}
@@ -282,6 +434,10 @@ struct lima_bo *lima_bo_import(struct lima_screen *screen,
return NULL;
}
+ /* Don't cache imported BOs */
+ bo->cacheable = false;
+ list_inithead(&bo->time_list);
+ list_inithead(&bo->size_list);
bo->screen = screen;
p_atomic_set(&bo->refcnt, 1);
diff --git a/src/gallium/drivers/lima/lima_bo.h b/src/gallium/drivers/lima/lima_bo.h
index 3f440b3b014..7bbd0063602 100644
--- a/src/gallium/drivers/lima/lima_bo.h
+++ b/src/gallium/drivers/lima/lima_bo.h
@@ -28,12 +28,18 @@
#include <stdint.h>
#include "util/u_atomic.h"
+#include "util/list.h"
struct lima_bo {
struct lima_screen *screen;
+ struct list_head time_list;
+ struct list_head size_list;
int refcnt;
+ bool cacheable;
+ time_t free_time;
uint32_t size;
+ uint32_t flags;
uint32_t handle;
uint64_t offset;
uint32_t flink_name;
@@ -44,10 +50,12 @@ struct lima_bo {
bool lima_bo_table_init(struct lima_screen *screen);
void lima_bo_table_fini(struct lima_screen *screen);
+bool lima_bo_cache_init(struct lima_screen *screen);
+void lima_bo_cache_fini(struct lima_screen *screen);
struct lima_bo *lima_bo_create(struct lima_screen *screen, uint32_t size,
uint32_t flags);
-void lima_bo_free(struct lima_bo *bo);
+void lima_bo_unreference(struct lima_bo *bo);
static inline void lima_bo_reference(struct lima_bo *bo)
{
diff --git a/src/gallium/drivers/lima/lima_context.c b/src/gallium/drivers/lima/lima_context.c
index 3c3887001bf..813e87361bb 100644
--- a/src/gallium/drivers/lima/lima_context.c
+++ b/src/gallium/drivers/lima/lima_context.c
@@ -138,13 +138,13 @@ lima_context_destroy(struct pipe_context *pctx)
for (int i = 0; i < LIMA_CTX_PLB_MAX_NUM; i++) {
if (ctx->plb[i])
- lima_bo_free(ctx->plb[i]);
+ lima_bo_unreference(ctx->plb[i]);
if (ctx->gp_tile_heap[i])
- lima_bo_free(ctx->gp_tile_heap[i]);
+ lima_bo_unreference(ctx->gp_tile_heap[i]);
}
if (ctx->plb_gp_stream)
- lima_bo_free(ctx->plb_gp_stream);
+ lima_bo_unreference(ctx->plb_gp_stream);
if (ctx->plb_pp_stream)
assert(!_mesa_hash_table_num_entries(ctx->plb_pp_stream));
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index e3863feb80e..5e639748bfc 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -302,7 +302,7 @@ lima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
struct lima_fs_shader_state *so = hwcso;
if (so->bo)
- lima_bo_free(so->bo);
+ lima_bo_unreference(so->bo);
ralloc_free(so);
}
@@ -396,7 +396,7 @@ lima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
struct lima_vs_shader_state *so = hwcso;
if (so->bo)
- lima_bo_free(so->bo);
+ lima_bo_unreference(so->bo);
ralloc_free(so);
}
diff --git a/src/gallium/drivers/lima/lima_resource.c b/src/gallium/drivers/lima/lima_resource.c
index faa129998c3..e01e60c0465 100644
--- a/src/gallium/drivers/lima/lima_resource.c
+++ b/src/gallium/drivers/lima/lima_resource.c
@@ -259,7 +259,7 @@ lima_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *pres)
struct lima_resource *res = lima_resource(pres);
if (res->bo)
- lima_bo_free(res->bo);
+ lima_bo_unreference(res->bo);
if (res->scanout)
renderonly_scanout_destroy(res->scanout, screen->ro);
@@ -528,7 +528,7 @@ lima_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
struct lima_ctx_plb_pp_stream *s = entry->data;
if (--s->refcnt == 0) {
if (s->bo)
- lima_bo_free(s->bo);
+ lima_bo_unreference(s->bo);
_mesa_hash_table_remove(ctx->plb_pp_stream, entry);
ralloc_free(s);
}
diff --git a/src/gallium/drivers/lima/lima_screen.c b/src/gallium/drivers/lima/lima_screen.c
index 5e6ac1ffb08..3c1288c897f 100644
--- a/src/gallium/drivers/lima/lima_screen.c
+++ b/src/gallium/drivers/lima/lima_screen.c
@@ -61,8 +61,9 @@ lima_screen_destroy(struct pipe_screen *pscreen)
free(screen->ro);
if (screen->pp_buffer)
- lima_bo_free(screen->pp_buffer);
+ lima_bo_unreference(screen->pp_buffer);
+ lima_bo_cache_fini(screen);
lima_bo_table_fini(screen);
ralloc_free(screen);
}
@@ -418,6 +419,8 @@ static const struct debug_named_value debug_options[] = {
"dump GPU command stream to $PWD/lima.dump" },
{ "shaderdb", LIMA_DEBUG_SHADERDB,
"print shader information for shaderdb" },
+ { "nobocache", LIMA_DEBUG_NO_BO_CACHE,
+ "disable BO cache" },
{ NULL }
};
@@ -478,16 +481,20 @@ lima_screen_create(int fd, struct renderonly *ro)
if (!lima_screen_query_info(screen))
goto err_out0;
- if (!lima_bo_table_init(screen))
+ if (!lima_bo_cache_init(screen))
goto err_out0;
+ if (!lima_bo_table_init(screen))
+ goto err_out1;
+
screen->pp_ra = ppir_regalloc_init(screen);
if (!screen->pp_ra)
- goto err_out1;
+ goto err_out2;
screen->pp_buffer = lima_bo_create(screen, pp_buffer_size, 0);
if (!screen->pp_buffer)
- goto err_out1;
+ goto err_out2;
+ screen->pp_buffer->cacheable = false;
/* fs program for clear buffer?
* const0 1 0 0 -1.67773, mov.v0 $0 ^const0.xxxx, stop
@@ -534,7 +541,7 @@ lima_screen_create(int fd, struct renderonly *ro)
screen->ro = renderonly_dup(ro);
if (!screen->ro) {
fprintf(stderr, "Failed to dup renderonly object\n");
- goto err_out2;
+ goto err_out3;
}
}
@@ -559,10 +566,12 @@ lima_screen_create(int fd, struct renderonly *ro)
return &screen->base;
+err_out3:
+ lima_bo_unreference(screen->pp_buffer);
err_out2:
- lima_bo_free(screen->pp_buffer);
-err_out1:
lima_bo_table_fini(screen);
+err_out1:
+ lima_bo_cache_fini(screen);
err_out0:
ralloc_free(screen);
return NULL;
diff --git a/src/gallium/drivers/lima/lima_screen.h b/src/gallium/drivers/lima/lima_screen.h
index 547d083ecd0..62fa480738c 100644
--- a/src/gallium/drivers/lima/lima_screen.h
+++ b/src/gallium/drivers/lima/lima_screen.h
@@ -37,6 +37,7 @@
#define LIMA_DEBUG_PP (1 << 1)
#define LIMA_DEBUG_DUMP (1 << 2)
#define LIMA_DEBUG_SHADERDB (1 << 3)
+#define LIMA_DEBUG_NO_BO_CACHE (1 << 4)
extern uint32_t lima_debug;
extern FILE *lima_dump_command_stream;
@@ -46,6 +47,11 @@ extern int lima_ppir_force_spilling;
struct ra_regs;
+#define MIN_BO_CACHE_BUCKET (12) /* 2^12 = 4KB */
+#define MAX_BO_CACHE_BUCKET (22) /* 2^22 = 4MB */
+
+#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
+
struct lima_screen {
struct pipe_screen base;
struct renderonly *ro;
@@ -60,8 +66,11 @@ struct lima_screen {
/* bo table */
mtx_t bo_table_lock;
+ mtx_t bo_cache_lock;
struct util_hash_table *bo_handles;
struct util_hash_table *bo_flink_names;
+ struct list_head bo_cache_buckets[NR_BO_CACHE_BUCKETS];
+ struct list_head bo_cache_time;
struct slab_parent_pool transfer_pool;
diff --git a/src/gallium/drivers/lima/lima_submit.c b/src/gallium/drivers/lima/lima_submit.c
index 3977af8078e..acc73d08054 100644
--- a/src/gallium/drivers/lima/lima_submit.c
+++ b/src/gallium/drivers/lima/lima_submit.c
@@ -145,7 +145,7 @@ bool lima_submit_start(struct lima_submit *submit, void *frame, uint32_t size)
bool ret = drmIoctl(submit->screen->fd, DRM_IOCTL_LIMA_GEM_SUBMIT, &req) == 0;
util_dynarray_foreach(&submit->bos, struct lima_bo *, bo) {
- lima_bo_free(*bo);
+ lima_bo_unreference(*bo);
}
util_dynarray_clear(&submit->gem_bos);