summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2016-07-17 13:23:53 +0200
committer: Marek Olšák <[email protected]> 2016-07-19 23:45:06 +0200
commit: dea6fdadca56a5bef507d6a5818707eb8b135ef2 (patch)
tree: 76baffc9d53e0318c5bc72ec4e06d6ab8d77ebc3
parent: 8d5944199d0d76ef506529e9b13c2e223756a53e (diff)
winsys/radeon: use pb_cache buckets for fewer pb_cache misses
This makes Bioshock Infinite with deferred flushing 2.2% faster.

Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_bo.c 29
1 file changed, 22 insertions, 7 deletions
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 82dec8ed981..897b5367092 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -503,7 +503,8 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
unsigned size, unsigned alignment,
unsigned usage,
unsigned initial_domains,
- unsigned flags)
+ unsigned flags,
+ unsigned pb_cache_bucket)
{
struct radeon_bo *bo;
struct drm_radeon_gem_create args;
@@ -551,7 +552,8 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
bo->va = 0;
bo->initial_domain = initial_domains;
pipe_mutex_init(bo->map_mutex);
- pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base, 0);
+ pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base,
+ pb_cache_bucket);
if (rws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
@@ -723,7 +725,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
{
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
struct radeon_bo *bo;
- unsigned usage = 0;
+ unsigned usage = 0, pb_cache_bucket;
/* Only 32-bit sizes are supported. */
if (size > UINT_MAX)
@@ -742,19 +744,32 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
if (domain == RADEON_DOMAIN_VRAM_GTT)
usage = 1 << 2;
else
- usage = domain >> 1;
+ usage = (unsigned)domain >> 1;
assert(flags < sizeof(usage) * 8 - 3);
usage |= 1 << (flags + 3);
- bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage, 0));
+ /* Determine the pb_cache bucket for minimizing pb_cache misses. */
+ pb_cache_bucket = 0;
+ if (size <= 4096) /* small buffers */
+ pb_cache_bucket += 1;
+ if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
+ pb_cache_bucket += 2;
+ if (flags == RADEON_FLAG_GTT_WC) /* WC */
+ pb_cache_bucket += 4;
+ assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
+
+ bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
+ usage, pb_cache_bucket));
if (bo)
return &bo->base;
- bo = radeon_create_bo(ws, size, alignment, usage, domain, flags);
+ bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
+ pb_cache_bucket);
if (!bo) {
/* Clear the cache and try again. */
pb_cache_release_all_buffers(&ws->bo_cache);
- bo = radeon_create_bo(ws, size, alignment, usage, domain, flags);
+ bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
+ pb_cache_bucket);
if (!bo)
return NULL;
}