From 5f9ccf827e0aaff3c8571b3d226e33d6e98d4a6f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 21 Nov 2018 02:15:11 -0500 Subject: winsys/amdgpu: optimize slab allocation for 2 MB amdgpu page tables - the slab buffer size increased from 128 KB to 2 MB (PTE fragment size) - the max suballocated buffer size increased from 64 KB to 256 KB, which increases memory usage because it wastes memory - the number of suballocators increased from 1 to 3 and they are layered on top of each other to minimize unused space in slabs The final increase in memory usage is: DeusEx:MD: 1.8% DOTA 2: 1.75% DiRT Rally: 0.2% The kernel driver will also receive fewer buffers. --- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 8 ++++++++ src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 2 +- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'src/gallium/winsys/amdgpu') diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 80958d08f74..e7db383c69b 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -613,6 +613,14 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, if (entry_size <= max_entry_size) { /* The slab size is twice the size of the largest possible entry. */ slab_size = max_entry_size * 2; + + /* The largest slab should have the same size as the PTE fragment + * size to get faster address translation. 
+ */ + if (i == NUM_SLAB_ALLOCATORS - 1 && + slab_size < ws->info.pte_fragment_size) + slab_size = ws->info.pte_fragment_size; + break; } } assert(slab_size != 0); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 91120e3c474..6b7f484f239 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -311,7 +311,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, amdgpu_bo_destroy, amdgpu_bo_can_reclaim); unsigned min_slab_order = 9; /* 512 bytes */ - unsigned max_slab_order = 16; /* 64 KB - higher numbers increase memory usage */ + unsigned max_slab_order = 18; /* 256 KB - higher numbers increase memory usage */ unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index fc8f04544a9..5ae1d3e55a3 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -38,7 +38,7 @@ struct amdgpu_cs; -#define NUM_SLAB_ALLOCATORS 1 +#define NUM_SLAB_ALLOCATORS 3 struct amdgpu_winsys { struct radeon_winsys base; -- cgit v1.2.3