diff options
-rw-r--r-- | src/gallium/drivers/radeon/radeon_winsys.h | 4 | ||||
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 55 |
2 files changed, 36 insertions, 23 deletions
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 6c03999d212..82feef39487 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -52,7 +52,9 @@ enum radeon_bo_layout { enum radeon_bo_domain { /* bitfield */ RADEON_DOMAIN_GTT = 2, RADEON_DOMAIN_VRAM = 4, - RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT + RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, + RADEON_DOMAIN_GDS = 8, + RADEON_DOMAIN_OA = 16, }; enum radeon_bo_flag { /* bitfield */ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 95fb7ef3e36..b24f90674d7 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -191,8 +191,10 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf) util_hash_table_remove(ws->bo_export_table, bo->bo); simple_mtx_unlock(&ws->bo_export_table_lock); - amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); - amdgpu_va_range_free(bo->u.real.va_handle); + if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) { + amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); + amdgpu_va_range_free(bo->u.real.va_handle); + } amdgpu_bo_free(bo->bo); amdgpu_bo_remove_fences(bo); @@ -457,11 +459,12 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, uint64_t va = 0; struct amdgpu_winsys_bo *bo; amdgpu_va_handle va_handle; - unsigned va_gap_size; int r; /* VRAM or GTT must be specified, but not both at the same time. */ - assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1); + assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT | + RADEON_DOMAIN_GDS | + RADEON_DOMAIN_OA)) == 1); /* Gfx9: Overallocate the size to the next power of two for faster address * translation if we don't waste too much memory. @@ -503,6 +506,10 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; if (initial_domain & RADEON_DOMAIN_GTT) request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; + if (initial_domain & RADEON_DOMAIN_GDS) + request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS; + if (initial_domain & RADEON_DOMAIN_OA) + request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA; /* Since VRAM and GTT have almost the same performance on APUs, we could * just set GTT. However, in order to decrease GTT(RAM) usage, which is @@ -532,27 +539,29 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, goto error_bo_alloc; } - va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; + if (initial_domain & RADEON_DOMAIN_VRAM_GTT) { + unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; - r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - size + va_gap_size, - amdgpu_get_optimal_vm_alignment(ws, size, alignment), - 0, &va, &va_handle, - (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | - AMDGPU_VA_RANGE_HIGH); - if (r) - goto error_va_alloc; + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, + size + va_gap_size, + amdgpu_get_optimal_vm_alignment(ws, size, alignment), + 0, &va, &va_handle, + (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | + AMDGPU_VA_RANGE_HIGH); + if (r) + goto error_va_alloc; - unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | - AMDGPU_VM_PAGE_EXECUTABLE; + unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | + AMDGPU_VM_PAGE_EXECUTABLE; - if (!(flags & RADEON_FLAG_READ_ONLY)) - vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (!(flags & RADEON_FLAG_READ_ONLY)) + vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; - r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, + r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, AMDGPU_VA_OP_MAP); - if (r) - goto error_va_map; + if (r) + goto error_va_map; + } simple_mtx_init(&bo->lock, mtx_plain); pipe_reference_init(&bo->base.reference, 1); @@ -1371,8 +1380,10 @@ no_slab: * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, * like constant/uniform buffers, can benefit from better and more reuse. */ - size = align64(size, ws->info.gart_page_size); - alignment = align(alignment, ws->info.gart_page_size); + if (domain & RADEON_DOMAIN_VRAM_GTT) { + size = align64(size, ws->info.gart_page_size); + alignment = align(alignment, ws->info.gart_page_size); + } bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING; |