Diffstat (limited to 'src/gallium')
-rw-r--r--   src/gallium/winsys/radeon/drm/.editorconfig        |    3
-rw-r--r--   src/gallium/winsys/radeon/drm/radeon_drm_bo.c      | 2096
-rw-r--r--   src/gallium/winsys/radeon/drm/radeon_drm_bo.h      |   66
-rw-r--r--   src/gallium/winsys/radeon/drm/radeon_drm_cs.c      | 1171
-rw-r--r--   src/gallium/winsys/radeon/drm/radeon_drm_cs.h      |  130
-rw-r--r--   src/gallium/winsys/radeon/drm/radeon_drm_surface.c |  720
-rw-r--r--   src/gallium/winsys/radeon/drm/radeon_drm_winsys.c  | 1618
-rw-r--r--   src/gallium/winsys/radeon/drm/radeon_drm_winsys.h  |  115
8 files changed, 2956 insertions(+), 2963 deletions(-)
diff --git a/src/gallium/winsys/radeon/drm/.editorconfig b/src/gallium/winsys/radeon/drm/.editorconfig deleted file mode 100644 index 7b12a40ca00..00000000000 --- a/src/gallium/winsys/radeon/drm/.editorconfig +++ /dev/null @@ -1,3 +0,0 @@ -[*.{c,h}] -indent_style = space -indent_size = 4 diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 0f3a8aa67cd..f9275aba2d0 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -51,379 +51,379 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo) { - return (struct radeon_bo *)bo; + return (struct radeon_bo *)bo; } struct radeon_bo_va_hole { - struct list_head list; - uint64_t offset; - uint64_t size; + struct list_head list; + uint64_t offset; + uint64_t size; }; static bool radeon_real_bo_is_busy(struct radeon_bo *bo) { - struct drm_radeon_gem_busy args = {0}; + struct drm_radeon_gem_busy args = {0}; - args.handle = bo->handle; - return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, - &args, sizeof(args)) != 0; + args.handle = bo->handle; + return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, + &args, sizeof(args)) != 0; } static bool radeon_bo_is_busy(struct radeon_bo *bo) { - unsigned num_idle; - bool busy = false; - - if (bo->handle) - return radeon_real_bo_is_busy(bo); - - mtx_lock(&bo->rws->bo_fence_lock); - for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) { - if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) { - busy = true; - break; - } - radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL); - } - memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle], - (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0])); - bo->u.slab.num_fences -= num_idle; - mtx_unlock(&bo->rws->bo_fence_lock); - - return busy; + unsigned num_idle; + bool busy = false; + + if (bo->handle) + return radeon_real_bo_is_busy(bo); + + mtx_lock(&bo->rws->bo_fence_lock); + for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) { + if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) { + busy = true; + break; + } + radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL); + } + memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle], + (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0])); + bo->u.slab.num_fences -= num_idle; + mtx_unlock(&bo->rws->bo_fence_lock); + + return busy; } static void radeon_real_bo_wait_idle(struct radeon_bo *bo) { - struct drm_radeon_gem_wait_idle args = {0}; + struct drm_radeon_gem_wait_idle args = {0}; - args.handle = bo->handle; - while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, - &args, sizeof(args)) == -EBUSY); + args.handle = bo->handle; + while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, + &args, sizeof(args)) == -EBUSY); } static void radeon_bo_wait_idle(struct radeon_bo *bo) { - if (bo->handle) { - radeon_real_bo_wait_idle(bo); - } else { - mtx_lock(&bo->rws->bo_fence_lock); - while (bo->u.slab.num_fences) { - struct radeon_bo *fence = NULL; - radeon_bo_reference(&fence, bo->u.slab.fences[0]); - mtx_unlock(&bo->rws->bo_fence_lock); - - /* Wait without holding the fence lock. 
*/ - radeon_real_bo_wait_idle(fence); - - mtx_lock(&bo->rws->bo_fence_lock); - if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) { - radeon_bo_reference(&bo->u.slab.fences[0], NULL); - memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1], - (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0])); - bo->u.slab.num_fences--; - } - radeon_bo_reference(&fence, NULL); - } - mtx_unlock(&bo->rws->bo_fence_lock); - } + if (bo->handle) { + radeon_real_bo_wait_idle(bo); + } else { + mtx_lock(&bo->rws->bo_fence_lock); + while (bo->u.slab.num_fences) { + struct radeon_bo *fence = NULL; + radeon_bo_reference(&fence, bo->u.slab.fences[0]); + mtx_unlock(&bo->rws->bo_fence_lock); + + /* Wait without holding the fence lock. */ + radeon_real_bo_wait_idle(fence); + + mtx_lock(&bo->rws->bo_fence_lock); + if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) { + radeon_bo_reference(&bo->u.slab.fences[0], NULL); + memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1], + (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0])); + bo->u.slab.num_fences--; + } + radeon_bo_reference(&fence, NULL); + } + mtx_unlock(&bo->rws->bo_fence_lock); + } } static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout, enum radeon_bo_usage usage) { - struct radeon_bo *bo = radeon_bo(_buf); - int64_t abs_timeout; + struct radeon_bo *bo = radeon_bo(_buf); + int64_t abs_timeout; - /* No timeout. Just query. */ - if (timeout == 0) - return !bo->num_active_ioctls && !radeon_bo_is_busy(bo); + /* No timeout. Just query. */ + if (timeout == 0) + return !bo->num_active_ioctls && !radeon_bo_is_busy(bo); - abs_timeout = os_time_get_absolute_timeout(timeout); + abs_timeout = os_time_get_absolute_timeout(timeout); - /* Wait if any ioctl is being submitted with this buffer. */ - if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout)) - return false; + /* Wait if any ioctl is being submitted with this buffer. */ + if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout)) + return false; - /* Infinite timeout. */ - if (abs_timeout == PIPE_TIMEOUT_INFINITE) { - radeon_bo_wait_idle(bo); - return true; - } + /* Infinite timeout. */ + if (abs_timeout == PIPE_TIMEOUT_INFINITE) { + radeon_bo_wait_idle(bo); + return true; + } - /* Other timeouts need to be emulated with a loop. */ - while (radeon_bo_is_busy(bo)) { - if (os_time_get_nano() >= abs_timeout) - return false; - os_time_sleep(10); - } + /* Other timeouts need to be emulated with a loop. */ + while (radeon_bo_is_busy(bo)) { + if (os_time_get_nano() >= abs_timeout) + return false; + os_time_sleep(10); + } - return true; + return true; } static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain) { - /* Zero domains the driver doesn't understand. */ - domain &= RADEON_DOMAIN_VRAM_GTT; + /* Zero domains the driver doesn't understand. */ + domain &= RADEON_DOMAIN_VRAM_GTT; - /* If no domain is set, we must set something... */ - if (!domain) - domain = RADEON_DOMAIN_VRAM_GTT; + /* If no domain is set, we must set something... 
*/ + if (!domain) + domain = RADEON_DOMAIN_VRAM_GTT; - return domain; + return domain; } static enum radeon_bo_domain radeon_bo_get_initial_domain( - struct pb_buffer *buf) + struct pb_buffer *buf) { - struct radeon_bo *bo = (struct radeon_bo*)buf; - struct drm_radeon_gem_op args; - - if (bo->rws->info.drm_minor < 38) - return RADEON_DOMAIN_VRAM_GTT; - - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN; - - if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP, - &args, sizeof(args))) { - fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n", - bo, bo->handle); - /* Default domain as returned by get_valid_domain. */ - return RADEON_DOMAIN_VRAM_GTT; - } - - /* GEM domains and winsys domains are defined the same. */ - return get_valid_domain(args.value); + struct radeon_bo *bo = (struct radeon_bo*)buf; + struct drm_radeon_gem_op args; + + if (bo->rws->info.drm_minor < 38) + return RADEON_DOMAIN_VRAM_GTT; + + memset(&args, 0, sizeof(args)); + args.handle = bo->handle; + args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN; + + if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP, + &args, sizeof(args))) { + fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n", + bo, bo->handle); + /* Default domain as returned by get_valid_domain. */ + return RADEON_DOMAIN_VRAM_GTT; + } + + /* GEM domains and winsys domains are defined the same. */ + return get_valid_domain(args.value); } static uint64_t radeon_bomgr_find_va(const struct radeon_info *info, struct radeon_vm_heap *heap, uint64_t size, uint64_t alignment) { - struct radeon_bo_va_hole *hole, *n; - uint64_t offset = 0, waste = 0; - - /* All VM address space holes will implicitly start aligned to the - * size alignment, so we don't need to sanitize the alignment here - */ - size = align(size, info->gart_page_size); - - mtx_lock(&heap->mutex); - /* first look for a hole */ - LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) { - offset = hole->offset; - waste = offset % alignment; - waste = waste ? alignment - waste : 0; - offset += waste; - if (offset >= (hole->offset + hole->size)) { - continue; - } - if (!waste && hole->size == size) { - offset = hole->offset; - list_del(&hole->list); - FREE(hole); - mtx_unlock(&heap->mutex); - return offset; - } - if ((hole->size - waste) > size) { - if (waste) { - n = CALLOC_STRUCT(radeon_bo_va_hole); - n->size = waste; - n->offset = hole->offset; - list_add(&n->list, &hole->list); - } - hole->size -= (size + waste); - hole->offset += size + waste; - mtx_unlock(&heap->mutex); - return offset; - } - if ((hole->size - waste) == size) { - hole->size = waste; - mtx_unlock(&heap->mutex); - return offset; - } - } - - offset = heap->start; - waste = offset % alignment; - waste = waste ? 
alignment - waste : 0; - - if (offset + waste + size > heap->end) { - mtx_unlock(&heap->mutex); - return 0; - } - - if (waste) { - n = CALLOC_STRUCT(radeon_bo_va_hole); - n->size = waste; - n->offset = offset; - list_add(&n->list, &heap->holes); - } - offset += waste; - heap->start += size + waste; - mtx_unlock(&heap->mutex); - return offset; + struct radeon_bo_va_hole *hole, *n; + uint64_t offset = 0, waste = 0; + + /* All VM address space holes will implicitly start aligned to the + * size alignment, so we don't need to sanitize the alignment here + */ + size = align(size, info->gart_page_size); + + mtx_lock(&heap->mutex); + /* first look for a hole */ + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) { + offset = hole->offset; + waste = offset % alignment; + waste = waste ? alignment - waste : 0; + offset += waste; + if (offset >= (hole->offset + hole->size)) { + continue; + } + if (!waste && hole->size == size) { + offset = hole->offset; + list_del(&hole->list); + FREE(hole); + mtx_unlock(&heap->mutex); + return offset; + } + if ((hole->size - waste) > size) { + if (waste) { + n = CALLOC_STRUCT(radeon_bo_va_hole); + n->size = waste; + n->offset = hole->offset; + list_add(&n->list, &hole->list); + } + hole->size -= (size + waste); + hole->offset += size + waste; + mtx_unlock(&heap->mutex); + return offset; + } + if ((hole->size - waste) == size) { + hole->size = waste; + mtx_unlock(&heap->mutex); + return offset; + } + } + + offset = heap->start; + waste = offset % alignment; + waste = waste ? alignment - waste : 0; + + if (offset + waste + size > heap->end) { + mtx_unlock(&heap->mutex); + return 0; + } + + if (waste) { + n = CALLOC_STRUCT(radeon_bo_va_hole); + n->size = waste; + n->offset = offset; + list_add(&n->list, &heap->holes); + } + offset += waste; + heap->start += size + waste; + mtx_unlock(&heap->mutex); + return offset; } static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws, uint64_t size, uint64_t alignment) { - uint64_t va = 0; - - /* Try to allocate from the 64-bit address space first. - * If it doesn't exist (start = 0) or if it doesn't have enough space, - * fall back to the 32-bit address space. - */ - if (ws->vm64.start) - va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment); - if (!va) - va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment); - return va; + uint64_t va = 0; + + /* Try to allocate from the 64-bit address space first. + * If it doesn't exist (start = 0) or if it doesn't have enough space, + * fall back to the 32-bit address space. 
+ */ + if (ws->vm64.start) + va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment); + if (!va) + va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment); + return va; } static void radeon_bomgr_free_va(const struct radeon_info *info, struct radeon_vm_heap *heap, uint64_t va, uint64_t size) { - struct radeon_bo_va_hole *hole = NULL; - - size = align(size, info->gart_page_size); - - mtx_lock(&heap->mutex); - if ((va + size) == heap->start) { - heap->start = va; - /* Delete uppermost hole if it reaches the new top */ - if (!list_is_empty(&heap->holes)) { - hole = container_of(heap->holes.next, hole, list); - if ((hole->offset + hole->size) == va) { - heap->start = hole->offset; - list_del(&hole->list); - FREE(hole); - } - } - } else { - struct radeon_bo_va_hole *next; - - hole = container_of(&heap->holes, hole, list); - LIST_FOR_EACH_ENTRY(next, &heap->holes, list) { - if (next->offset < va) - break; - hole = next; - } - - if (&hole->list != &heap->holes) { - /* Grow upper hole if it's adjacent */ - if (hole->offset == (va + size)) { - hole->offset = va; - hole->size += size; - /* Merge lower hole if it's adjacent */ - if (next != hole && &next->list != &heap->holes && - (next->offset + next->size) == va) { - next->size += hole->size; - list_del(&hole->list); - FREE(hole); - } - goto out; + struct radeon_bo_va_hole *hole = NULL; + + size = align(size, info->gart_page_size); + + mtx_lock(&heap->mutex); + if ((va + size) == heap->start) { + heap->start = va; + /* Delete uppermost hole if it reaches the new top */ + if (!list_is_empty(&heap->holes)) { + hole = container_of(heap->holes.next, hole, list); + if ((hole->offset + hole->size) == va) { + heap->start = hole->offset; + list_del(&hole->list); + FREE(hole); + } + } + } else { + struct radeon_bo_va_hole *next; + + hole = container_of(&heap->holes, hole, list); + LIST_FOR_EACH_ENTRY(next, &heap->holes, list) { + if (next->offset < va) + break; + hole = next; + } + + if (&hole->list != &heap->holes) { + /* Grow upper hole if it's adjacent */ + if (hole->offset == (va + size)) { + hole->offset = va; + hole->size += size; + /* Merge lower hole if it's adjacent */ + if (next != hole && &next->list != &heap->holes && + (next->offset + next->size) == va) { + next->size += hole->size; + list_del(&hole->list); + FREE(hole); } - } - - /* Grow lower hole if it's adjacent */ - if (next != hole && &next->list != &heap->holes && - (next->offset + next->size) == va) { - next->size += size; goto out; - } - - /* FIXME on allocation failure we just lose virtual address space - * maybe print a warning - */ - next = CALLOC_STRUCT(radeon_bo_va_hole); - if (next) { - next->size = size; - next->offset = va; - list_add(&next->list, &hole->list); - } - } + } + } + + /* Grow lower hole if it's adjacent */ + if (next != hole && &next->list != &heap->holes && + (next->offset + next->size) == va) { + next->size += size; + goto out; + } + + /* FIXME on allocation failure we just lose virtual address space + * maybe print a warning + */ + next = CALLOC_STRUCT(radeon_bo_va_hole); + if (next) { + next->size = size; + next->offset = va; + list_add(&next->list, &hole->list); + } + } out: - mtx_unlock(&heap->mutex); + mtx_unlock(&heap->mutex); } void radeon_bo_destroy(struct pb_buffer *_buf) { - struct radeon_bo *bo = radeon_bo(_buf); - struct radeon_drm_winsys *rws = bo->rws; - struct drm_gem_close args; - - assert(bo->handle && "must not be called for slab entries"); - - memset(&args, 0, sizeof(args)); - - mtx_lock(&rws->bo_handles_mutex); - 
_mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle); - if (bo->flink_name) { - _mesa_hash_table_remove_key(rws->bo_names, - (void*)(uintptr_t)bo->flink_name); - } - mtx_unlock(&rws->bo_handles_mutex); - - if (bo->u.real.ptr) - os_munmap(bo->u.real.ptr, bo->base.size); - - if (rws->info.r600_has_virtual_memory) { - if (rws->va_unmap_working) { - struct drm_radeon_gem_va va; - - va.handle = bo->handle; - va.vm_id = 0; - va.operation = RADEON_VA_UNMAP; - va.flags = RADEON_VM_PAGE_READABLE | - RADEON_VM_PAGE_WRITEABLE | - RADEON_VM_PAGE_SNOOPED; - va.offset = bo->va; - - if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, - sizeof(va)) != 0 && - va.operation == RADEON_VA_RESULT_ERROR) { - fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n"); - fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size); - fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va); - } - } - - radeon_bomgr_free_va(&rws->info, - bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64, - bo->va, bo->base.size); - } - - /* Close object. */ - args.handle = bo->handle; - drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args); - - mtx_destroy(&bo->u.real.map_mutex); - - if (bo->initial_domain & RADEON_DOMAIN_VRAM) - rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size); - else if (bo->initial_domain & RADEON_DOMAIN_GTT) - rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size); - - if (bo->u.real.map_count >= 1) { - if (bo->initial_domain & RADEON_DOMAIN_VRAM) - bo->rws->mapped_vram -= bo->base.size; - else - bo->rws->mapped_gtt -= bo->base.size; - bo->rws->num_mapped_buffers--; - } - - FREE(bo); + struct radeon_bo *bo = radeon_bo(_buf); + struct radeon_drm_winsys *rws = bo->rws; + struct drm_gem_close args; + + assert(bo->handle && "must not be called for slab entries"); + + memset(&args, 0, sizeof(args)); + + mtx_lock(&rws->bo_handles_mutex); + _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle); + if (bo->flink_name) { + _mesa_hash_table_remove_key(rws->bo_names, + (void*)(uintptr_t)bo->flink_name); + } + mtx_unlock(&rws->bo_handles_mutex); + + if (bo->u.real.ptr) + os_munmap(bo->u.real.ptr, bo->base.size); + + if (rws->info.r600_has_virtual_memory) { + if (rws->va_unmap_working) { + struct drm_radeon_gem_va va; + + va.handle = bo->handle; + va.vm_id = 0; + va.operation = RADEON_VA_UNMAP; + va.flags = RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_WRITEABLE | + RADEON_VM_PAGE_SNOOPED; + va.offset = bo->va; + + if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, + sizeof(va)) != 0 && + va.operation == RADEON_VA_RESULT_ERROR) { + fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n"); + fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size); + fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va); + } + } + + radeon_bomgr_free_va(&rws->info, + bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64, + bo->va, bo->base.size); + } + + /* Close object. 
*/ + args.handle = bo->handle; + drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args); + + mtx_destroy(&bo->u.real.map_mutex); + + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size); + else if (bo->initial_domain & RADEON_DOMAIN_GTT) + rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size); + + if (bo->u.real.map_count >= 1) { + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + bo->rws->mapped_vram -= bo->base.size; + else + bo->rws->mapped_gtt -= bo->base.size; + bo->rws->num_mapped_buffers--; + } + + FREE(bo); } static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf) { struct radeon_bo *bo = radeon_bo(_buf); - assert(bo->handle && "must not be called for slab entries"); + assert(bo->handle && "must not be called for slab entries"); if (bo->u.real.use_reusable_pool) pb_cache_add_buffer(&bo->u.real.cache_entry); @@ -433,188 +433,188 @@ static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf) void *radeon_bo_do_map(struct radeon_bo *bo) { - struct drm_radeon_gem_mmap args = {0}; - void *ptr; - unsigned offset; - - /* If the buffer is created from user memory, return the user pointer. */ - if (bo->user_ptr) - return bo->user_ptr; - - if (bo->handle) { - offset = 0; - } else { - offset = bo->va - bo->u.slab.real->va; - bo = bo->u.slab.real; - } - - /* Map the buffer. */ - mtx_lock(&bo->u.real.map_mutex); - /* Return the pointer if it's already mapped. */ - if (bo->u.real.ptr) { - bo->u.real.map_count++; - mtx_unlock(&bo->u.real.map_mutex); - return (uint8_t*)bo->u.real.ptr + offset; - } - args.handle = bo->handle; - args.offset = 0; - args.size = (uint64_t)bo->base.size; - if (drmCommandWriteRead(bo->rws->fd, - DRM_RADEON_GEM_MMAP, - &args, - sizeof(args))) { - mtx_unlock(&bo->u.real.map_mutex); - fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n", - bo, bo->handle); - return NULL; - } - - ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, - bo->rws->fd, args.addr_ptr); - if (ptr == MAP_FAILED) { - /* Clear the cache and try again. */ - pb_cache_release_all_buffers(&bo->rws->bo_cache); - - ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, - bo->rws->fd, args.addr_ptr); - if (ptr == MAP_FAILED) { - mtx_unlock(&bo->u.real.map_mutex); - fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno); - return NULL; - } - } - bo->u.real.ptr = ptr; - bo->u.real.map_count = 1; - - if (bo->initial_domain & RADEON_DOMAIN_VRAM) - bo->rws->mapped_vram += bo->base.size; - else - bo->rws->mapped_gtt += bo->base.size; - bo->rws->num_mapped_buffers++; - - mtx_unlock(&bo->u.real.map_mutex); - return (uint8_t*)bo->u.real.ptr + offset; + struct drm_radeon_gem_mmap args = {0}; + void *ptr; + unsigned offset; + + /* If the buffer is created from user memory, return the user pointer. */ + if (bo->user_ptr) + return bo->user_ptr; + + if (bo->handle) { + offset = 0; + } else { + offset = bo->va - bo->u.slab.real->va; + bo = bo->u.slab.real; + } + + /* Map the buffer. */ + mtx_lock(&bo->u.real.map_mutex); + /* Return the pointer if it's already mapped. 
*/ + if (bo->u.real.ptr) { + bo->u.real.map_count++; + mtx_unlock(&bo->u.real.map_mutex); + return (uint8_t*)bo->u.real.ptr + offset; + } + args.handle = bo->handle; + args.offset = 0; + args.size = (uint64_t)bo->base.size; + if (drmCommandWriteRead(bo->rws->fd, + DRM_RADEON_GEM_MMAP, + &args, + sizeof(args))) { + mtx_unlock(&bo->u.real.map_mutex); + fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n", + bo, bo->handle); + return NULL; + } + + ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, + bo->rws->fd, args.addr_ptr); + if (ptr == MAP_FAILED) { + /* Clear the cache and try again. */ + pb_cache_release_all_buffers(&bo->rws->bo_cache); + + ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, + bo->rws->fd, args.addr_ptr); + if (ptr == MAP_FAILED) { + mtx_unlock(&bo->u.real.map_mutex); + fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno); + return NULL; + } + } + bo->u.real.ptr = ptr; + bo->u.real.map_count = 1; + + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + bo->rws->mapped_vram += bo->base.size; + else + bo->rws->mapped_gtt += bo->base.size; + bo->rws->num_mapped_buffers++; + + mtx_unlock(&bo->u.real.map_mutex); + return (uint8_t*)bo->u.real.ptr + offset; } static void *radeon_bo_map(struct pb_buffer *buf, struct radeon_cmdbuf *rcs, enum pipe_transfer_usage usage) { - struct radeon_bo *bo = (struct radeon_bo*)buf; - struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs; - - /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */ - if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { - /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */ - if (usage & PIPE_TRANSFER_DONTBLOCK) { - if (!(usage & PIPE_TRANSFER_WRITE)) { - /* Mapping for read. - * - * Since we are mapping for read, we don't need to wait - * if the GPU is using the buffer for read too - * (neither one is changing it). - * - * Only check whether the buffer is being used for write. */ - if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { - cs->flush_cs(cs->flush_data, - RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); - return NULL; - } - - if (!radeon_bo_wait((struct pb_buffer*)bo, 0, - RADEON_USAGE_WRITE)) { - return NULL; - } - } else { - if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, - RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); - return NULL; - } - - if (!radeon_bo_wait((struct pb_buffer*)bo, 0, - RADEON_USAGE_READWRITE)) { - return NULL; - } + struct radeon_bo *bo = (struct radeon_bo*)buf; + struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs; + + /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */ + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */ + if (usage & PIPE_TRANSFER_DONTBLOCK) { + if (!(usage & PIPE_TRANSFER_WRITE)) { + /* Mapping for read. + * + * Since we are mapping for read, we don't need to wait + * if the GPU is using the buffer for read too + * (neither one is changing it). + * + * Only check whether the buffer is being used for write. 
*/ + if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); + return NULL; + } + + if (!radeon_bo_wait((struct pb_buffer*)bo, 0, + RADEON_USAGE_WRITE)) { + return NULL; + } + } else { + if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); + return NULL; + } + + if (!radeon_bo_wait((struct pb_buffer*)bo, 0, + RADEON_USAGE_READWRITE)) { + return NULL; + } + } + } else { + uint64_t time = os_time_get_nano(); + + if (!(usage & PIPE_TRANSFER_WRITE)) { + /* Mapping for read. + * + * Since we are mapping for read, we don't need to wait + * if the GPU is using the buffer for read too + * (neither one is changing it). + * + * Only check whether the buffer is being used for write. */ + if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); } - } else { - uint64_t time = os_time_get_nano(); - - if (!(usage & PIPE_TRANSFER_WRITE)) { - /* Mapping for read. - * - * Since we are mapping for read, we don't need to wait - * if the GPU is using the buffer for read too - * (neither one is changing it). - * - * Only check whether the buffer is being used for write. */ - if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { - cs->flush_cs(cs->flush_data, - RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); - } - radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE, - RADEON_USAGE_WRITE); - } else { - /* Mapping for write. */ - if (cs) { - if (radeon_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, - RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); - } else { - /* Try to avoid busy-waiting in radeon_bo_wait. */ - if (p_atomic_read(&bo->num_active_ioctls)) - radeon_drm_cs_sync_flush(rcs); - } - } - - radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE, - RADEON_USAGE_READWRITE); + radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE, + RADEON_USAGE_WRITE); + } else { + /* Mapping for write. */ + if (cs) { + if (radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); + } else { + /* Try to avoid busy-waiting in radeon_bo_wait. 
*/ + if (p_atomic_read(&bo->num_active_ioctls)) + radeon_drm_cs_sync_flush(rcs); + } } - bo->rws->buffer_wait_time += os_time_get_nano() - time; - } - } + radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE, + RADEON_USAGE_READWRITE); + } - return radeon_bo_do_map(bo); + bo->rws->buffer_wait_time += os_time_get_nano() - time; + } + } + + return radeon_bo_do_map(bo); } static void radeon_bo_unmap(struct pb_buffer *_buf) { - struct radeon_bo *bo = (struct radeon_bo*)_buf; + struct radeon_bo *bo = (struct radeon_bo*)_buf; - if (bo->user_ptr) - return; + if (bo->user_ptr) + return; - if (!bo->handle) - bo = bo->u.slab.real; + if (!bo->handle) + bo = bo->u.slab.real; - mtx_lock(&bo->u.real.map_mutex); - if (!bo->u.real.ptr) { - mtx_unlock(&bo->u.real.map_mutex); - return; /* it's not been mapped */ - } + mtx_lock(&bo->u.real.map_mutex); + if (!bo->u.real.ptr) { + mtx_unlock(&bo->u.real.map_mutex); + return; /* it's not been mapped */ + } - assert(bo->u.real.map_count); - if (--bo->u.real.map_count) { - mtx_unlock(&bo->u.real.map_mutex); - return; /* it's been mapped multiple times */ - } + assert(bo->u.real.map_count); + if (--bo->u.real.map_count) { + mtx_unlock(&bo->u.real.map_mutex); + return; /* it's been mapped multiple times */ + } - os_munmap(bo->u.real.ptr, bo->base.size); - bo->u.real.ptr = NULL; + os_munmap(bo->u.real.ptr, bo->base.size); + bo->u.real.ptr = NULL; - if (bo->initial_domain & RADEON_DOMAIN_VRAM) - bo->rws->mapped_vram -= bo->base.size; - else - bo->rws->mapped_gtt -= bo->base.size; - bo->rws->num_mapped_buffers--; + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + bo->rws->mapped_vram -= bo->base.size; + else + bo->rws->mapped_gtt -= bo->base.size; + bo->rws->num_mapped_buffers--; - mtx_unlock(&bo->u.real.map_mutex); + mtx_unlock(&bo->u.real.map_mutex); } static const struct pb_vtbl radeon_bo_vtbl = { - radeon_bo_destroy_or_cache - /* other functions are never called */ + radeon_bo_destroy_or_cache + /* other functions are never called */ }; static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws, @@ -623,118 +623,118 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws, unsigned flags, int heap) { - struct radeon_bo *bo; - struct drm_radeon_gem_create args; - int r; - - memset(&args, 0, sizeof(args)); - - assert(initial_domains); - assert((initial_domains & - ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0); - - args.size = size; - args.alignment = alignment; - args.initial_domain = initial_domains; - args.flags = 0; - - /* If VRAM is just stolen system memory, allow both VRAM and - * GTT, whichever has free space. If a buffer is evicted from - * VRAM to GTT, it will stay there. 
- */ - if (!rws->info.has_dedicated_vram) - args.initial_domain |= RADEON_DOMAIN_GTT; - - if (flags & RADEON_FLAG_GTT_WC) - args.flags |= RADEON_GEM_GTT_WC; - if (flags & RADEON_FLAG_NO_CPU_ACCESS) - args.flags |= RADEON_GEM_NO_CPU_ACCESS; - - if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE, - &args, sizeof(args))) { - fprintf(stderr, "radeon: Failed to allocate a buffer:\n"); - fprintf(stderr, "radeon: size : %u bytes\n", size); - fprintf(stderr, "radeon: alignment : %u bytes\n", alignment); - fprintf(stderr, "radeon: domains : %u\n", args.initial_domain); - fprintf(stderr, "radeon: flags : %u\n", args.flags); - return NULL; - } - - assert(args.handle != 0); - - bo = CALLOC_STRUCT(radeon_bo); - if (!bo) - return NULL; - - pipe_reference_init(&bo->base.reference, 1); - bo->base.alignment = alignment; - bo->base.usage = 0; - bo->base.size = size; - bo->base.vtbl = &radeon_bo_vtbl; - bo->rws = rws; - bo->handle = args.handle; - bo->va = 0; - bo->initial_domain = initial_domains; - bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1); - (void) mtx_init(&bo->u.real.map_mutex, mtx_plain); - - if (heap >= 0) { - pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base, - heap); - } - - if (rws->info.r600_has_virtual_memory) { - struct drm_radeon_gem_va va; - unsigned va_gap_size; - - va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; - - if (flags & RADEON_FLAG_32BIT) { - bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32, - size + va_gap_size, alignment); - assert(bo->va + size < rws->vm32.end); - } else { - bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment); - } - - va.handle = bo->handle; - va.vm_id = 0; - va.operation = RADEON_VA_MAP; - va.flags = RADEON_VM_PAGE_READABLE | - RADEON_VM_PAGE_WRITEABLE | - RADEON_VM_PAGE_SNOOPED; - va.offset = bo->va; - r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); - if (r && va.operation == RADEON_VA_RESULT_ERROR) { - fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n"); - fprintf(stderr, "radeon: size : %d bytes\n", size); - fprintf(stderr, "radeon: alignment : %d bytes\n", alignment); - fprintf(stderr, "radeon: domains : %d\n", args.initial_domain); - fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va); - radeon_bo_destroy(&bo->base); - return NULL; - } - mtx_lock(&rws->bo_handles_mutex); - if (va.operation == RADEON_VA_RESULT_VA_EXIST) { - struct pb_buffer *b = &bo->base; - struct radeon_bo *old_bo = - util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset); - - mtx_unlock(&rws->bo_handles_mutex); - pb_reference(&b, &old_bo->base); - return radeon_bo(b); - } - - _mesa_hash_table_insert(rws->bo_vas, (void*)(uintptr_t)bo->va, bo); - mtx_unlock(&rws->bo_handles_mutex); - } - - if (initial_domains & RADEON_DOMAIN_VRAM) - rws->allocated_vram += align(size, rws->info.gart_page_size); - else if (initial_domains & RADEON_DOMAIN_GTT) - rws->allocated_gtt += align(size, rws->info.gart_page_size); - - return bo; + struct radeon_bo *bo; + struct drm_radeon_gem_create args; + int r; + + memset(&args, 0, sizeof(args)); + + assert(initial_domains); + assert((initial_domains & + ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0); + + args.size = size; + args.alignment = alignment; + args.initial_domain = initial_domains; + args.flags = 0; + + /* If VRAM is just stolen system memory, allow both VRAM and + * GTT, whichever has free space. If a buffer is evicted from + * VRAM to GTT, it will stay there. 
+ */ + if (!rws->info.has_dedicated_vram) + args.initial_domain |= RADEON_DOMAIN_GTT; + + if (flags & RADEON_FLAG_GTT_WC) + args.flags |= RADEON_GEM_GTT_WC; + if (flags & RADEON_FLAG_NO_CPU_ACCESS) + args.flags |= RADEON_GEM_NO_CPU_ACCESS; + + if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE, + &args, sizeof(args))) { + fprintf(stderr, "radeon: Failed to allocate a buffer:\n"); + fprintf(stderr, "radeon: size : %u bytes\n", size); + fprintf(stderr, "radeon: alignment : %u bytes\n", alignment); + fprintf(stderr, "radeon: domains : %u\n", args.initial_domain); + fprintf(stderr, "radeon: flags : %u\n", args.flags); + return NULL; + } + + assert(args.handle != 0); + + bo = CALLOC_STRUCT(radeon_bo); + if (!bo) + return NULL; + + pipe_reference_init(&bo->base.reference, 1); + bo->base.alignment = alignment; + bo->base.usage = 0; + bo->base.size = size; + bo->base.vtbl = &radeon_bo_vtbl; + bo->rws = rws; + bo->handle = args.handle; + bo->va = 0; + bo->initial_domain = initial_domains; + bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1); + (void) mtx_init(&bo->u.real.map_mutex, mtx_plain); + + if (heap >= 0) { + pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base, + heap); + } + + if (rws->info.r600_has_virtual_memory) { + struct drm_radeon_gem_va va; + unsigned va_gap_size; + + va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; + + if (flags & RADEON_FLAG_32BIT) { + bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32, + size + va_gap_size, alignment); + assert(bo->va + size < rws->vm32.end); + } else { + bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment); + } + + va.handle = bo->handle; + va.vm_id = 0; + va.operation = RADEON_VA_MAP; + va.flags = RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_WRITEABLE | + RADEON_VM_PAGE_SNOOPED; + va.offset = bo->va; + r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); + if (r && va.operation == RADEON_VA_RESULT_ERROR) { + fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n"); + fprintf(stderr, "radeon: size : %d bytes\n", size); + fprintf(stderr, "radeon: alignment : %d bytes\n", alignment); + fprintf(stderr, "radeon: domains : %d\n", args.initial_domain); + fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va); + radeon_bo_destroy(&bo->base); + return NULL; + } + mtx_lock(&rws->bo_handles_mutex); + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { + struct pb_buffer *b = &bo->base; + struct radeon_bo *old_bo = + util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset); + + mtx_unlock(&rws->bo_handles_mutex); + pb_reference(&b, &old_bo->base); + return radeon_bo(b); + } + + _mesa_hash_table_insert(rws->bo_vas, (void*)(uintptr_t)bo->va, bo); + mtx_unlock(&rws->bo_handles_mutex); + } + + if (initial_domains & RADEON_DOMAIN_VRAM) + rws->allocated_vram += align(size, rws->info.gart_page_size); + else if (initial_domains & RADEON_DOMAIN_GTT) + rws->allocated_gtt += align(size, rws->info.gart_page_size); + + return bo; } bool radeon_bo_can_reclaim(struct pb_buffer *_buf) @@ -749,206 +749,206 @@ bool radeon_bo_can_reclaim(struct pb_buffer *_buf) bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry) { - struct radeon_bo *bo = NULL; /* fix container_of */ - bo = container_of(entry, bo, u.slab.entry); + struct radeon_bo *bo = NULL; /* fix container_of */ + bo = container_of(entry, bo, u.slab.entry); - return radeon_bo_can_reclaim(&bo->base); + return radeon_bo_can_reclaim(&bo->base); } static void radeon_bo_slab_destroy(struct 
pb_buffer *_buf) { - struct radeon_bo *bo = radeon_bo(_buf); + struct radeon_bo *bo = radeon_bo(_buf); - assert(!bo->handle); + assert(!bo->handle); - pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry); + pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry); } static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = { - radeon_bo_slab_destroy - /* other functions are never called */ + radeon_bo_slab_destroy + /* other functions are never called */ }; struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index) { - struct radeon_drm_winsys *ws = priv; - struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab); - enum radeon_bo_domain domains = radeon_domain_from_heap(heap); - enum radeon_bo_flag flags = radeon_flags_from_heap(heap); - unsigned base_hash; + struct radeon_drm_winsys *ws = priv; + struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab); + enum radeon_bo_domain domains = radeon_domain_from_heap(heap); + enum radeon_bo_flag flags = radeon_flags_from_heap(heap); + unsigned base_hash; - if (!slab) - return NULL; + if (!slab) + return NULL; - slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base, - 64 * 1024, 64 * 1024, - domains, flags)); - if (!slab->buffer) - goto fail; + slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base, + 64 * 1024, 64 * 1024, + domains, flags)); + if (!slab->buffer) + goto fail; - assert(slab->buffer->handle); + assert(slab->buffer->handle); - slab->base.num_entries = slab->buffer->base.size / entry_size; - slab->base.num_free = slab->base.num_entries; - slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries)); - if (!slab->entries) - goto fail_buffer; + slab->base.num_entries = slab->buffer->base.size / entry_size; + slab->base.num_free = slab->base.num_entries; + slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries)); + if (!slab->entries) + goto fail_buffer; - list_inithead(&slab->base.free); + list_inithead(&slab->base.free); - base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries); + base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries); - for (unsigned i = 0; i < slab->base.num_entries; ++i) { - struct radeon_bo *bo = &slab->entries[i]; + for (unsigned i = 0; i < slab->base.num_entries; ++i) { + struct radeon_bo *bo = &slab->entries[i]; - bo->base.alignment = entry_size; - bo->base.usage = slab->buffer->base.usage; - bo->base.size = entry_size; - bo->base.vtbl = &radeon_winsys_bo_slab_vtbl; - bo->rws = ws; - bo->va = slab->buffer->va + i * entry_size; - bo->initial_domain = domains; - bo->hash = base_hash + i; - bo->u.slab.entry.slab = &slab->base; - bo->u.slab.entry.group_index = group_index; - bo->u.slab.real = slab->buffer; + bo->base.alignment = entry_size; + bo->base.usage = slab->buffer->base.usage; + bo->base.size = entry_size; + bo->base.vtbl = &radeon_winsys_bo_slab_vtbl; + bo->rws = ws; + bo->va = slab->buffer->va + i * entry_size; + bo->initial_domain = domains; + bo->hash = base_hash + i; + bo->u.slab.entry.slab = &slab->base; + bo->u.slab.entry.group_index = group_index; + bo->u.slab.real = slab->buffer; - list_addtail(&bo->u.slab.entry.head, &slab->base.free); - } + list_addtail(&bo->u.slab.entry.head, &slab->base.free); + } - return &slab->base; + return &slab->base; fail_buffer: - radeon_bo_reference(&slab->buffer, NULL); + radeon_bo_reference(&slab->buffer, NULL); fail: - FREE(slab); - return NULL; + FREE(slab); + return NULL; } void radeon_bo_slab_free(void *priv, struct pb_slab *pslab) { - struct radeon_slab 
*slab = (struct radeon_slab *)pslab; - - for (unsigned i = 0; i < slab->base.num_entries; ++i) { - struct radeon_bo *bo = &slab->entries[i]; - for (unsigned j = 0; j < bo->u.slab.num_fences; ++j) - radeon_bo_reference(&bo->u.slab.fences[j], NULL); - FREE(bo->u.slab.fences); - } - - FREE(slab->entries); - radeon_bo_reference(&slab->buffer, NULL); - FREE(slab); + struct radeon_slab *slab = (struct radeon_slab *)pslab; + + for (unsigned i = 0; i < slab->base.num_entries; ++i) { + struct radeon_bo *bo = &slab->entries[i]; + for (unsigned j = 0; j < bo->u.slab.num_fences; ++j) + radeon_bo_reference(&bo->u.slab.fences[j], NULL); + FREE(bo->u.slab.fences); + } + + FREE(slab->entries); + radeon_bo_reference(&slab->buffer, NULL); + FREE(slab); } static unsigned eg_tile_split(unsigned tile_split) { - switch (tile_split) { - case 0: tile_split = 64; break; - case 1: tile_split = 128; break; - case 2: tile_split = 256; break; - case 3: tile_split = 512; break; - default: - case 4: tile_split = 1024; break; - case 5: tile_split = 2048; break; - case 6: tile_split = 4096; break; - } - return tile_split; + switch (tile_split) { + case 0: tile_split = 64; break; + case 1: tile_split = 128; break; + case 2: tile_split = 256; break; + case 3: tile_split = 512; break; + default: + case 4: tile_split = 1024; break; + case 5: tile_split = 2048; break; + case 6: tile_split = 4096; break; + } + return tile_split; } static unsigned eg_tile_split_rev(unsigned eg_tile_split) { - switch (eg_tile_split) { - case 64: return 0; - case 128: return 1; - case 256: return 2; - case 512: return 3; - default: - case 1024: return 4; - case 2048: return 5; - case 4096: return 6; - } + switch (eg_tile_split) { + case 64: return 0; + case 128: return 1; + case 256: return 2; + case 512: return 3; + default: + case 1024: return 4; + case 2048: return 5; + case 4096: return 6; + } } static void radeon_bo_get_metadata(struct pb_buffer *_buf, - struct radeon_bo_metadata *md) + struct radeon_bo_metadata *md) { - struct radeon_bo *bo = radeon_bo(_buf); - struct drm_radeon_gem_set_tiling args; + struct radeon_bo *bo = radeon_bo(_buf); + struct drm_radeon_gem_set_tiling args; - assert(bo->handle && "must not be called for slab entries"); + assert(bo->handle && "must not be called for slab entries"); - memset(&args, 0, sizeof(args)); + memset(&args, 0, sizeof(args)); - args.handle = bo->handle; + args.handle = bo->handle; - drmCommandWriteRead(bo->rws->fd, - DRM_RADEON_GEM_GET_TILING, - &args, - sizeof(args)); + drmCommandWriteRead(bo->rws->fd, + DRM_RADEON_GEM_GET_TILING, + &args, + sizeof(args)); - md->u.legacy.microtile = RADEON_LAYOUT_LINEAR; - md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR; - if (args.tiling_flags & RADEON_TILING_MICRO) - md->u.legacy.microtile = RADEON_LAYOUT_TILED; - else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE) - md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED; + md->u.legacy.microtile = RADEON_LAYOUT_LINEAR; + md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR; + if (args.tiling_flags & RADEON_TILING_MICRO) + md->u.legacy.microtile = RADEON_LAYOUT_TILED; + else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE) + md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED; - if (args.tiling_flags & RADEON_TILING_MACRO) - md->u.legacy.macrotile = RADEON_LAYOUT_TILED; + if (args.tiling_flags & RADEON_TILING_MACRO) + md->u.legacy.macrotile = RADEON_LAYOUT_TILED; - md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK; - md->u.legacy.bankh = (args.tiling_flags >> 
RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK; - md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK; - md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK; - md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split); - md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT); + md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK; + md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK; + md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK; + md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK; + md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split); + md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT); } static void radeon_bo_set_metadata(struct pb_buffer *_buf, struct radeon_bo_metadata *md) { - struct radeon_bo *bo = radeon_bo(_buf); - struct drm_radeon_gem_set_tiling args; + struct radeon_bo *bo = radeon_bo(_buf); + struct drm_radeon_gem_set_tiling args; - assert(bo->handle && "must not be called for slab entries"); + assert(bo->handle && "must not be called for slab entries"); - memset(&args, 0, sizeof(args)); + memset(&args, 0, sizeof(args)); - os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE); + os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE); - if (md->u.legacy.microtile == RADEON_LAYOUT_TILED) - args.tiling_flags |= RADEON_TILING_MICRO; - else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED) - args.tiling_flags |= RADEON_TILING_MICRO_SQUARE; + if (md->u.legacy.microtile == RADEON_LAYOUT_TILED) + args.tiling_flags |= RADEON_TILING_MICRO; + else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED) + args.tiling_flags |= RADEON_TILING_MICRO_SQUARE; - if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED) - args.tiling_flags |= RADEON_TILING_MACRO; + if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED) + args.tiling_flags |= RADEON_TILING_MACRO; - args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) << - RADEON_TILING_EG_BANKW_SHIFT; - args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) << - RADEON_TILING_EG_BANKH_SHIFT; - if (md->u.legacy.tile_split) { - args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) & - RADEON_TILING_EG_TILE_SPLIT_MASK) << - RADEON_TILING_EG_TILE_SPLIT_SHIFT; - } - args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) << - RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT; + args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) << + RADEON_TILING_EG_BANKW_SHIFT; + args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) << + RADEON_TILING_EG_BANKH_SHIFT; + if (md->u.legacy.tile_split) { + args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) & + RADEON_TILING_EG_TILE_SPLIT_MASK) << + RADEON_TILING_EG_TILE_SPLIT_SHIFT; + } + args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) << + RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT; - if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout) - args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT; + if (bo->rws->gen >= DRV_SI && 
!md->u.legacy.scanout) + args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT; - args.handle = bo->handle; - args.pitch = md->u.legacy.stride; + args.handle = bo->handle; + args.pitch = md->u.legacy.stride; - drmCommandWriteRead(bo->rws->fd, - DRM_RADEON_GEM_SET_TILING, - &args, - sizeof(args)); + drmCommandWriteRead(bo->rws->fd, + DRM_RADEON_GEM_SET_TILING, + &args, + sizeof(args)); } static struct pb_buffer * @@ -958,359 +958,359 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, enum radeon_bo_domain domain, enum radeon_bo_flag flags) { - struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); - struct radeon_bo *bo; - int heap = -1; - - assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */ - - /* Only 32-bit sizes are supported. */ - if (size > UINT_MAX) - return NULL; - - /* VRAM implies WC. This is not optional. */ - if (domain & RADEON_DOMAIN_VRAM) - flags |= RADEON_FLAG_GTT_WC; - /* NO_CPU_ACCESS is valid with VRAM only. */ - if (domain != RADEON_DOMAIN_VRAM) - flags &= ~RADEON_FLAG_NO_CPU_ACCESS; - - /* Sub-allocate small buffers from slabs. */ - if (!(flags & RADEON_FLAG_NO_SUBALLOC) && - size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) && - ws->info.r600_has_virtual_memory && - alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) { - struct pb_slab_entry *entry; - int heap = radeon_get_heap_index(domain, flags); - - if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) - goto no_slab; - - entry = pb_slab_alloc(&ws->bo_slabs, size, heap); - if (!entry) { - /* Clear the cache and try again. */ - pb_cache_release_all_buffers(&ws->bo_cache); - - entry = pb_slab_alloc(&ws->bo_slabs, size, heap); - } - if (!entry) - return NULL; - - bo = NULL; - bo = container_of(entry, bo, u.slab.entry); - - pipe_reference_init(&bo->base.reference, 1); - - return &bo->base; - } + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); + struct radeon_bo *bo; + int heap = -1; + + assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */ + + /* Only 32-bit sizes are supported. */ + if (size > UINT_MAX) + return NULL; + + /* VRAM implies WC. This is not optional. */ + if (domain & RADEON_DOMAIN_VRAM) + flags |= RADEON_FLAG_GTT_WC; + /* NO_CPU_ACCESS is valid with VRAM only. */ + if (domain != RADEON_DOMAIN_VRAM) + flags &= ~RADEON_FLAG_NO_CPU_ACCESS; + + /* Sub-allocate small buffers from slabs. */ + if (!(flags & RADEON_FLAG_NO_SUBALLOC) && + size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) && + ws->info.r600_has_virtual_memory && + alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) { + struct pb_slab_entry *entry; + int heap = radeon_get_heap_index(domain, flags); + + if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) + goto no_slab; + + entry = pb_slab_alloc(&ws->bo_slabs, size, heap); + if (!entry) { + /* Clear the cache and try again. */ + pb_cache_release_all_buffers(&ws->bo_cache); + + entry = pb_slab_alloc(&ws->bo_slabs, size, heap); + } + if (!entry) + return NULL; + + bo = NULL; + bo = container_of(entry, bo, u.slab.entry); + + pipe_reference_init(&bo->base.reference, 1); + + return &bo->base; + } no_slab: - /* This flag is irrelevant for the cache. */ - flags &= ~RADEON_FLAG_NO_SUBALLOC; - - /* Align size to page size. This is the minimum alignment for normal - * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, - * like constant/uniform buffers, can benefit from better and more reuse. 
- */ - size = align(size, ws->info.gart_page_size); - alignment = align(alignment, ws->info.gart_page_size); - - bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING; - - /* Shared resources don't use cached heaps. */ - if (use_reusable_pool) { - heap = radeon_get_heap_index(domain, flags); - assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); - - bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, - 0, heap)); - if (bo) - return &bo->base; - } - - bo = radeon_create_bo(ws, size, alignment, domain, flags, heap); - if (!bo) { - /* Clear the cache and try again. */ - if (ws->info.r600_has_virtual_memory) - pb_slabs_reclaim(&ws->bo_slabs); - pb_cache_release_all_buffers(&ws->bo_cache); - bo = radeon_create_bo(ws, size, alignment, domain, flags, heap); - if (!bo) - return NULL; - } - - bo->u.real.use_reusable_pool = use_reusable_pool; - - mtx_lock(&ws->bo_handles_mutex); - _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo); - mtx_unlock(&ws->bo_handles_mutex); - - return &bo->base; + /* This flag is irrelevant for the cache. */ + flags &= ~RADEON_FLAG_NO_SUBALLOC; + + /* Align size to page size. This is the minimum alignment for normal + * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, + * like constant/uniform buffers, can benefit from better and more reuse. + */ + size = align(size, ws->info.gart_page_size); + alignment = align(alignment, ws->info.gart_page_size); + + bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING; + + /* Shared resources don't use cached heaps. */ + if (use_reusable_pool) { + heap = radeon_get_heap_index(domain, flags); + assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); + + bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, + 0, heap)); + if (bo) + return &bo->base; + } + + bo = radeon_create_bo(ws, size, alignment, domain, flags, heap); + if (!bo) { + /* Clear the cache and try again. */ + if (ws->info.r600_has_virtual_memory) + pb_slabs_reclaim(&ws->bo_slabs); + pb_cache_release_all_buffers(&ws->bo_cache); + bo = radeon_create_bo(ws, size, alignment, domain, flags, heap); + if (!bo) + return NULL; + } + + bo->u.real.use_reusable_pool = use_reusable_pool; + + mtx_lock(&ws->bo_handles_mutex); + _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo); + mtx_unlock(&ws->bo_handles_mutex); + + return &bo->base; } static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws, void *pointer, uint64_t size) { - struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); - struct drm_radeon_gem_userptr args; - struct radeon_bo *bo; - int r; - - bo = CALLOC_STRUCT(radeon_bo); - if (!bo) - return NULL; - - memset(&args, 0, sizeof(args)); - args.addr = (uintptr_t)pointer; - args.size = align(size, ws->info.gart_page_size); - args.flags = RADEON_GEM_USERPTR_ANONONLY | - RADEON_GEM_USERPTR_VALIDATE | - RADEON_GEM_USERPTR_REGISTER; - if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR, - &args, sizeof(args))) { - FREE(bo); - return NULL; - } - - assert(args.handle != 0); - - mtx_lock(&ws->bo_handles_mutex); - - /* Initialize it. 
*/ - pipe_reference_init(&bo->base.reference, 1); - bo->handle = args.handle; - bo->base.alignment = 0; - bo->base.size = size; - bo->base.vtbl = &radeon_bo_vtbl; - bo->rws = ws; - bo->user_ptr = pointer; - bo->va = 0; - bo->initial_domain = RADEON_DOMAIN_GTT; - bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1); - (void) mtx_init(&bo->u.real.map_mutex, mtx_plain); - - _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo); - - mtx_unlock(&ws->bo_handles_mutex); - - if (ws->info.r600_has_virtual_memory) { - struct drm_radeon_gem_va va; - - bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20); - - va.handle = bo->handle; - va.operation = RADEON_VA_MAP; - va.vm_id = 0; - va.offset = bo->va; - va.flags = RADEON_VM_PAGE_READABLE | - RADEON_VM_PAGE_WRITEABLE | - RADEON_VM_PAGE_SNOOPED; - va.offset = bo->va; - r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); - if (r && va.operation == RADEON_VA_RESULT_ERROR) { - fprintf(stderr, "radeon: Failed to assign virtual address space\n"); - radeon_bo_destroy(&bo->base); - return NULL; - } - mtx_lock(&ws->bo_handles_mutex); - if (va.operation == RADEON_VA_RESULT_VA_EXIST) { - struct pb_buffer *b = &bo->base; - struct radeon_bo *old_bo = - util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset); - - mtx_unlock(&ws->bo_handles_mutex); - pb_reference(&b, &old_bo->base); - return b; - } - - _mesa_hash_table_insert(ws->bo_vas, (void*)(uintptr_t)bo->va, bo); - mtx_unlock(&ws->bo_handles_mutex); - } - - ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size); - - return (struct pb_buffer*)bo; + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); + struct drm_radeon_gem_userptr args; + struct radeon_bo *bo; + int r; + + bo = CALLOC_STRUCT(radeon_bo); + if (!bo) + return NULL; + + memset(&args, 0, sizeof(args)); + args.addr = (uintptr_t)pointer; + args.size = align(size, ws->info.gart_page_size); + args.flags = RADEON_GEM_USERPTR_ANONONLY | + RADEON_GEM_USERPTR_VALIDATE | + RADEON_GEM_USERPTR_REGISTER; + if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR, + &args, sizeof(args))) { + FREE(bo); + return NULL; + } + + assert(args.handle != 0); + + mtx_lock(&ws->bo_handles_mutex); + + /* Initialize it. 
*/ + pipe_reference_init(&bo->base.reference, 1); + bo->handle = args.handle; + bo->base.alignment = 0; + bo->base.size = size; + bo->base.vtbl = &radeon_bo_vtbl; + bo->rws = ws; + bo->user_ptr = pointer; + bo->va = 0; + bo->initial_domain = RADEON_DOMAIN_GTT; + bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1); + (void) mtx_init(&bo->u.real.map_mutex, mtx_plain); + + _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo); + + mtx_unlock(&ws->bo_handles_mutex); + + if (ws->info.r600_has_virtual_memory) { + struct drm_radeon_gem_va va; + + bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20); + + va.handle = bo->handle; + va.operation = RADEON_VA_MAP; + va.vm_id = 0; + va.offset = bo->va; + va.flags = RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_WRITEABLE | + RADEON_VM_PAGE_SNOOPED; + va.offset = bo->va; + r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); + if (r && va.operation == RADEON_VA_RESULT_ERROR) { + fprintf(stderr, "radeon: Failed to assign virtual address space\n"); + radeon_bo_destroy(&bo->base); + return NULL; + } + mtx_lock(&ws->bo_handles_mutex); + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { + struct pb_buffer *b = &bo->base; + struct radeon_bo *old_bo = + util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset); + + mtx_unlock(&ws->bo_handles_mutex); + pb_reference(&b, &old_bo->base); + return b; + } + + _mesa_hash_table_insert(ws->bo_vas, (void*)(uintptr_t)bo->va, bo); + mtx_unlock(&ws->bo_handles_mutex); + } + + ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size); + + return (struct pb_buffer*)bo; } static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, struct winsys_handle *whandle, unsigned vm_alignment) { - struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); - struct radeon_bo *bo; - int r; - unsigned handle; - uint64_t size = 0; - - /* We must maintain a list of pairs <handle, bo>, so that we always return - * the same BO for one particular handle. If we didn't do that and created - * more than one BO for the same handle and then relocated them in a CS, - * we would hit a deadlock in the kernel. - * - * The list of pairs is guarded by a mutex, of course. */ - mtx_lock(&ws->bo_handles_mutex); - - if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { - /* First check if there already is an existing bo for the handle. */ - bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle); - } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { - /* We must first get the GEM handle, as fds are unreliable keys */ - r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle); - if (r) - goto fail; - bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle); - } else { - /* Unknown handle type */ - goto fail; - } - - if (bo) { - /* Increase the refcount. */ - struct pb_buffer *b = NULL; - pb_reference(&b, &bo->base); - goto done; - } - - /* There isn't, create a new one. */ - bo = CALLOC_STRUCT(radeon_bo); - if (!bo) { - goto fail; - } - - if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { - struct drm_gem_open open_arg = {}; - memset(&open_arg, 0, sizeof(open_arg)); - /* Open the BO. 
*/ - open_arg.name = whandle->handle; - if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) { - FREE(bo); - goto fail; - } - handle = open_arg.handle; - size = open_arg.size; - bo->flink_name = whandle->handle; - } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { - size = lseek(whandle->handle, 0, SEEK_END); - /* - * Could check errno to determine whether the kernel is new enough, but - * it doesn't really matter why this failed, just that it failed. - */ - if (size == (off_t)-1) { - FREE(bo); - goto fail; - } - lseek(whandle->handle, 0, SEEK_SET); - } - - assert(handle != 0); - - bo->handle = handle; - - /* Initialize it. */ - pipe_reference_init(&bo->base.reference, 1); - bo->base.alignment = 0; - bo->base.size = (unsigned) size; - bo->base.vtbl = &radeon_bo_vtbl; - bo->rws = ws; - bo->va = 0; - bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1); - (void) mtx_init(&bo->u.real.map_mutex, mtx_plain); - - if (bo->flink_name) - _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo); - - _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo); + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); + struct radeon_bo *bo; + int r; + unsigned handle; + uint64_t size = 0; + + /* We must maintain a list of pairs <handle, bo>, so that we always return + * the same BO for one particular handle. If we didn't do that and created + * more than one BO for the same handle and then relocated them in a CS, + * we would hit a deadlock in the kernel. + * + * The list of pairs is guarded by a mutex, of course. */ + mtx_lock(&ws->bo_handles_mutex); + + if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { + /* First check if there already is an existing bo for the handle. */ + bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle); + } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { + /* We must first get the GEM handle, as fds are unreliable keys */ + r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle); + if (r) + goto fail; + bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle); + } else { + /* Unknown handle type */ + goto fail; + } + + if (bo) { + /* Increase the refcount. */ + struct pb_buffer *b = NULL; + pb_reference(&b, &bo->base); + goto done; + } + + /* There isn't, create a new one. */ + bo = CALLOC_STRUCT(radeon_bo); + if (!bo) { + goto fail; + } + + if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { + struct drm_gem_open open_arg = {}; + memset(&open_arg, 0, sizeof(open_arg)); + /* Open the BO. */ + open_arg.name = whandle->handle; + if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) { + FREE(bo); + goto fail; + } + handle = open_arg.handle; + size = open_arg.size; + bo->flink_name = whandle->handle; + } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { + size = lseek(whandle->handle, 0, SEEK_END); + /* + * Could check errno to determine whether the kernel is new enough, but + * it doesn't really matter why this failed, just that it failed. + */ + if (size == (off_t)-1) { + FREE(bo); + goto fail; + } + lseek(whandle->handle, 0, SEEK_SET); + } + + assert(handle != 0); + + bo->handle = handle; + + /* Initialize it. 
*/ + pipe_reference_init(&bo->base.reference, 1); + bo->base.alignment = 0; + bo->base.size = (unsigned) size; + bo->base.vtbl = &radeon_bo_vtbl; + bo->rws = ws; + bo->va = 0; + bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1); + (void) mtx_init(&bo->u.real.map_mutex, mtx_plain); + + if (bo->flink_name) + _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo); + + _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo); done: - mtx_unlock(&ws->bo_handles_mutex); - - if (ws->info.r600_has_virtual_memory && !bo->va) { - struct drm_radeon_gem_va va; - - bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment); - - va.handle = bo->handle; - va.operation = RADEON_VA_MAP; - va.vm_id = 0; - va.offset = bo->va; - va.flags = RADEON_VM_PAGE_READABLE | - RADEON_VM_PAGE_WRITEABLE | - RADEON_VM_PAGE_SNOOPED; - va.offset = bo->va; - r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); - if (r && va.operation == RADEON_VA_RESULT_ERROR) { - fprintf(stderr, "radeon: Failed to assign virtual address space\n"); - radeon_bo_destroy(&bo->base); - return NULL; - } - mtx_lock(&ws->bo_handles_mutex); - if (va.operation == RADEON_VA_RESULT_VA_EXIST) { - struct pb_buffer *b = &bo->base; - struct radeon_bo *old_bo = - util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset); - - mtx_unlock(&ws->bo_handles_mutex); - pb_reference(&b, &old_bo->base); - return b; - } - - _mesa_hash_table_insert(ws->bo_vas, (void*)(uintptr_t)bo->va, bo); - mtx_unlock(&ws->bo_handles_mutex); - } - - bo->initial_domain = radeon_bo_get_initial_domain((void*)bo); - - if (bo->initial_domain & RADEON_DOMAIN_VRAM) - ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size); - else if (bo->initial_domain & RADEON_DOMAIN_GTT) - ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size); - - return (struct pb_buffer*)bo; + mtx_unlock(&ws->bo_handles_mutex); + + if (ws->info.r600_has_virtual_memory && !bo->va) { + struct drm_radeon_gem_va va; + + bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment); + + va.handle = bo->handle; + va.operation = RADEON_VA_MAP; + va.vm_id = 0; + va.offset = bo->va; + va.flags = RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_WRITEABLE | + RADEON_VM_PAGE_SNOOPED; + va.offset = bo->va; + r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); + if (r && va.operation == RADEON_VA_RESULT_ERROR) { + fprintf(stderr, "radeon: Failed to assign virtual address space\n"); + radeon_bo_destroy(&bo->base); + return NULL; + } + mtx_lock(&ws->bo_handles_mutex); + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { + struct pb_buffer *b = &bo->base; + struct radeon_bo *old_bo = + util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset); + + mtx_unlock(&ws->bo_handles_mutex); + pb_reference(&b, &old_bo->base); + return b; + } + + _mesa_hash_table_insert(ws->bo_vas, (void*)(uintptr_t)bo->va, bo); + mtx_unlock(&ws->bo_handles_mutex); + } + + bo->initial_domain = radeon_bo_get_initial_domain((void*)bo); + + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size); + else if (bo->initial_domain & RADEON_DOMAIN_GTT) + ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size); + + return (struct pb_buffer*)bo; fail: - mtx_unlock(&ws->bo_handles_mutex); - return NULL; + mtx_unlock(&ws->bo_handles_mutex); + return NULL; } static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws, struct pb_buffer *buffer, struct winsys_handle *whandle) { - struct drm_gem_flink 
flink; - struct radeon_bo *bo = radeon_bo(buffer); - struct radeon_drm_winsys *ws = bo->rws; + struct drm_gem_flink flink; + struct radeon_bo *bo = radeon_bo(buffer); + struct radeon_drm_winsys *ws = bo->rws; - /* Don't allow exports of slab entries. */ - if (!bo->handle) - return false; + /* Don't allow exports of slab entries. */ + if (!bo->handle) + return false; - memset(&flink, 0, sizeof(flink)); + memset(&flink, 0, sizeof(flink)); - bo->u.real.use_reusable_pool = false; + bo->u.real.use_reusable_pool = false; - if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { - if (!bo->flink_name) { - flink.handle = bo->handle; + if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { + if (!bo->flink_name) { + flink.handle = bo->handle; - if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) { - return false; - } - - bo->flink_name = flink.name; - - mtx_lock(&ws->bo_handles_mutex); - _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo); - mtx_unlock(&ws->bo_handles_mutex); - } - whandle->handle = bo->flink_name; - } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) { - whandle->handle = bo->handle; - } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { - if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle)) + if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) { return false; - } - - return true; + } + + bo->flink_name = flink.name; + + mtx_lock(&ws->bo_handles_mutex); + _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo); + mtx_unlock(&ws->bo_handles_mutex); + } + whandle->handle = bo->flink_name; + } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) { + whandle->handle = bo->handle; + } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { + if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle)) + return false; + } + + return true; } static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf) @@ -1325,33 +1325,33 @@ static bool radeon_winsys_bo_is_suballocated(struct pb_buffer *buf) static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf) { - return ((struct radeon_bo*)buf)->va; + return ((struct radeon_bo*)buf)->va; } static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf) { - struct radeon_bo *bo = radeon_bo(buf); + struct radeon_bo *bo = radeon_bo(buf); - if (bo->handle) - return 0; + if (bo->handle) + return 0; - return bo->va - bo->u.slab.real->va; + return bo->va - bo->u.slab.real->va; } void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws) { - ws->base.buffer_set_metadata = radeon_bo_set_metadata; - ws->base.buffer_get_metadata = radeon_bo_get_metadata; - ws->base.buffer_map = radeon_bo_map; - ws->base.buffer_unmap = radeon_bo_unmap; - ws->base.buffer_wait = radeon_bo_wait; - ws->base.buffer_create = radeon_winsys_bo_create; - ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; - ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr; - ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr; - ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated; - ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; - ws->base.buffer_get_virtual_address = radeon_winsys_bo_va; - ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset; - ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain; + ws->base.buffer_set_metadata = radeon_bo_set_metadata; + ws->base.buffer_get_metadata = radeon_bo_get_metadata; + ws->base.buffer_map = radeon_bo_map; + ws->base.buffer_unmap = radeon_bo_unmap; + ws->base.buffer_wait = radeon_bo_wait; + 
ws->base.buffer_create = radeon_winsys_bo_create; + ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; + ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr; + ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr; + ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated; + ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; + ws->base.buffer_get_virtual_address = radeon_winsys_bo_va; + ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset; + ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index bc7cba38817..906d932d89a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -33,47 +33,47 @@ #include "pipebuffer/pb_slab.h" struct radeon_bo { - struct pb_buffer base; - union { - struct { - struct pb_cache_entry cache_entry; + struct pb_buffer base; + union { + struct { + struct pb_cache_entry cache_entry; - void *ptr; - mtx_t map_mutex; - unsigned map_count; - bool use_reusable_pool; - } real; - struct { - struct pb_slab_entry entry; - struct radeon_bo *real; + void *ptr; + mtx_t map_mutex; + unsigned map_count; + bool use_reusable_pool; + } real; + struct { + struct pb_slab_entry entry; + struct radeon_bo *real; - unsigned num_fences; - unsigned max_fences; - struct radeon_bo **fences; - } slab; - } u; + unsigned num_fences; + unsigned max_fences; + struct radeon_bo **fences; + } slab; + } u; - struct radeon_drm_winsys *rws; - void *user_ptr; /* from buffer_from_ptr */ + struct radeon_drm_winsys *rws; + void *user_ptr; /* from buffer_from_ptr */ - uint32_t handle; /* 0 for slab entries */ - uint32_t flink_name; - uint64_t va; - uint32_t hash; - enum radeon_bo_domain initial_domain; + uint32_t handle; /* 0 for slab entries */ + uint32_t flink_name; + uint64_t va; + uint32_t hash; + enum radeon_bo_domain initial_domain; - /* how many command streams is this bo referenced in? */ - int num_cs_references; + /* how many command streams is this bo referenced in? */ + int num_cs_references; - /* how many command streams, which are being emitted in a separate - * thread, is this bo referenced in? */ - int num_active_ioctls; + /* how many command streams, which are being emitted in a separate + * thread, is this bo referenced in? 
*/ + int num_active_ioctls; }; struct radeon_slab { - struct pb_slab base; - struct radeon_bo *buffer; - struct radeon_bo *entries; + struct pb_slab base; + struct radeon_bo *buffer; + struct radeon_bo *entries; }; void radeon_bo_destroy(struct pb_buffer *_buf); @@ -89,7 +89,7 @@ void radeon_bo_slab_free(void *priv, struct pb_slab *slab); static inline void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src) { - pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); + pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); } void *radeon_bo_do_map(struct radeon_bo *bo); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 6726f6a77ab..403ade2e848 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -66,100 +66,99 @@ #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) -static struct pipe_fence_handle * -radeon_cs_create_fence(struct radeon_cmdbuf *rcs); +static struct pipe_fence_handle *radeon_cs_create_fence(struct radeon_cmdbuf *rcs); static void radeon_fence_reference(struct pipe_fence_handle **dst, struct pipe_fence_handle *src); static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws) { - struct radeon_ctx *ctx = CALLOC_STRUCT(radeon_ctx); - if (!ctx) - return NULL; + struct radeon_ctx *ctx = CALLOC_STRUCT(radeon_ctx); + if (!ctx) + return NULL; - ctx->ws = (struct radeon_drm_winsys*)ws; - ctx->gpu_reset_counter = radeon_drm_get_gpu_reset_counter(ctx->ws); - return (struct radeon_winsys_ctx*)ctx; + ctx->ws = (struct radeon_drm_winsys*)ws; + ctx->gpu_reset_counter = radeon_drm_get_gpu_reset_counter(ctx->ws); + return (struct radeon_winsys_ctx*)ctx; } static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx) { - FREE(ctx); + FREE(ctx); } static enum pipe_reset_status radeon_drm_ctx_query_reset_status(struct radeon_winsys_ctx *rctx) { - struct radeon_ctx *ctx = (struct radeon_ctx*)rctx; + struct radeon_ctx *ctx = (struct radeon_ctx*)rctx; - unsigned latest = radeon_drm_get_gpu_reset_counter(ctx->ws); + unsigned latest = radeon_drm_get_gpu_reset_counter(ctx->ws); - if (ctx->gpu_reset_counter == latest) - return PIPE_NO_RESET; + if (ctx->gpu_reset_counter == latest) + return PIPE_NO_RESET; - ctx->gpu_reset_counter = latest; - return PIPE_UNKNOWN_CONTEXT_RESET; + ctx->gpu_reset_counter = latest; + return PIPE_UNKNOWN_CONTEXT_RESET; } static bool radeon_init_cs_context(struct radeon_cs_context *csc, struct radeon_drm_winsys *ws) { - int i; - - csc->fd = ws->fd; - - csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - csc->chunks[0].length_dw = 0; - csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf; - csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - csc->chunks[1].length_dw = 0; - csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; - csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS; - csc->chunks[2].length_dw = 2; - csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags; - - csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0]; - csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1]; - csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2]; - - csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array; - - for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) { - csc->reloc_indices_hashlist[i] = -1; - } - return true; + int i; + + csc->fd = ws->fd; + + csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; + csc->chunks[0].length_dw = 0; + 
csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf; + csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; + csc->chunks[1].length_dw = 0; + csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; + csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS; + csc->chunks[2].length_dw = 2; + csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags; + + csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0]; + csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1]; + csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2]; + + csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array; + + for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) { + csc->reloc_indices_hashlist[i] = -1; + } + return true; } static void radeon_cs_context_cleanup(struct radeon_cs_context *csc) { - unsigned i; - - for (i = 0; i < csc->num_relocs; i++) { - p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references); - radeon_bo_reference(&csc->relocs_bo[i].bo, NULL); - } - for (i = 0; i < csc->num_slab_buffers; ++i) { - p_atomic_dec(&csc->slab_buffers[i].bo->num_cs_references); - radeon_bo_reference(&csc->slab_buffers[i].bo, NULL); - } - - csc->num_relocs = 0; - csc->num_validated_relocs = 0; - csc->num_slab_buffers = 0; - csc->chunks[0].length_dw = 0; - csc->chunks[1].length_dw = 0; - - for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) { - csc->reloc_indices_hashlist[i] = -1; - } + unsigned i; + + for (i = 0; i < csc->num_relocs; i++) { + p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references); + radeon_bo_reference(&csc->relocs_bo[i].bo, NULL); + } + for (i = 0; i < csc->num_slab_buffers; ++i) { + p_atomic_dec(&csc->slab_buffers[i].bo->num_cs_references); + radeon_bo_reference(&csc->slab_buffers[i].bo, NULL); + } + + csc->num_relocs = 0; + csc->num_validated_relocs = 0; + csc->num_slab_buffers = 0; + csc->chunks[0].length_dw = 0; + csc->chunks[1].length_dw = 0; + + for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) { + csc->reloc_indices_hashlist[i] = -1; + } } static void radeon_destroy_cs_context(struct radeon_cs_context *csc) { - radeon_cs_context_cleanup(csc); - FREE(csc->slab_buffers); - FREE(csc->relocs_bo); - FREE(csc->relocs); + radeon_cs_context_cleanup(csc); + FREE(csc->slab_buffers); + FREE(csc->relocs_bo); + FREE(csc->relocs); } @@ -171,275 +170,275 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx, void *flush_ctx, bool stop_exec_on_failure) { - struct radeon_drm_winsys *ws = ((struct radeon_ctx*)ctx)->ws; - struct radeon_drm_cs *cs; - - cs = CALLOC_STRUCT(radeon_drm_cs); - if (!cs) { - return NULL; - } - util_queue_fence_init(&cs->flush_completed); - - cs->ws = ws; - cs->flush_cs = flush; - cs->flush_data = flush_ctx; - - if (!radeon_init_cs_context(&cs->csc1, cs->ws)) { - FREE(cs); - return NULL; - } - if (!radeon_init_cs_context(&cs->csc2, cs->ws)) { - radeon_destroy_cs_context(&cs->csc1); - FREE(cs); - return NULL; - } - - /* Set the first command buffer as current. 
*/ - cs->csc = &cs->csc1; - cs->cst = &cs->csc2; - cs->base.current.buf = cs->csc->buf; - cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf); - cs->ring_type = ring_type; - - p_atomic_inc(&ws->num_cs); - return &cs->base; + struct radeon_drm_winsys *ws = ((struct radeon_ctx*)ctx)->ws; + struct radeon_drm_cs *cs; + + cs = CALLOC_STRUCT(radeon_drm_cs); + if (!cs) { + return NULL; + } + util_queue_fence_init(&cs->flush_completed); + + cs->ws = ws; + cs->flush_cs = flush; + cs->flush_data = flush_ctx; + + if (!radeon_init_cs_context(&cs->csc1, cs->ws)) { + FREE(cs); + return NULL; + } + if (!radeon_init_cs_context(&cs->csc2, cs->ws)) { + radeon_destroy_cs_context(&cs->csc1); + FREE(cs); + return NULL; + } + + /* Set the first command buffer as current. */ + cs->csc = &cs->csc1; + cs->cst = &cs->csc2; + cs->base.current.buf = cs->csc->buf; + cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf); + cs->ring_type = ring_type; + + p_atomic_inc(&ws->num_cs); + return &cs->base; } int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo) { - unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1); - struct radeon_bo_item *buffers; - unsigned num_buffers; - int i = csc->reloc_indices_hashlist[hash]; - - if (bo->handle) { - buffers = csc->relocs_bo; - num_buffers = csc->num_relocs; - } else { - buffers = csc->slab_buffers; - num_buffers = csc->num_slab_buffers; - } - - /* not found or found */ - if (i == -1 || (i < num_buffers && buffers[i].bo == bo)) - return i; - - /* Hash collision, look for the BO in the list of relocs linearly. */ - for (i = num_buffers - 1; i >= 0; i--) { - if (buffers[i].bo == bo) { - /* Put this reloc in the hash list. - * This will prevent additional hash collisions if there are - * several consecutive lookup_buffer calls for the same buffer. - * - * Example: Assuming buffers A,B,C collide in the hash list, - * the following sequence of relocs: - * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC - * will collide here: ^ and here: ^, - * meaning that we should get very few collisions in the end. */ - csc->reloc_indices_hashlist[hash] = i; - return i; - } - } - return -1; + unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1); + struct radeon_bo_item *buffers; + unsigned num_buffers; + int i = csc->reloc_indices_hashlist[hash]; + + if (bo->handle) { + buffers = csc->relocs_bo; + num_buffers = csc->num_relocs; + } else { + buffers = csc->slab_buffers; + num_buffers = csc->num_slab_buffers; + } + + /* not found or found */ + if (i == -1 || (i < num_buffers && buffers[i].bo == bo)) + return i; + + /* Hash collision, look for the BO in the list of relocs linearly. */ + for (i = num_buffers - 1; i >= 0; i--) { + if (buffers[i].bo == bo) { + /* Put this reloc in the hash list. + * This will prevent additional hash collisions if there are + * several consecutive lookup_buffer calls for the same buffer. + * + * Example: Assuming buffers A,B,C collide in the hash list, + * the following sequence of relocs: + * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC + * will collide here: ^ and here: ^, + * meaning that we should get very few collisions in the end. 
*/ + csc->reloc_indices_hashlist[hash] = i; + return i; + } + } + return -1; } static unsigned radeon_lookup_or_add_real_buffer(struct radeon_drm_cs *cs, struct radeon_bo *bo) { - struct radeon_cs_context *csc = cs->csc; - struct drm_radeon_cs_reloc *reloc; - unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1); - int i = -1; - - i = radeon_lookup_buffer(csc, bo); - - if (i >= 0) { - /* For async DMA, every add_buffer call must add a buffer to the list - * no matter how many duplicates there are. This is due to the fact - * the DMA CS checker doesn't use NOP packets for offset patching, - * but always uses the i-th buffer from the list to patch the i-th - * offset. If there are N offsets in a DMA CS, there must also be N - * buffers in the relocation list. - * - * This doesn't have to be done if virtual memory is enabled, - * because there is no offset patching with virtual memory. - */ - if (cs->ring_type != RING_DMA || cs->ws->info.r600_has_virtual_memory) { - return i; - } - } - - /* New relocation, check if the backing array is large enough. */ - if (csc->num_relocs >= csc->max_relocs) { - uint32_t size; - csc->max_relocs = MAX2(csc->max_relocs + 16, (unsigned)(csc->max_relocs * 1.3)); - - size = csc->max_relocs * sizeof(csc->relocs_bo[0]); - csc->relocs_bo = realloc(csc->relocs_bo, size); - - size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc); - csc->relocs = realloc(csc->relocs, size); - - csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; - } - - /* Initialize the new relocation. */ - csc->relocs_bo[csc->num_relocs].bo = NULL; - csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0; - radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo); - p_atomic_inc(&bo->num_cs_references); - reloc = &csc->relocs[csc->num_relocs]; - reloc->handle = bo->handle; - reloc->read_domains = 0; - reloc->write_domain = 0; - reloc->flags = 0; - - csc->reloc_indices_hashlist[hash] = csc->num_relocs; - - csc->chunks[1].length_dw += RELOC_DWORDS; - - return csc->num_relocs++; + struct radeon_cs_context *csc = cs->csc; + struct drm_radeon_cs_reloc *reloc; + unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1); + int i = -1; + + i = radeon_lookup_buffer(csc, bo); + + if (i >= 0) { + /* For async DMA, every add_buffer call must add a buffer to the list + * no matter how many duplicates there are. This is due to the fact + * the DMA CS checker doesn't use NOP packets for offset patching, + * but always uses the i-th buffer from the list to patch the i-th + * offset. If there are N offsets in a DMA CS, there must also be N + * buffers in the relocation list. + * + * This doesn't have to be done if virtual memory is enabled, + * because there is no offset patching with virtual memory. + */ + if (cs->ring_type != RING_DMA || cs->ws->info.r600_has_virtual_memory) { + return i; + } + } + + /* New relocation, check if the backing array is large enough. */ + if (csc->num_relocs >= csc->max_relocs) { + uint32_t size; + csc->max_relocs = MAX2(csc->max_relocs + 16, (unsigned)(csc->max_relocs * 1.3)); + + size = csc->max_relocs * sizeof(csc->relocs_bo[0]); + csc->relocs_bo = realloc(csc->relocs_bo, size); + + size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc); + csc->relocs = realloc(csc->relocs, size); + + csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; + } + + /* Initialize the new relocation. 
*/ + csc->relocs_bo[csc->num_relocs].bo = NULL; + csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0; + radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo); + p_atomic_inc(&bo->num_cs_references); + reloc = &csc->relocs[csc->num_relocs]; + reloc->handle = bo->handle; + reloc->read_domains = 0; + reloc->write_domain = 0; + reloc->flags = 0; + + csc->reloc_indices_hashlist[hash] = csc->num_relocs; + + csc->chunks[1].length_dw += RELOC_DWORDS; + + return csc->num_relocs++; } static int radeon_lookup_or_add_slab_buffer(struct radeon_drm_cs *cs, struct radeon_bo *bo) { - struct radeon_cs_context *csc = cs->csc; - unsigned hash; - struct radeon_bo_item *item; - int idx; - int real_idx; - - idx = radeon_lookup_buffer(csc, bo); - if (idx >= 0) - return idx; - - real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real); - - /* Check if the backing array is large enough. */ - if (csc->num_slab_buffers >= csc->max_slab_buffers) { - unsigned new_max = MAX2(csc->max_slab_buffers + 16, - (unsigned)(csc->max_slab_buffers * 1.3)); - struct radeon_bo_item *new_buffers = + struct radeon_cs_context *csc = cs->csc; + unsigned hash; + struct radeon_bo_item *item; + int idx; + int real_idx; + + idx = radeon_lookup_buffer(csc, bo); + if (idx >= 0) + return idx; + + real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real); + + /* Check if the backing array is large enough. */ + if (csc->num_slab_buffers >= csc->max_slab_buffers) { + unsigned new_max = MAX2(csc->max_slab_buffers + 16, + (unsigned)(csc->max_slab_buffers * 1.3)); + struct radeon_bo_item *new_buffers = REALLOC(csc->slab_buffers, csc->max_slab_buffers * sizeof(*new_buffers), new_max * sizeof(*new_buffers)); - if (!new_buffers) { - fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n"); - return -1; - } + if (!new_buffers) { + fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n"); + return -1; + } - csc->max_slab_buffers = new_max; - csc->slab_buffers = new_buffers; - } + csc->max_slab_buffers = new_max; + csc->slab_buffers = new_buffers; + } - /* Initialize the new relocation. */ - idx = csc->num_slab_buffers++; - item = &csc->slab_buffers[idx]; + /* Initialize the new relocation. */ + idx = csc->num_slab_buffers++; + item = &csc->slab_buffers[idx]; - item->bo = NULL; - item->u.slab.real_idx = real_idx; - radeon_bo_reference(&item->bo, bo); - p_atomic_inc(&bo->num_cs_references); + item->bo = NULL; + item->u.slab.real_idx = real_idx; + radeon_bo_reference(&item->bo, bo); + p_atomic_inc(&bo->num_cs_references); - hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1); - csc->reloc_indices_hashlist[hash] = idx; + hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1); + csc->reloc_indices_hashlist[hash] = idx; - return idx; + return idx; } static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs, - struct pb_buffer *buf, - enum radeon_bo_usage usage, - enum radeon_bo_domain domains, - enum radeon_bo_priority priority) + struct pb_buffer *buf, + enum radeon_bo_usage usage, + enum radeon_bo_domain domains, + enum radeon_bo_priority priority) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - struct radeon_bo *bo = (struct radeon_bo*)buf; - enum radeon_bo_domain added_domains; - - /* If VRAM is just stolen system memory, allow both VRAM and - * GTT, whichever has free space. If a buffer is evicted from - * VRAM to GTT, it will stay there. 
- */ - if (!cs->ws->info.has_dedicated_vram) - domains |= RADEON_DOMAIN_GTT; - - enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0; - enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0; - struct drm_radeon_cs_reloc *reloc; - int index; - - if (!bo->handle) { - index = radeon_lookup_or_add_slab_buffer(cs, bo); - if (index < 0) - return 0; - - index = cs->csc->slab_buffers[index].u.slab.real_idx; - } else { - index = radeon_lookup_or_add_real_buffer(cs, bo); - } - - reloc = &cs->csc->relocs[index]; - added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain); - reloc->read_domains |= rd; - reloc->write_domain |= wd; - reloc->flags = MAX2(reloc->flags, priority); - cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority; - - if (added_domains & RADEON_DOMAIN_VRAM) - cs->base.used_vram += bo->base.size; - else if (added_domains & RADEON_DOMAIN_GTT) - cs->base.used_gart += bo->base.size; - - return index; + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_bo *bo = (struct radeon_bo*)buf; + enum radeon_bo_domain added_domains; + + /* If VRAM is just stolen system memory, allow both VRAM and + * GTT, whichever has free space. If a buffer is evicted from + * VRAM to GTT, it will stay there. + */ + if (!cs->ws->info.has_dedicated_vram) + domains |= RADEON_DOMAIN_GTT; + + enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0; + enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0; + struct drm_radeon_cs_reloc *reloc; + int index; + + if (!bo->handle) { + index = radeon_lookup_or_add_slab_buffer(cs, bo); + if (index < 0) + return 0; + + index = cs->csc->slab_buffers[index].u.slab.real_idx; + } else { + index = radeon_lookup_or_add_real_buffer(cs, bo); + } + + reloc = &cs->csc->relocs[index]; + added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain); + reloc->read_domains |= rd; + reloc->write_domain |= wd; + reloc->flags = MAX2(reloc->flags, priority); + cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority; + + if (added_domains & RADEON_DOMAIN_VRAM) + cs->base.used_vram += bo->base.size; + else if (added_domains & RADEON_DOMAIN_GTT) + cs->base.used_gart += bo->base.size; + + return index; } static int radeon_drm_cs_lookup_buffer(struct radeon_cmdbuf *rcs, - struct pb_buffer *buf) + struct pb_buffer *buf) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf); + return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf); } static bool radeon_drm_cs_validate(struct radeon_cmdbuf *rcs) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - bool status = - cs->base.used_gart < cs->ws->info.gart_size * 0.8 && - cs->base.used_vram < cs->ws->info.vram_size * 0.8; - - if (status) { - cs->csc->num_validated_relocs = cs->csc->num_relocs; - } else { - /* Remove lately-added buffers. The validation failed with them - * and the CS is about to be flushed because of that. Keep only - * the already-validated buffers. */ - unsigned i; - - for (i = cs->csc->num_validated_relocs; i < cs->csc->num_relocs; i++) { - p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references); - radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL); - } - cs->csc->num_relocs = cs->csc->num_validated_relocs; - - /* Flush if there are any relocs. Clean up otherwise. 
*/ - if (cs->csc->num_relocs) { - cs->flush_cs(cs->flush_data, - RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); - } else { - radeon_cs_context_cleanup(cs->csc); - cs->base.used_vram = 0; - cs->base.used_gart = 0; - - assert(cs->base.current.cdw == 0); - if (cs->base.current.cdw != 0) { - fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__); - } - } - } - return status; + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + bool status = + cs->base.used_gart < cs->ws->info.gart_size * 0.8 && + cs->base.used_vram < cs->ws->info.vram_size * 0.8; + + if (status) { + cs->csc->num_validated_relocs = cs->csc->num_relocs; + } else { + /* Remove lately-added buffers. The validation failed with them + * and the CS is about to be flushed because of that. Keep only + * the already-validated buffers. */ + unsigned i; + + for (i = cs->csc->num_validated_relocs; i < cs->csc->num_relocs; i++) { + p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references); + radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL); + } + cs->csc->num_relocs = cs->csc->num_validated_relocs; + + /* Flush if there are any relocs. Clean up otherwise. */ + if (cs->csc->num_relocs) { + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); + } else { + radeon_cs_context_cleanup(cs->csc); + cs->base.used_vram = 0; + cs->base.used_gart = 0; + + assert(cs->base.current.cdw == 0); + if (cs->base.current.cdw != 0) { + fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__); + } + } + } + return status; } static bool radeon_drm_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw, @@ -452,49 +451,49 @@ static bool radeon_drm_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw, static unsigned radeon_drm_cs_get_buffer_list(struct radeon_cmdbuf *rcs, struct radeon_bo_list_item *list) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - int i; - - if (list) { - for (i = 0; i < cs->csc->num_relocs; i++) { - list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size; - list[i].vm_address = cs->csc->relocs_bo[i].bo->va; - list[i].priority_usage = cs->csc->relocs_bo[i].u.real.priority_usage; - } - } - return cs->csc->num_relocs; + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + int i; + + if (list) { + for (i = 0; i < cs->csc->num_relocs; i++) { + list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size; + list[i].vm_address = cs->csc->relocs_bo[i].bo->va; + list[i].priority_usage = cs->csc->relocs_bo[i].u.real.priority_usage; + } + } + return cs->csc->num_relocs; } void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index) { - struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst; - unsigned i; - int r; - - r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS, - &csc->cs, sizeof(struct drm_radeon_cs)); - if (r) { - if (r == -ENOMEM) - fprintf(stderr, "radeon: Not enough memory for command submission.\n"); - else if (debug_get_bool_option("RADEON_DUMP_CS", false)) { - unsigned i; - - fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); - for (i = 0; i < csc->chunks[0].length_dw; i++) { - fprintf(stderr, "0x%08X\n", csc->buf[i]); - } - } else { - fprintf(stderr, "radeon: The kernel rejected CS, " - "see dmesg for more information (%i).\n", r); - } - } - - for (i = 0; i < csc->num_relocs; i++) - p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls); - for (i = 0; i < csc->num_slab_buffers; i++) - p_atomic_dec(&csc->slab_buffers[i].bo->num_active_ioctls); - - radeon_cs_context_cleanup(csc); + struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst; + unsigned i; + int r; + + r 
= drmCommandWriteRead(csc->fd, DRM_RADEON_CS, + &csc->cs, sizeof(struct drm_radeon_cs)); + if (r) { + if (r == -ENOMEM) + fprintf(stderr, "radeon: Not enough memory for command submission.\n"); + else if (debug_get_bool_option("RADEON_DUMP_CS", false)) { + unsigned i; + + fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); + for (i = 0; i < csc->chunks[0].length_dw; i++) { + fprintf(stderr, "0x%08X\n", csc->buf[i]); + } + } else { + fprintf(stderr, "radeon: The kernel rejected CS, " + "see dmesg for more information (%i).\n", r); + } + } + + for (i = 0; i < csc->num_relocs; i++) + p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls); + for (i = 0; i < csc->num_slab_buffers; i++) + p_atomic_dec(&csc->slab_buffers[i].bo->num_active_ioctls); + + radeon_cs_context_cleanup(csc); } /* @@ -502,11 +501,11 @@ void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index) */ void radeon_drm_cs_sync_flush(struct radeon_cmdbuf *rcs) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - /* Wait for any pending ioctl of this CS to complete. */ - if (util_queue_is_initialized(&cs->ws->cs_queue)) - util_queue_fence_wait(&cs->flush_completed); + /* Wait for any pending ioctl of this CS to complete. */ + if (util_queue_is_initialized(&cs->ws->cs_queue)) + util_queue_fence_wait(&cs->flush_completed); } /* Add the given fence to a slab buffer fence list. @@ -522,41 +521,41 @@ void radeon_drm_cs_sync_flush(struct radeon_cmdbuf *rcs) */ static void radeon_bo_slab_fence(struct radeon_bo *bo, struct radeon_bo *fence) { - unsigned dst; - - assert(fence->num_cs_references); - - /* Cleanup older fences */ - dst = 0; - for (unsigned src = 0; src < bo->u.slab.num_fences; ++src) { - if (bo->u.slab.fences[src]->num_cs_references) { - bo->u.slab.fences[dst] = bo->u.slab.fences[src]; - dst++; - } else { - radeon_bo_reference(&bo->u.slab.fences[src], NULL); - } - } - bo->u.slab.num_fences = dst; - - /* Check available space for the new fence */ - if (bo->u.slab.num_fences >= bo->u.slab.max_fences) { - unsigned new_max_fences = bo->u.slab.max_fences + 1; - struct radeon_bo **new_fences = REALLOC(bo->u.slab.fences, - bo->u.slab.max_fences * sizeof(*new_fences), - new_max_fences * sizeof(*new_fences)); - if (!new_fences) { - fprintf(stderr, "radeon_bo_slab_fence: allocation failure, dropping fence\n"); - return; - } - - bo->u.slab.fences = new_fences; - bo->u.slab.max_fences = new_max_fences; - } - - /* Add the new fence */ - bo->u.slab.fences[bo->u.slab.num_fences] = NULL; - radeon_bo_reference(&bo->u.slab.fences[bo->u.slab.num_fences], fence); - bo->u.slab.num_fences++; + unsigned dst; + + assert(fence->num_cs_references); + + /* Cleanup older fences */ + dst = 0; + for (unsigned src = 0; src < bo->u.slab.num_fences; ++src) { + if (bo->u.slab.fences[src]->num_cs_references) { + bo->u.slab.fences[dst] = bo->u.slab.fences[src]; + dst++; + } else { + radeon_bo_reference(&bo->u.slab.fences[src], NULL); + } + } + bo->u.slab.num_fences = dst; + + /* Check available space for the new fence */ + if (bo->u.slab.num_fences >= bo->u.slab.max_fences) { + unsigned new_max_fences = bo->u.slab.max_fences + 1; + struct radeon_bo **new_fences = REALLOC(bo->u.slab.fences, + bo->u.slab.max_fences * sizeof(*new_fences), + new_max_fences * sizeof(*new_fences)); + if (!new_fences) { + fprintf(stderr, "radeon_bo_slab_fence: allocation failure, dropping fence\n"); + return; + } + + bo->u.slab.fences = new_fences; + bo->u.slab.max_fences = new_max_fences; + } + + /* Add the new 
fence */ + bo->u.slab.fences[bo->u.slab.num_fences] = NULL; + radeon_bo_reference(&bo->u.slab.fences[bo->u.slab.num_fences], fence); + bo->u.slab.num_fences++; } DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false) @@ -565,241 +564,239 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs, unsigned flags, struct pipe_fence_handle **pfence) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - struct radeon_cs_context *tmp; - - switch (cs->ring_type) { - case RING_DMA: - /* pad DMA ring to 8 DWs */ - if (cs->ws->info.chip_class <= GFX6) { - while (rcs->current.cdw & 7) - radeon_emit(&cs->base, 0xf0000000); /* NOP packet */ - } else { - while (rcs->current.cdw & 7) - radeon_emit(&cs->base, 0x00000000); /* NOP packet */ - } - break; - case RING_GFX: - /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements - * r6xx, requires at least 4 dw alignment to avoid a hw bug. - */ - if (cs->ws->info.gfx_ib_pad_with_type2) { - while (rcs->current.cdw & 7) - radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */ - } else { - while (rcs->current.cdw & 7) - radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */ - } - break; - case RING_UVD: - while (rcs->current.cdw & 15) + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_cs_context *tmp; + + switch (cs->ring_type) { + case RING_DMA: + /* pad DMA ring to 8 DWs */ + if (cs->ws->info.chip_class <= GFX6) { + while (rcs->current.cdw & 7) + radeon_emit(&cs->base, 0xf0000000); /* NOP packet */ + } else { + while (rcs->current.cdw & 7) + radeon_emit(&cs->base, 0x00000000); /* NOP packet */ + } + break; + case RING_GFX: + /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements + * r6xx, requires at least 4 dw alignment to avoid a hw bug. + */ + if (cs->ws->info.gfx_ib_pad_with_type2) { + while (rcs->current.cdw & 7) radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */ - break; - default: - break; - } - - if (rcs->current.cdw > rcs->current.max_dw) { - fprintf(stderr, "radeon: command stream overflowed\n"); - } - - if (pfence || cs->csc->num_slab_buffers) { - struct pipe_fence_handle *fence; - - if (cs->next_fence) { - fence = cs->next_fence; - cs->next_fence = NULL; - } else { - fence = radeon_cs_create_fence(rcs); - } - - if (fence) { - if (pfence) - radeon_fence_reference(pfence, fence); - - mtx_lock(&cs->ws->bo_fence_lock); - for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) { - struct radeon_bo *bo = cs->csc->slab_buffers[i].bo; - p_atomic_inc(&bo->num_active_ioctls); - radeon_bo_slab_fence(bo, (struct radeon_bo *)fence); - } - mtx_unlock(&cs->ws->bo_fence_lock); - - radeon_fence_reference(&fence, NULL); - } - } else { - radeon_fence_reference(&cs->next_fence, NULL); - } - - radeon_drm_cs_sync_flush(rcs); - - /* Swap command streams. */ - tmp = cs->csc; - cs->csc = cs->cst; - cs->cst = tmp; - - /* If the CS is not empty or overflowed, emit it in a separate thread. */ - if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) { - unsigned i, num_relocs; - - num_relocs = cs->cst->num_relocs; - - cs->cst->chunks[0].length_dw = cs->base.current.cdw; - - for (i = 0; i < num_relocs; i++) { - /* Update the number of active asynchronous CS ioctls for the buffer. 
*/ - p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls); - } - - switch (cs->ring_type) { - case RING_DMA: - cs->cst->flags[0] = 0; - cs->cst->flags[1] = RADEON_CS_RING_DMA; - cs->cst->cs.num_chunks = 3; - if (cs->ws->info.r600_has_virtual_memory) { - cs->cst->flags[0] |= RADEON_CS_USE_VM; - } - break; - - case RING_UVD: - cs->cst->flags[0] = 0; - cs->cst->flags[1] = RADEON_CS_RING_UVD; - cs->cst->cs.num_chunks = 3; - break; + } else { + while (rcs->current.cdw & 7) + radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */ + } + break; + case RING_UVD: + while (rcs->current.cdw & 15) + radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */ + break; + default: + break; + } - case RING_VCE: - cs->cst->flags[0] = 0; - cs->cst->flags[1] = RADEON_CS_RING_VCE; - cs->cst->cs.num_chunks = 3; - break; + if (rcs->current.cdw > rcs->current.max_dw) { + fprintf(stderr, "radeon: command stream overflowed\n"); + } + + if (pfence || cs->csc->num_slab_buffers) { + struct pipe_fence_handle *fence; + + if (cs->next_fence) { + fence = cs->next_fence; + cs->next_fence = NULL; + } else { + fence = radeon_cs_create_fence(rcs); + } + + if (fence) { + if (pfence) + radeon_fence_reference(pfence, fence); + + mtx_lock(&cs->ws->bo_fence_lock); + for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) { + struct radeon_bo *bo = cs->csc->slab_buffers[i].bo; + p_atomic_inc(&bo->num_active_ioctls); + radeon_bo_slab_fence(bo, (struct radeon_bo *)fence); + } + mtx_unlock(&cs->ws->bo_fence_lock); + + radeon_fence_reference(&fence, NULL); + } + } else { + radeon_fence_reference(&cs->next_fence, NULL); + } - default: - case RING_GFX: - case RING_COMPUTE: - cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS; - cs->cst->flags[1] = RADEON_CS_RING_GFX; + radeon_drm_cs_sync_flush(rcs); + + /* Swap command streams. */ + tmp = cs->csc; + cs->csc = cs->cst; + cs->cst = tmp; + + /* If the CS is not empty or overflowed, emit it in a separate thread. */ + if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) { + unsigned i, num_relocs; + + num_relocs = cs->cst->num_relocs; + + cs->cst->chunks[0].length_dw = cs->base.current.cdw; + + for (i = 0; i < num_relocs; i++) { + /* Update the number of active asynchronous CS ioctls for the buffer. 
*/ + p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls); + } + + switch (cs->ring_type) { + case RING_DMA: + cs->cst->flags[0] = 0; + cs->cst->flags[1] = RADEON_CS_RING_DMA; + cs->cst->cs.num_chunks = 3; + if (cs->ws->info.r600_has_virtual_memory) { + cs->cst->flags[0] |= RADEON_CS_USE_VM; + } + break; + + case RING_UVD: + cs->cst->flags[0] = 0; + cs->cst->flags[1] = RADEON_CS_RING_UVD; + cs->cst->cs.num_chunks = 3; + break; + + case RING_VCE: + cs->cst->flags[0] = 0; + cs->cst->flags[1] = RADEON_CS_RING_VCE; + cs->cst->cs.num_chunks = 3; + break; + + default: + case RING_GFX: + case RING_COMPUTE: + cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS; + cs->cst->flags[1] = RADEON_CS_RING_GFX; + cs->cst->cs.num_chunks = 3; + + if (cs->ws->info.r600_has_virtual_memory) { + cs->cst->flags[0] |= RADEON_CS_USE_VM; + cs->cst->cs.num_chunks = 3; + } + if (flags & PIPE_FLUSH_END_OF_FRAME) { + cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; + cs->cst->cs.num_chunks = 3; + } + if (cs->ring_type == RING_COMPUTE) { + cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; cs->cst->cs.num_chunks = 3; + } + break; + } + + if (util_queue_is_initialized(&cs->ws->cs_queue)) { + util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed, + radeon_drm_cs_emit_ioctl_oneshot, NULL, 0); + if (!(flags & PIPE_FLUSH_ASYNC)) + radeon_drm_cs_sync_flush(rcs); + } else { + radeon_drm_cs_emit_ioctl_oneshot(cs, 0); + } + } else { + radeon_cs_context_cleanup(cs->cst); + } - if (cs->ws->info.r600_has_virtual_memory) { - cs->cst->flags[0] |= RADEON_CS_USE_VM; - cs->cst->cs.num_chunks = 3; - } - if (flags & PIPE_FLUSH_END_OF_FRAME) { - cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; - cs->cst->cs.num_chunks = 3; - } - if (cs->ring_type == RING_COMPUTE) { - cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; - cs->cst->cs.num_chunks = 3; - } - break; - } - - if (util_queue_is_initialized(&cs->ws->cs_queue)) { - util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed, - radeon_drm_cs_emit_ioctl_oneshot, NULL, 0); - if (!(flags & PIPE_FLUSH_ASYNC)) - radeon_drm_cs_sync_flush(rcs); - } else { - radeon_drm_cs_emit_ioctl_oneshot(cs, 0); - } - } else { - radeon_cs_context_cleanup(cs->cst); - } - - /* Prepare a new CS. */ - cs->base.current.buf = cs->csc->buf; - cs->base.current.cdw = 0; - cs->base.used_vram = 0; - cs->base.used_gart = 0; - - if (cs->ring_type == RING_GFX) - cs->ws->num_gfx_IBs++; - else if (cs->ring_type == RING_DMA) - cs->ws->num_sdma_IBs++; - return 0; + /* Prepare a new CS. 
*/ + cs->base.current.buf = cs->csc->buf; + cs->base.current.cdw = 0; + cs->base.used_vram = 0; + cs->base.used_gart = 0; + + if (cs->ring_type == RING_GFX) + cs->ws->num_gfx_IBs++; + else if (cs->ring_type == RING_DMA) + cs->ws->num_sdma_IBs++; + return 0; } static void radeon_drm_cs_destroy(struct radeon_cmdbuf *rcs) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - - radeon_drm_cs_sync_flush(rcs); - util_queue_fence_destroy(&cs->flush_completed); - radeon_cs_context_cleanup(&cs->csc1); - radeon_cs_context_cleanup(&cs->csc2); - p_atomic_dec(&cs->ws->num_cs); - radeon_destroy_cs_context(&cs->csc1); - radeon_destroy_cs_context(&cs->csc2); - radeon_fence_reference(&cs->next_fence, NULL); - FREE(cs); + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + + radeon_drm_cs_sync_flush(rcs); + util_queue_fence_destroy(&cs->flush_completed); + radeon_cs_context_cleanup(&cs->csc1); + radeon_cs_context_cleanup(&cs->csc2); + p_atomic_dec(&cs->ws->num_cs); + radeon_destroy_cs_context(&cs->csc1); + radeon_destroy_cs_context(&cs->csc2); + radeon_fence_reference(&cs->next_fence, NULL); + FREE(cs); } static bool radeon_bo_is_referenced(struct radeon_cmdbuf *rcs, struct pb_buffer *_buf, enum radeon_bo_usage usage) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - struct radeon_bo *bo = (struct radeon_bo*)_buf; - int index; + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_bo *bo = (struct radeon_bo*)_buf; + int index; - if (!bo->num_cs_references) - return false; + if (!bo->num_cs_references) + return false; - index = radeon_lookup_buffer(cs->csc, bo); - if (index == -1) - return false; + index = radeon_lookup_buffer(cs->csc, bo); + if (index == -1) + return false; - if (!bo->handle) - index = cs->csc->slab_buffers[index].u.slab.real_idx; + if (!bo->handle) + index = cs->csc->slab_buffers[index].u.slab.real_idx; - if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain) - return true; - if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains) - return true; + if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain) + return true; + if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains) + return true; - return false; + return false; } /* FENCES */ -static struct pipe_fence_handle * -radeon_cs_create_fence(struct radeon_cmdbuf *rcs) +static struct pipe_fence_handle *radeon_cs_create_fence(struct radeon_cmdbuf *rcs) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - struct pb_buffer *fence; - - /* Create a fence, which is a dummy BO. */ - fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, - RADEON_DOMAIN_GTT, - RADEON_FLAG_NO_SUBALLOC - | RADEON_FLAG_NO_INTERPROCESS_SHARING); - if (!fence) - return NULL; - - /* Add the fence as a dummy relocation. */ - cs->ws->base.cs_add_buffer(rcs, fence, + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct pb_buffer *fence; + + /* Create a fence, which is a dummy BO. */ + fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, + RADEON_DOMAIN_GTT, + RADEON_FLAG_NO_SUBALLOC + | RADEON_FLAG_NO_INTERPROCESS_SHARING); + if (!fence) + return NULL; + + /* Add the fence as a dummy relocation. 
*/ + cs->ws->base.cs_add_buffer(rcs, fence, RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT, RADEON_PRIO_FENCE); - return (struct pipe_fence_handle*)fence; + return (struct pipe_fence_handle*)fence; } static bool radeon_fence_wait(struct radeon_winsys *ws, struct pipe_fence_handle *fence, uint64_t timeout) { - return ws->buffer_wait((struct pb_buffer*)fence, timeout, - RADEON_USAGE_READWRITE); + return ws->buffer_wait((struct pb_buffer*)fence, timeout, + RADEON_USAGE_READWRITE); } static void radeon_fence_reference(struct pipe_fence_handle **dst, struct pipe_fence_handle *src) { - pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); + pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); } -static struct pipe_fence_handle * -radeon_drm_cs_get_next_fence(struct radeon_cmdbuf *rcs) +static struct pipe_fence_handle *radeon_drm_cs_get_next_fence(struct radeon_cmdbuf *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct pipe_fence_handle *fence = NULL; @@ -838,21 +835,21 @@ radeon_drm_cs_add_fence_dependency(struct radeon_cmdbuf *cs, void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) { - ws->base.ctx_create = radeon_drm_ctx_create; - ws->base.ctx_destroy = radeon_drm_ctx_destroy; - ws->base.ctx_query_reset_status = radeon_drm_ctx_query_reset_status; - ws->base.cs_create = radeon_drm_cs_create; - ws->base.cs_destroy = radeon_drm_cs_destroy; - ws->base.cs_add_buffer = radeon_drm_cs_add_buffer; - ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer; - ws->base.cs_validate = radeon_drm_cs_validate; - ws->base.cs_check_space = radeon_drm_cs_check_space; - ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list; - ws->base.cs_flush = radeon_drm_cs_flush; - ws->base.cs_get_next_fence = radeon_drm_cs_get_next_fence; - ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; - ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; - ws->base.cs_add_fence_dependency = radeon_drm_cs_add_fence_dependency; - ws->base.fence_wait = radeon_fence_wait; - ws->base.fence_reference = radeon_fence_reference; + ws->base.ctx_create = radeon_drm_ctx_create; + ws->base.ctx_destroy = radeon_drm_ctx_destroy; + ws->base.ctx_query_reset_status = radeon_drm_ctx_query_reset_status; + ws->base.cs_create = radeon_drm_cs_create; + ws->base.cs_destroy = radeon_drm_cs_destroy; + ws->base.cs_add_buffer = radeon_drm_cs_add_buffer; + ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer; + ws->base.cs_validate = radeon_drm_cs_validate; + ws->base.cs_check_space = radeon_drm_cs_check_space; + ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list; + ws->base.cs_flush = radeon_drm_cs_flush; + ws->base.cs_get_next_fence = radeon_drm_cs_get_next_fence; + ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; + ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; + ws->base.cs_add_fence_dependency = radeon_drm_cs_add_fence_dependency; + ws->base.fence_wait = radeon_fence_wait; + ws->base.fence_reference = radeon_fence_reference; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 4fa007afa00..b07ffc19ec1 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -35,63 +35,63 @@ struct radeon_ctx { }; struct radeon_bo_item { - struct radeon_bo *bo; - union { - struct { - uint32_t priority_usage; - } real; - struct { - unsigned real_idx; - } slab; - } u; + struct radeon_bo *bo; + union { + struct { + uint32_t priority_usage; + } real; + struct { + unsigned real_idx; + } 
slab; + } u; }; struct radeon_cs_context { - uint32_t buf[16 * 1024]; - - int fd; - struct drm_radeon_cs cs; - struct drm_radeon_cs_chunk chunks[3]; - uint64_t chunk_array[3]; - uint32_t flags[2]; - - /* Buffers. */ - unsigned max_relocs; - unsigned num_relocs; - unsigned num_validated_relocs; - struct radeon_bo_item *relocs_bo; - struct drm_radeon_cs_reloc *relocs; - - unsigned num_slab_buffers; - unsigned max_slab_buffers; - struct radeon_bo_item *slab_buffers; - - int reloc_indices_hashlist[4096]; + uint32_t buf[16 * 1024]; + + int fd; + struct drm_radeon_cs cs; + struct drm_radeon_cs_chunk chunks[3]; + uint64_t chunk_array[3]; + uint32_t flags[2]; + + /* Buffers. */ + unsigned max_relocs; + unsigned num_relocs; + unsigned num_validated_relocs; + struct radeon_bo_item *relocs_bo; + struct drm_radeon_cs_reloc *relocs; + + unsigned num_slab_buffers; + unsigned max_slab_buffers; + struct radeon_bo_item *slab_buffers; + + int reloc_indices_hashlist[4096]; }; struct radeon_drm_cs { - struct radeon_cmdbuf base; - enum ring_type ring_type; - - /* We flip between these two CS. While one is being consumed - * by the kernel in another thread, the other one is being filled - * by the pipe driver. */ - struct radeon_cs_context csc1; - struct radeon_cs_context csc2; - /* The currently-used CS. */ - struct radeon_cs_context *csc; - /* The CS being currently-owned by the other thread. */ - struct radeon_cs_context *cst; - - /* The winsys. */ - struct radeon_drm_winsys *ws; - - /* Flush CS. */ - void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence); - void *flush_data; - - struct util_queue_fence flush_completed; - struct pipe_fence_handle *next_fence; + struct radeon_cmdbuf base; + enum ring_type ring_type; + + /* We flip between these two CS. While one is being consumed + * by the kernel in another thread, the other one is being filled + * by the pipe driver. */ + struct radeon_cs_context csc1; + struct radeon_cs_context csc2; + /* The currently-used CS. */ + struct radeon_cs_context *csc; + /* The CS being currently-owned by the other thread. */ + struct radeon_cs_context *cst; + + /* The winsys. */ + struct radeon_drm_winsys *ws; + + /* Flush CS. 
*/ + void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence); + void *flush_data; + + struct util_queue_fence flush_completed; + struct pipe_fence_handle *next_fence; }; int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo); @@ -99,41 +99,41 @@ int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo); static inline struct radeon_drm_cs * radeon_drm_cs(struct radeon_cmdbuf *base) { - return (struct radeon_drm_cs*)base; + return (struct radeon_drm_cs*)base; } static inline bool radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, struct radeon_bo *bo) { - int num_refs = bo->num_cs_references; - return num_refs == bo->rws->num_cs || - (num_refs && radeon_lookup_buffer(cs->csc, bo) != -1); + int num_refs = bo->num_cs_references; + return num_refs == bo->rws->num_cs || + (num_refs && radeon_lookup_buffer(cs->csc, bo) != -1); } static inline bool radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs, struct radeon_bo *bo) { - int index; + int index; - if (!bo->num_cs_references) - return false; + if (!bo->num_cs_references) + return false; - index = radeon_lookup_buffer(cs->csc, bo); - if (index == -1) - return false; + index = radeon_lookup_buffer(cs->csc, bo); + if (index == -1) + return false; - if (!bo->handle) - index = cs->csc->slab_buffers[index].u.slab.real_idx; + if (!bo->handle) + index = cs->csc->slab_buffers[index].u.slab.real_idx; - return cs->csc->relocs[index].write_domain != 0; + return cs->csc->relocs[index].write_domain != 0; } static inline bool radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) { - return bo->num_cs_references != 0; + return bo->num_cs_references != 0; } void radeon_drm_cs_sync_flush(struct radeon_cmdbuf *rcs); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c index cd51fe707e7..41d4bc15a00 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c @@ -30,16 +30,16 @@ static unsigned cik_get_macro_tile_index(struct radeon_surf *surf) { - unsigned index, tileb; + unsigned index, tileb; - tileb = 8 * 8 * surf->bpe; - tileb = MIN2(surf->u.legacy.tile_split, tileb); + tileb = 8 * 8 * surf->bpe; + tileb = MIN2(surf->u.legacy.tile_split, tileb); - for (index = 0; tileb > 64; index++) - tileb >>= 1; + for (index = 0; tileb > 64; index++) + tileb >>= 1; - assert(index < 16); - return index; + assert(index < 16); + return index; } #define G_009910_MICRO_TILE_MODE(x) (((x) >> 0) & 0x03) @@ -48,43 +48,43 @@ static unsigned cik_get_macro_tile_index(struct radeon_surf *surf) static void set_micro_tile_mode(struct radeon_surf *surf, struct radeon_info *info) { - uint32_t tile_mode; + uint32_t tile_mode; - if (info->chip_class < GFX6) { - surf->micro_tile_mode = 0; - return; - } + if (info->chip_class < GFX6) { + surf->micro_tile_mode = 0; + return; + } - tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]]; + tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]]; - if (info->chip_class >= GFX7) - surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); - else - surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); + if (info->chip_class >= GFX7) + surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); + else + surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); } static void surf_level_winsys_to_drm(struct radeon_surface_level *level_drm, const struct legacy_surf_level *level_ws, unsigned bpe) { - 
level_drm->offset = level_ws->offset; - level_drm->slice_size = (uint64_t)level_ws->slice_size_dw * 4; - level_drm->nblk_x = level_ws->nblk_x; - level_drm->nblk_y = level_ws->nblk_y; - level_drm->pitch_bytes = level_ws->nblk_x * bpe; - level_drm->mode = level_ws->mode; + level_drm->offset = level_ws->offset; + level_drm->slice_size = (uint64_t)level_ws->slice_size_dw * 4; + level_drm->nblk_x = level_ws->nblk_x; + level_drm->nblk_y = level_ws->nblk_y; + level_drm->pitch_bytes = level_ws->nblk_x * bpe; + level_drm->mode = level_ws->mode; } static void surf_level_drm_to_winsys(struct legacy_surf_level *level_ws, const struct radeon_surface_level *level_drm, unsigned bpe) { - level_ws->offset = level_drm->offset; - level_ws->slice_size_dw = level_drm->slice_size / 4; - level_ws->nblk_x = level_drm->nblk_x; - level_ws->nblk_y = level_drm->nblk_y; - level_ws->mode = level_drm->mode; - assert(level_drm->nblk_x * bpe == level_drm->pitch_bytes); + level_ws->offset = level_drm->offset; + level_ws->slice_size_dw = level_drm->slice_size / 4; + level_ws->nblk_x = level_drm->nblk_x; + level_ws->nblk_y = level_drm->nblk_y; + level_ws->mode = level_drm->mode; + assert(level_drm->nblk_x * bpe == level_drm->pitch_bytes); } static void surf_winsys_to_drm(struct radeon_surface *surf_drm, @@ -93,257 +93,257 @@ static void surf_winsys_to_drm(struct radeon_surface *surf_drm, enum radeon_surf_mode mode, const struct radeon_surf *surf_ws) { - int i; - - memset(surf_drm, 0, sizeof(*surf_drm)); - - surf_drm->npix_x = tex->width0; - surf_drm->npix_y = tex->height0; - surf_drm->npix_z = tex->depth0; - surf_drm->blk_w = util_format_get_blockwidth(tex->format); - surf_drm->blk_h = util_format_get_blockheight(tex->format); - surf_drm->blk_d = 1; - surf_drm->array_size = 1; - surf_drm->last_level = tex->last_level; - surf_drm->bpe = bpe; - surf_drm->nsamples = tex->nr_samples ? 
tex->nr_samples : 1; - - surf_drm->flags = flags; - surf_drm->flags = RADEON_SURF_CLR(surf_drm->flags, TYPE); - surf_drm->flags = RADEON_SURF_CLR(surf_drm->flags, MODE); - surf_drm->flags |= RADEON_SURF_SET(mode, MODE) | - RADEON_SURF_HAS_SBUFFER_MIPTREE | - RADEON_SURF_HAS_TILE_MODE_INDEX; - - switch (tex->target) { - case PIPE_TEXTURE_1D: - surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE); - break; - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D: - surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE); - break; - case PIPE_TEXTURE_3D: - surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE); - break; - case PIPE_TEXTURE_1D_ARRAY: - surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE); - surf_drm->array_size = tex->array_size; - break; - case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */ - assert(tex->array_size % 6 == 0); - /* fall through */ - case PIPE_TEXTURE_2D_ARRAY: - surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE); - surf_drm->array_size = tex->array_size; - break; - case PIPE_TEXTURE_CUBE: - surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE); - break; - case PIPE_BUFFER: - default: - assert(0); - } - - surf_drm->bo_size = surf_ws->surf_size; - surf_drm->bo_alignment = surf_ws->surf_alignment; - - surf_drm->bankw = surf_ws->u.legacy.bankw; - surf_drm->bankh = surf_ws->u.legacy.bankh; - surf_drm->mtilea = surf_ws->u.legacy.mtilea; - surf_drm->tile_split = surf_ws->u.legacy.tile_split; - - for (i = 0; i <= surf_drm->last_level; i++) { - surf_level_winsys_to_drm(&surf_drm->level[i], &surf_ws->u.legacy.level[i], - bpe * surf_drm->nsamples); - - surf_drm->tiling_index[i] = surf_ws->u.legacy.tiling_index[i]; - } - - if (flags & RADEON_SURF_SBUFFER) { - surf_drm->stencil_tile_split = surf_ws->u.legacy.stencil_tile_split; - - for (i = 0; i <= surf_drm->last_level; i++) { - surf_level_winsys_to_drm(&surf_drm->stencil_level[i], - &surf_ws->u.legacy.stencil_level[i], - surf_drm->nsamples); - surf_drm->stencil_tiling_index[i] = surf_ws->u.legacy.stencil_tiling_index[i]; - } - } + int i; + + memset(surf_drm, 0, sizeof(*surf_drm)); + + surf_drm->npix_x = tex->width0; + surf_drm->npix_y = tex->height0; + surf_drm->npix_z = tex->depth0; + surf_drm->blk_w = util_format_get_blockwidth(tex->format); + surf_drm->blk_h = util_format_get_blockheight(tex->format); + surf_drm->blk_d = 1; + surf_drm->array_size = 1; + surf_drm->last_level = tex->last_level; + surf_drm->bpe = bpe; + surf_drm->nsamples = tex->nr_samples ? 
tex->nr_samples : 1; + + surf_drm->flags = flags; + surf_drm->flags = RADEON_SURF_CLR(surf_drm->flags, TYPE); + surf_drm->flags = RADEON_SURF_CLR(surf_drm->flags, MODE); + surf_drm->flags |= RADEON_SURF_SET(mode, MODE) | + RADEON_SURF_HAS_SBUFFER_MIPTREE | + RADEON_SURF_HAS_TILE_MODE_INDEX; + + switch (tex->target) { + case PIPE_TEXTURE_1D: + surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE); + break; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE); + break; + case PIPE_TEXTURE_3D: + surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE); + break; + case PIPE_TEXTURE_1D_ARRAY: + surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE); + surf_drm->array_size = tex->array_size; + break; + case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */ + assert(tex->array_size % 6 == 0); + /* fall through */ + case PIPE_TEXTURE_2D_ARRAY: + surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE); + surf_drm->array_size = tex->array_size; + break; + case PIPE_TEXTURE_CUBE: + surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE); + break; + case PIPE_BUFFER: + default: + assert(0); + } + + surf_drm->bo_size = surf_ws->surf_size; + surf_drm->bo_alignment = surf_ws->surf_alignment; + + surf_drm->bankw = surf_ws->u.legacy.bankw; + surf_drm->bankh = surf_ws->u.legacy.bankh; + surf_drm->mtilea = surf_ws->u.legacy.mtilea; + surf_drm->tile_split = surf_ws->u.legacy.tile_split; + + for (i = 0; i <= surf_drm->last_level; i++) { + surf_level_winsys_to_drm(&surf_drm->level[i], &surf_ws->u.legacy.level[i], + bpe * surf_drm->nsamples); + + surf_drm->tiling_index[i] = surf_ws->u.legacy.tiling_index[i]; + } + + if (flags & RADEON_SURF_SBUFFER) { + surf_drm->stencil_tile_split = surf_ws->u.legacy.stencil_tile_split; + + for (i = 0; i <= surf_drm->last_level; i++) { + surf_level_winsys_to_drm(&surf_drm->stencil_level[i], + &surf_ws->u.legacy.stencil_level[i], + surf_drm->nsamples); + surf_drm->stencil_tiling_index[i] = surf_ws->u.legacy.stencil_tiling_index[i]; + } + } } static void surf_drm_to_winsys(struct radeon_drm_winsys *ws, struct radeon_surf *surf_ws, const struct radeon_surface *surf_drm) { - int i; - - memset(surf_ws, 0, sizeof(*surf_ws)); - - surf_ws->blk_w = surf_drm->blk_w; - surf_ws->blk_h = surf_drm->blk_h; - surf_ws->bpe = surf_drm->bpe; - surf_ws->is_linear = surf_drm->level[0].mode <= RADEON_SURF_MODE_LINEAR_ALIGNED; - surf_ws->has_stencil = !!(surf_drm->flags & RADEON_SURF_SBUFFER); - surf_ws->flags = surf_drm->flags; - - surf_ws->surf_size = surf_drm->bo_size; - surf_ws->surf_alignment = surf_drm->bo_alignment; - - surf_ws->u.legacy.bankw = surf_drm->bankw; - surf_ws->u.legacy.bankh = surf_drm->bankh; - surf_ws->u.legacy.mtilea = surf_drm->mtilea; - surf_ws->u.legacy.tile_split = surf_drm->tile_split; - - surf_ws->u.legacy.macro_tile_index = cik_get_macro_tile_index(surf_ws); - - for (i = 0; i <= surf_drm->last_level; i++) { - surf_level_drm_to_winsys(&surf_ws->u.legacy.level[i], &surf_drm->level[i], - surf_drm->bpe * surf_drm->nsamples); - surf_ws->u.legacy.tiling_index[i] = surf_drm->tiling_index[i]; - } - - if (surf_ws->flags & RADEON_SURF_SBUFFER) { - surf_ws->u.legacy.stencil_tile_split = surf_drm->stencil_tile_split; - - for (i = 0; i <= surf_drm->last_level; i++) { - surf_level_drm_to_winsys(&surf_ws->u.legacy.stencil_level[i], - &surf_drm->stencil_level[i], - surf_drm->nsamples); - surf_ws->u.legacy.stencil_tiling_index[i] = surf_drm->stencil_tiling_index[i]; - 
} - } - - set_micro_tile_mode(surf_ws, &ws->info); - surf_ws->is_displayable = surf_ws->is_linear || - surf_ws->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY || - surf_ws->micro_tile_mode == RADEON_MICRO_MODE_ROTATED; + int i; + + memset(surf_ws, 0, sizeof(*surf_ws)); + + surf_ws->blk_w = surf_drm->blk_w; + surf_ws->blk_h = surf_drm->blk_h; + surf_ws->bpe = surf_drm->bpe; + surf_ws->is_linear = surf_drm->level[0].mode <= RADEON_SURF_MODE_LINEAR_ALIGNED; + surf_ws->has_stencil = !!(surf_drm->flags & RADEON_SURF_SBUFFER); + surf_ws->flags = surf_drm->flags; + + surf_ws->surf_size = surf_drm->bo_size; + surf_ws->surf_alignment = surf_drm->bo_alignment; + + surf_ws->u.legacy.bankw = surf_drm->bankw; + surf_ws->u.legacy.bankh = surf_drm->bankh; + surf_ws->u.legacy.mtilea = surf_drm->mtilea; + surf_ws->u.legacy.tile_split = surf_drm->tile_split; + + surf_ws->u.legacy.macro_tile_index = cik_get_macro_tile_index(surf_ws); + + for (i = 0; i <= surf_drm->last_level; i++) { + surf_level_drm_to_winsys(&surf_ws->u.legacy.level[i], &surf_drm->level[i], + surf_drm->bpe * surf_drm->nsamples); + surf_ws->u.legacy.tiling_index[i] = surf_drm->tiling_index[i]; + } + + if (surf_ws->flags & RADEON_SURF_SBUFFER) { + surf_ws->u.legacy.stencil_tile_split = surf_drm->stencil_tile_split; + + for (i = 0; i <= surf_drm->last_level; i++) { + surf_level_drm_to_winsys(&surf_ws->u.legacy.stencil_level[i], + &surf_drm->stencil_level[i], + surf_drm->nsamples); + surf_ws->u.legacy.stencil_tiling_index[i] = surf_drm->stencil_tiling_index[i]; + } + } + + set_micro_tile_mode(surf_ws, &ws->info); + surf_ws->is_displayable = surf_ws->is_linear || + surf_ws->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY || + surf_ws->micro_tile_mode == RADEON_MICRO_MODE_ROTATED; } static void si_compute_cmask(const struct radeon_info *info, - const struct ac_surf_config *config, - struct radeon_surf *surf) + const struct ac_surf_config *config, + struct radeon_surf *surf) { - unsigned pipe_interleave_bytes = info->pipe_interleave_bytes; - unsigned num_pipes = info->num_tile_pipes; - unsigned cl_width, cl_height; - - if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) - return; - - assert(info->chip_class <= GFX8); - - switch (num_pipes) { - case 2: - cl_width = 32; - cl_height = 16; - break; - case 4: - cl_width = 32; - cl_height = 32; - break; - case 8: - cl_width = 64; - cl_height = 32; - break; - case 16: /* Hawaii */ - cl_width = 64; - cl_height = 64; - break; - default: - assert(0); - return; - } - - unsigned base_align = num_pipes * pipe_interleave_bytes; - - unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8); - unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8); - unsigned slice_elements = (width * height) / (8*8); - - /* Each element of CMASK is a nibble. 
*/ - unsigned slice_bytes = slice_elements / 2; - - surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128); - if (surf->u.legacy.cmask_slice_tile_max) - surf->u.legacy.cmask_slice_tile_max -= 1; - - unsigned num_layers; - if (config->is_3d) - num_layers = config->info.depth; - else if (config->is_cube) - num_layers = 6; - else - num_layers = config->info.array_size; - - surf->cmask_alignment = MAX2(256, base_align); - surf->cmask_size = align(slice_bytes, base_align) * num_layers; + unsigned pipe_interleave_bytes = info->pipe_interleave_bytes; + unsigned num_pipes = info->num_tile_pipes; + unsigned cl_width, cl_height; + + if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) + return; + + assert(info->chip_class <= GFX8); + + switch (num_pipes) { + case 2: + cl_width = 32; + cl_height = 16; + break; + case 4: + cl_width = 32; + cl_height = 32; + break; + case 8: + cl_width = 64; + cl_height = 32; + break; + case 16: /* Hawaii */ + cl_width = 64; + cl_height = 64; + break; + default: + assert(0); + return; + } + + unsigned base_align = num_pipes * pipe_interleave_bytes; + + unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8); + unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8); + unsigned slice_elements = (width * height) / (8*8); + + /* Each element of CMASK is a nibble. */ + unsigned slice_bytes = slice_elements / 2; + + surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128); + if (surf->u.legacy.cmask_slice_tile_max) + surf->u.legacy.cmask_slice_tile_max -= 1; + + unsigned num_layers; + if (config->is_3d) + num_layers = config->info.depth; + else if (config->is_cube) + num_layers = 6; + else + num_layers = config->info.array_size; + + surf->cmask_alignment = MAX2(256, base_align); + surf->cmask_size = align(slice_bytes, base_align) * num_layers; } static void si_compute_htile(const struct radeon_info *info, struct radeon_surf *surf, unsigned num_layers) { - unsigned cl_width, cl_height, width, height; - unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; - unsigned num_pipes = info->num_tile_pipes; + unsigned cl_width, cl_height, width, height; + unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; + unsigned num_pipes = info->num_tile_pipes; - surf->htile_size = 0; + surf->htile_size = 0; - if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) || - surf->flags & RADEON_SURF_NO_HTILE) - return; + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) || + surf->flags & RADEON_SURF_NO_HTILE) + return; - if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_1D && - !info->htile_cmask_support_1d_tiling) - return; + if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_1D && + !info->htile_cmask_support_1d_tiling) + return; - /* Overalign HTILE on P2 configs to work around GPU hangs in + /* Overalign HTILE on P2 configs to work around GPU hangs in * piglit/depthstencil-render-miplevels 585. * * This has been confirmed to help Kabini & Stoney, where the hangs * are always reproducible. I think I have seen the test hang * on Carrizo too, though it was very rare there. 
*/ - if (info->chip_class >= GFX7 && num_pipes < 4) - num_pipes = 4; - - switch (num_pipes) { - case 1: - cl_width = 32; - cl_height = 16; - break; - case 2: - cl_width = 32; - cl_height = 32; - break; - case 4: - cl_width = 64; - cl_height = 32; - break; - case 8: - cl_width = 64; - cl_height = 64; - break; - case 16: - cl_width = 128; - cl_height = 64; - break; - default: - assert(0); - return; - } - - width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8); - height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8); - - slice_elements = (width * height) / (8 * 8); - slice_bytes = slice_elements * 4; - - pipe_interleave_bytes = info->pipe_interleave_bytes; - base_align = num_pipes * pipe_interleave_bytes; - - surf->htile_alignment = base_align; - surf->htile_size = num_layers * align(slice_bytes, base_align); + if (info->chip_class >= GFX7 && num_pipes < 4) + num_pipes = 4; + + switch (num_pipes) { + case 1: + cl_width = 32; + cl_height = 16; + break; + case 2: + cl_width = 32; + cl_height = 32; + break; + case 4: + cl_width = 64; + cl_height = 32; + break; + case 8: + cl_width = 64; + cl_height = 64; + break; + case 16: + cl_width = 128; + cl_height = 64; + break; + default: + assert(0); + return; + } + + width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8); + height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8); + + slice_elements = (width * height) / (8 * 8); + slice_bytes = slice_elements * 4; + + pipe_interleave_bytes = info->pipe_interleave_bytes; + base_align = num_pipes * pipe_interleave_bytes; + + surf->htile_alignment = base_align; + surf->htile_size = num_layers * align(slice_bytes, base_align); } static int radeon_winsys_surface_init(struct radeon_winsys *rws, @@ -352,114 +352,114 @@ static int radeon_winsys_surface_init(struct radeon_winsys *rws, enum radeon_surf_mode mode, struct radeon_surf *surf_ws) { - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; - struct radeon_surface surf_drm; - int r; - - surf_winsys_to_drm(&surf_drm, tex, flags, bpe, mode, surf_ws); - - if (!(flags & (RADEON_SURF_IMPORTED | RADEON_SURF_FMASK))) { - r = radeon_surface_best(ws->surf_man, &surf_drm); - if (r) - return r; - } - - r = radeon_surface_init(ws->surf_man, &surf_drm); - if (r) - return r; - - surf_drm_to_winsys(ws, surf_ws, &surf_drm); - - /* Compute FMASK. */ - if (ws->gen == DRV_SI && - tex->nr_samples >= 2 && - !(flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_FMASK | RADEON_SURF_NO_FMASK))) { - /* FMASK is allocated like an ordinary texture. 
*/ - struct pipe_resource templ = *tex; - struct radeon_surf fmask = {}; - unsigned fmask_flags, bpe; - - templ.nr_samples = 1; - fmask_flags = flags | RADEON_SURF_FMASK; - - switch (tex->nr_samples) { - case 2: - case 4: - bpe = 1; - break; - case 8: - bpe = 4; - break; - default: - fprintf(stderr, "radeon: Invalid sample count for FMASK allocation.\n"); - return -1; - } - - if (radeon_winsys_surface_init(rws, &templ, fmask_flags, bpe, - RADEON_SURF_MODE_2D, &fmask)) { - fprintf(stderr, "Got error in surface_init while allocating FMASK.\n"); - return -1; - } - - assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); - - surf_ws->fmask_size = fmask.surf_size; - surf_ws->fmask_alignment = MAX2(256, fmask.surf_alignment); - surf_ws->fmask_tile_swizzle = fmask.tile_swizzle; - - surf_ws->u.legacy.fmask.slice_tile_max = + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + struct radeon_surface surf_drm; + int r; + + surf_winsys_to_drm(&surf_drm, tex, flags, bpe, mode, surf_ws); + + if (!(flags & (RADEON_SURF_IMPORTED | RADEON_SURF_FMASK))) { + r = radeon_surface_best(ws->surf_man, &surf_drm); + if (r) + return r; + } + + r = radeon_surface_init(ws->surf_man, &surf_drm); + if (r) + return r; + + surf_drm_to_winsys(ws, surf_ws, &surf_drm); + + /* Compute FMASK. */ + if (ws->gen == DRV_SI && + tex->nr_samples >= 2 && + !(flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_FMASK | RADEON_SURF_NO_FMASK))) { + /* FMASK is allocated like an ordinary texture. */ + struct pipe_resource templ = *tex; + struct radeon_surf fmask = {}; + unsigned fmask_flags, bpe; + + templ.nr_samples = 1; + fmask_flags = flags | RADEON_SURF_FMASK; + + switch (tex->nr_samples) { + case 2: + case 4: + bpe = 1; + break; + case 8: + bpe = 4; + break; + default: + fprintf(stderr, "radeon: Invalid sample count for FMASK allocation.\n"); + return -1; + } + + if (radeon_winsys_surface_init(rws, &templ, fmask_flags, bpe, + RADEON_SURF_MODE_2D, &fmask)) { + fprintf(stderr, "Got error in surface_init while allocating FMASK.\n"); + return -1; + } + + assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); + + surf_ws->fmask_size = fmask.surf_size; + surf_ws->fmask_alignment = MAX2(256, fmask.surf_alignment); + surf_ws->fmask_tile_swizzle = fmask.tile_swizzle; + + surf_ws->u.legacy.fmask.slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64; - if (surf_ws->u.legacy.fmask.slice_tile_max) - surf_ws->u.legacy.fmask.slice_tile_max -= 1; - - surf_ws->u.legacy.fmask.tiling_index = fmask.u.legacy.tiling_index[0]; - surf_ws->u.legacy.fmask.bankh = fmask.u.legacy.bankh; - surf_ws->u.legacy.fmask.pitch_in_pixels = fmask.u.legacy.level[0].nblk_x; - } - - if (ws->gen == DRV_SI && - (tex->nr_samples <= 1 || surf_ws->fmask_size)) { - struct ac_surf_config config; - - /* Only these fields need to be set for the CMASK computation. */ - config.info.width = tex->width0; - config.info.height = tex->height0; - config.info.depth = tex->depth0; - config.info.array_size = tex->array_size; - config.is_3d = !!(tex->target == PIPE_TEXTURE_3D); - config.is_cube = !!(tex->target == PIPE_TEXTURE_CUBE); - - si_compute_cmask(&ws->info, &config, surf_ws); - } - - if (ws->gen == DRV_SI) { - si_compute_htile(&ws->info, surf_ws, util_num_layers(tex, 0)); - - /* Determine the memory layout of multiple allocations in one buffer. 
*/ - surf_ws->total_size = surf_ws->surf_size; - - if (surf_ws->htile_size) { - surf_ws->htile_offset = align64(surf_ws->total_size, surf_ws->htile_alignment); - surf_ws->total_size = surf_ws->htile_offset + surf_ws->htile_size; - } - - if (surf_ws->fmask_size) { - assert(tex->nr_samples >= 2); - surf_ws->fmask_offset = align64(surf_ws->total_size, surf_ws->fmask_alignment); - surf_ws->total_size = surf_ws->fmask_offset + surf_ws->fmask_size; - } - - /* Single-sample CMASK is in a separate buffer. */ - if (surf_ws->cmask_size && tex->nr_samples >= 2) { - surf_ws->cmask_offset = align64(surf_ws->total_size, surf_ws->cmask_alignment); - surf_ws->total_size = surf_ws->cmask_offset + surf_ws->cmask_size; - } - } - - return 0; + if (surf_ws->u.legacy.fmask.slice_tile_max) + surf_ws->u.legacy.fmask.slice_tile_max -= 1; + + surf_ws->u.legacy.fmask.tiling_index = fmask.u.legacy.tiling_index[0]; + surf_ws->u.legacy.fmask.bankh = fmask.u.legacy.bankh; + surf_ws->u.legacy.fmask.pitch_in_pixels = fmask.u.legacy.level[0].nblk_x; + } + + if (ws->gen == DRV_SI && + (tex->nr_samples <= 1 || surf_ws->fmask_size)) { + struct ac_surf_config config; + + /* Only these fields need to be set for the CMASK computation. */ + config.info.width = tex->width0; + config.info.height = tex->height0; + config.info.depth = tex->depth0; + config.info.array_size = tex->array_size; + config.is_3d = !!(tex->target == PIPE_TEXTURE_3D); + config.is_cube = !!(tex->target == PIPE_TEXTURE_CUBE); + + si_compute_cmask(&ws->info, &config, surf_ws); + } + + if (ws->gen == DRV_SI) { + si_compute_htile(&ws->info, surf_ws, util_num_layers(tex, 0)); + + /* Determine the memory layout of multiple allocations in one buffer. */ + surf_ws->total_size = surf_ws->surf_size; + + if (surf_ws->htile_size) { + surf_ws->htile_offset = align64(surf_ws->total_size, surf_ws->htile_alignment); + surf_ws->total_size = surf_ws->htile_offset + surf_ws->htile_size; + } + + if (surf_ws->fmask_size) { + assert(tex->nr_samples >= 2); + surf_ws->fmask_offset = align64(surf_ws->total_size, surf_ws->fmask_alignment); + surf_ws->total_size = surf_ws->fmask_offset + surf_ws->fmask_size; + } + + /* Single-sample CMASK is in a separate buffer. */ + if (surf_ws->cmask_size && tex->nr_samples >= 2) { + surf_ws->cmask_offset = align64(surf_ws->total_size, surf_ws->cmask_alignment); + surf_ws->total_size = surf_ws->cmask_offset + surf_ws->cmask_size; + } + } + + return 0; } void radeon_surface_init_functions(struct radeon_drm_winsys *ws) { - ws->base.surface_init = radeon_winsys_surface_init; + ws->base.surface_init = radeon_winsys_surface_init; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 8dde57200bd..a8d5c978e17 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -56,126 +56,126 @@ static bool radeon_set_fd_access(struct radeon_drm_cs *applier, unsigned request, const char *request_name, bool enable) { - struct drm_radeon_info info; - unsigned value = enable ? 1 : 0; - - memset(&info, 0, sizeof(info)); - - mtx_lock(&*mutex); - - /* Early exit if we are sure the request will fail. */ - if (enable) { - if (*owner) { - mtx_unlock(&*mutex); - return false; - } - } else { - if (*owner != applier) { - mtx_unlock(&*mutex); - return false; - } - } - - /* Pass through the request to the kernel. 
*/ - info.value = (unsigned long)&value; - info.request = request; - if (drmCommandWriteRead(applier->ws->fd, DRM_RADEON_INFO, - &info, sizeof(info)) != 0) { - mtx_unlock(&*mutex); - return false; - } - - /* Update the rights in the winsys. */ - if (enable) { - if (value) { - *owner = applier; - mtx_unlock(&*mutex); - return true; - } - } else { - *owner = NULL; - } - - mtx_unlock(&*mutex); - return false; + struct drm_radeon_info info; + unsigned value = enable ? 1 : 0; + + memset(&info, 0, sizeof(info)); + + mtx_lock(&*mutex); + + /* Early exit if we are sure the request will fail. */ + if (enable) { + if (*owner) { + mtx_unlock(&*mutex); + return false; + } + } else { + if (*owner != applier) { + mtx_unlock(&*mutex); + return false; + } + } + + /* Pass through the request to the kernel. */ + info.value = (unsigned long)&value; + info.request = request; + if (drmCommandWriteRead(applier->ws->fd, DRM_RADEON_INFO, + &info, sizeof(info)) != 0) { + mtx_unlock(&*mutex); + return false; + } + + /* Update the rights in the winsys. */ + if (enable) { + if (value) { + *owner = applier; + mtx_unlock(&*mutex); + return true; + } + } else { + *owner = NULL; + } + + mtx_unlock(&*mutex); + return false; } static bool radeon_get_drm_value(int fd, unsigned request, const char *errname, uint32_t *out) { - struct drm_radeon_info info; - int retval; - - memset(&info, 0, sizeof(info)); - - info.value = (unsigned long)out; - info.request = request; - - retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - if (errname) { - fprintf(stderr, "radeon: Failed to get %s, error number %d\n", - errname, retval); - } - return false; - } - return true; + struct drm_radeon_info info; + int retval; + + memset(&info, 0, sizeof(info)); + + info.value = (unsigned long)out; + info.request = request; + + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (retval) { + if (errname) { + fprintf(stderr, "radeon: Failed to get %s, error number %d\n", + errname, retval); + } + return false; + } + return true; } /* Helper function to do the ioctls needed for setup and init. */ static bool do_winsys_init(struct radeon_drm_winsys *ws) { - struct drm_radeon_gem_info gem_info; - int retval; - drmVersionPtr version; - - memset(&gem_info, 0, sizeof(gem_info)); - - /* We do things in a specific order here. - * - * DRM version first. We need to be sure we're running on a KMS chipset. - * This is also for some features. - * - * Then, the PCI ID. This is essential and should return usable numbers - * for all Radeons. If this fails, we probably got handed an FD for some - * non-Radeon card. - * - * The GEM info is actually bogus on the kernel side, as well as our side - * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because - * we don't actually use the info for anything yet. - * - * The GB and Z pipe requests should always succeed, but they might not - * return sensical values for all chipsets, but that's alright because - * the pipe drivers already know that. - */ - - /* Get DRM version. 
*/ - version = drmGetVersion(ws->fd); - if (version->version_major != 2 || - version->version_minor < 12) { - fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " - "only compatible with 2.12.0 (kernel 3.2) or later.\n", - __FUNCTION__, - version->version_major, - version->version_minor, - version->version_patchlevel); - drmFreeVersion(version); - return false; - } - - ws->info.drm_major = version->version_major; - ws->info.drm_minor = version->version_minor; - ws->info.drm_patchlevel = version->version_patchlevel; - ws->info.is_amdgpu = false; - drmFreeVersion(version); - - /* Get PCI ID. */ - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID", - &ws->info.pci_id)) - return false; - - /* Check PCI ID. */ - switch (ws->info.pci_id) { + struct drm_radeon_gem_info gem_info; + int retval; + drmVersionPtr version; + + memset(&gem_info, 0, sizeof(gem_info)); + + /* We do things in a specific order here. + * + * DRM version first. We need to be sure we're running on a KMS chipset. + * This is also for some features. + * + * Then, the PCI ID. This is essential and should return usable numbers + * for all Radeons. If this fails, we probably got handed an FD for some + * non-Radeon card. + * + * The GEM info is actually bogus on the kernel side, as well as our side + * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because + * we don't actually use the info for anything yet. + * + * The GB and Z pipe requests should always succeed, but they might not + * return sensical values for all chipsets, but that's alright because + * the pipe drivers already know that. + */ + + /* Get DRM version. */ + version = drmGetVersion(ws->fd); + if (version->version_major != 2 || + version->version_minor < 12) { + fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " + "only compatible with 2.12.0 (kernel 3.2) or later.\n", + __FUNCTION__, + version->version_major, + version->version_minor, + version->version_patchlevel); + drmFreeVersion(version); + return false; + } + + ws->info.drm_major = version->version_major; + ws->info.drm_minor = version->version_minor; + ws->info.drm_patchlevel = version->version_patchlevel; + ws->info.is_amdgpu = false; + drmFreeVersion(version); + + /* Get PCI ID. */ + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID", + &ws->info.pci_id)) + return false; + + /* Check PCI ID. */ + switch (ws->info.pci_id) { #define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; ws->gen = DRV_R300; break; #include "pci_ids/r300_pci_ids.h" #undef CHIPSET @@ -185,773 +185,773 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) #undef CHIPSET #define CHIPSET(pci_id, cfamily) \ - case pci_id: \ - ws->info.family = CHIP_##cfamily; \ - ws->info.name = #cfamily; \ - ws->gen = DRV_SI; \ - break; + case pci_id: \ + ws->info.family = CHIP_##cfamily; \ + ws->info.name = #cfamily; \ + ws->gen = DRV_SI; \ + break; #include "pci_ids/radeonsi_pci_ids.h" #undef CHIPSET - default: - fprintf(stderr, "radeon: Invalid PCI ID.\n"); - return false; - } - - switch (ws->info.family) { - default: - case CHIP_UNKNOWN: - fprintf(stderr, "radeon: Unknown family.\n"); - return false; - case CHIP_R300: - case CHIP_R350: - case CHIP_RV350: - case CHIP_RV370: - case CHIP_RV380: - case CHIP_RS400: - case CHIP_RC410: - case CHIP_RS480: - ws->info.chip_class = R300; - break; - case CHIP_R420: /* R4xx-based cores. 
*/ - case CHIP_R423: - case CHIP_R430: - case CHIP_R480: - case CHIP_R481: - case CHIP_RV410: - case CHIP_RS600: - case CHIP_RS690: - case CHIP_RS740: - ws->info.chip_class = R400; - break; - case CHIP_RV515: /* R5xx-based cores. */ - case CHIP_R520: - case CHIP_RV530: - case CHIP_R580: - case CHIP_RV560: - case CHIP_RV570: - ws->info.chip_class = R500; - break; - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - ws->info.chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - ws->info.chip_class = R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_BARTS: - case CHIP_TURKS: - case CHIP_CAICOS: - ws->info.chip_class = EVERGREEN; - break; - case CHIP_CAYMAN: - case CHIP_ARUBA: - ws->info.chip_class = CAYMAN; - break; - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - ws->info.chip_class = GFX6; - break; - case CHIP_BONAIRE: - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_HAWAII: - ws->info.chip_class = GFX7; - break; - } - - /* Set which chips don't have dedicated VRAM. */ - switch (ws->info.family) { - case CHIP_RS400: - case CHIP_RC410: - case CHIP_RS480: - case CHIP_RS600: - case CHIP_RS690: - case CHIP_RS740: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_ARUBA: - case CHIP_KAVERI: - case CHIP_KABINI: - ws->info.has_dedicated_vram = false; - break; - - default: - ws->info.has_dedicated_vram = true; - } - - ws->info.num_rings[RING_GFX] = 1; - /* Check for dma */ - ws->info.num_rings[RING_DMA] = 0; - /* DMA is disabled on R700. There is IB corruption and hangs. */ - if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) { - ws->info.num_rings[RING_DMA] = 1; - } - - /* Check for UVD and VCE */ - ws->info.has_hw_decode = false; - ws->info.vce_fw_version = 0x00000000; - if (ws->info.drm_minor >= 32) { - uint32_t value = RADEON_CS_RING_UVD; - if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING, - "UVD Ring working", &value)) { - ws->info.has_hw_decode = value; - ws->info.num_rings[RING_UVD] = 1; - } - - value = RADEON_CS_RING_VCE; - if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING, - NULL, &value) && value) { - - if (radeon_get_drm_value(ws->fd, RADEON_INFO_VCE_FW_VERSION, - "VCE FW version", &value)) { - ws->info.vce_fw_version = value; - ws->info.num_rings[RING_VCE] = 1; - } - } - } - - /* Check for userptr support. */ - { - struct drm_radeon_gem_userptr args = {0}; - - /* If the ioctl doesn't exist, -EINVAL is returned. - * - * If the ioctl exists, it should return -EACCES - * if RADEON_GEM_USERPTR_READONLY or RADEON_GEM_USERPTR_REGISTER - * aren't set. - */ - ws->info.has_userptr = + default: + fprintf(stderr, "radeon: Invalid PCI ID.\n"); + return false; + } + + switch (ws->info.family) { + default: + case CHIP_UNKNOWN: + fprintf(stderr, "radeon: Unknown family.\n"); + return false; + case CHIP_R300: + case CHIP_R350: + case CHIP_RV350: + case CHIP_RV370: + case CHIP_RV380: + case CHIP_RS400: + case CHIP_RC410: + case CHIP_RS480: + ws->info.chip_class = R300; + break; + case CHIP_R420: /* R4xx-based cores. 
*/ + case CHIP_R423: + case CHIP_R430: + case CHIP_R480: + case CHIP_R481: + case CHIP_RV410: + case CHIP_RS600: + case CHIP_RS690: + case CHIP_RS740: + ws->info.chip_class = R400; + break; + case CHIP_RV515: /* R5xx-based cores. */ + case CHIP_R520: + case CHIP_RV530: + case CHIP_R580: + case CHIP_RV560: + case CHIP_RV570: + ws->info.chip_class = R500; + break; + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + ws->info.chip_class = R600; + break; + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + ws->info.chip_class = R700; + break; + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + case CHIP_PALM: + case CHIP_SUMO: + case CHIP_SUMO2: + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: + ws->info.chip_class = EVERGREEN; + break; + case CHIP_CAYMAN: + case CHIP_ARUBA: + ws->info.chip_class = CAYMAN; + break; + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + ws->info.chip_class = GFX6; + break; + case CHIP_BONAIRE: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_HAWAII: + ws->info.chip_class = GFX7; + break; + } + + /* Set which chips don't have dedicated VRAM. */ + switch (ws->info.family) { + case CHIP_RS400: + case CHIP_RC410: + case CHIP_RS480: + case CHIP_RS600: + case CHIP_RS690: + case CHIP_RS740: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_PALM: + case CHIP_SUMO: + case CHIP_SUMO2: + case CHIP_ARUBA: + case CHIP_KAVERI: + case CHIP_KABINI: + ws->info.has_dedicated_vram = false; + break; + + default: + ws->info.has_dedicated_vram = true; + } + + ws->info.num_rings[RING_GFX] = 1; + /* Check for dma */ + ws->info.num_rings[RING_DMA] = 0; + /* DMA is disabled on R700. There is IB corruption and hangs. */ + if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) { + ws->info.num_rings[RING_DMA] = 1; + } + + /* Check for UVD and VCE */ + ws->info.has_hw_decode = false; + ws->info.vce_fw_version = 0x00000000; + if (ws->info.drm_minor >= 32) { + uint32_t value = RADEON_CS_RING_UVD; + if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING, + "UVD Ring working", &value)) { + ws->info.has_hw_decode = value; + ws->info.num_rings[RING_UVD] = 1; + } + + value = RADEON_CS_RING_VCE; + if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING, + NULL, &value) && value) { + + if (radeon_get_drm_value(ws->fd, RADEON_INFO_VCE_FW_VERSION, + "VCE FW version", &value)) { + ws->info.vce_fw_version = value; + ws->info.num_rings[RING_VCE] = 1; + } + } + } + + /* Check for userptr support. */ + { + struct drm_radeon_gem_userptr args = {0}; + + /* If the ioctl doesn't exist, -EINVAL is returned. + * + * If the ioctl exists, it should return -EACCES + * if RADEON_GEM_USERPTR_READONLY or RADEON_GEM_USERPTR_REGISTER + * aren't set. + */ + ws->info.has_userptr = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR, &args, sizeof(args)) == -EACCES; - } - - /* Get GEM info. 
*/ - retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, - &gem_info, sizeof(gem_info)); - if (retval) { - fprintf(stderr, "radeon: Failed to get MM info, error number %d\n", - retval); - return false; - } - ws->info.gart_size = gem_info.gart_size; - ws->info.vram_size = gem_info.vram_size; - ws->info.vram_vis_size = gem_info.vram_visible; - /* Older versions of the kernel driver reported incorrect values, and - * didn't support more than 256MB of visible VRAM anyway - */ - if (ws->info.drm_minor < 49) - ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024); - - /* Radeon allocates all buffers contiguously, which makes large allocations - * unlikely to succeed. */ - if (ws->info.has_dedicated_vram) - ws->info.max_alloc_size = ws->info.vram_size * 0.7; - else - ws->info.max_alloc_size = ws->info.gart_size * 0.7; - - if (ws->info.drm_minor < 40) - ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024); - /* Both 32-bit and 64-bit address spaces only have 4GB. */ - ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024); - - /* Get max clock frequency info and convert it to MHz */ - radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL, - &ws->info.max_shader_clock); - ws->info.max_shader_clock /= 1000; - - ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); - - /* Generation-specific queries. */ - if (ws->gen == DRV_R300) { - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES, - "GB pipe count", - &ws->info.r300_num_gb_pipes)) - return false; - - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES, - "Z pipe count", - &ws->info.r300_num_z_pipes)) - return false; - } - else if (ws->gen >= DRV_R600) { - uint32_t tiling_config = 0; - - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, - "num backends", - &ws->info.num_render_backends)) - return false; - - /* get the GPU counter frequency, failure is not fatal */ - radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL, - &ws->info.clock_crystal_freq); - - radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL, - &tiling_config); - - ws->info.r600_num_banks = + } + + /* Get GEM info. */ + retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, + &gem_info, sizeof(gem_info)); + if (retval) { + fprintf(stderr, "radeon: Failed to get MM info, error number %d\n", + retval); + return false; + } + ws->info.gart_size = gem_info.gart_size; + ws->info.vram_size = gem_info.vram_size; + ws->info.vram_vis_size = gem_info.vram_visible; + /* Older versions of the kernel driver reported incorrect values, and + * didn't support more than 256MB of visible VRAM anyway + */ + if (ws->info.drm_minor < 49) + ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024); + + /* Radeon allocates all buffers contiguously, which makes large allocations + * unlikely to succeed. */ + if (ws->info.has_dedicated_vram) + ws->info.max_alloc_size = ws->info.vram_size * 0.7; + else + ws->info.max_alloc_size = ws->info.gart_size * 0.7; + + if (ws->info.drm_minor < 40) + ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024); + /* Both 32-bit and 64-bit address spaces only have 4GB. */ + ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024); + + /* Get max clock frequency info and convert it to MHz */ + radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL, + &ws->info.max_shader_clock); + ws->info.max_shader_clock /= 1000; + + ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + + /* Generation-specific queries. 
*/ + if (ws->gen == DRV_R300) { + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES, + "GB pipe count", + &ws->info.r300_num_gb_pipes)) + return false; + + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES, + "Z pipe count", + &ws->info.r300_num_z_pipes)) + return false; + } + else if (ws->gen >= DRV_R600) { + uint32_t tiling_config = 0; + + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, + "num backends", + &ws->info.num_render_backends)) + return false; + + /* get the GPU counter frequency, failure is not fatal */ + radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL, + &ws->info.clock_crystal_freq); + + radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL, + &tiling_config); + + ws->info.r600_num_banks = ws->info.chip_class >= EVERGREEN ? - 4 << ((tiling_config & 0xf0) >> 4) : - 4 << ((tiling_config & 0x30) >> 4); + 4 << ((tiling_config & 0xf0) >> 4) : + 4 << ((tiling_config & 0x30) >> 4); - ws->info.pipe_interleave_bytes = + ws->info.pipe_interleave_bytes = ws->info.chip_class >= EVERGREEN ? - 256 << ((tiling_config & 0xf00) >> 8) : - 256 << ((tiling_config & 0xc0) >> 6); - - if (!ws->info.pipe_interleave_bytes) - ws->info.pipe_interleave_bytes = - ws->info.chip_class >= EVERGREEN ? 512 : 256; - - radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, - &ws->info.num_tile_pipes); - - /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the - * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) - * reports a different value (12). Fix it by setting what's in the - * GB_TILE_MODE array (8). - */ - if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) - ws->info.num_tile_pipes = 8; - - if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, - &ws->info.r600_gb_backend_map)) - ws->info.r600_gb_backend_map_valid = true; - - /* Default value. */ - ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends); - /* - * This fails (silently) on non-GCN or older kernels, overwriting the - * default enabled_rb_mask with the result of the last query. - */ - if (ws->gen >= DRV_SI) - radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, - &ws->info.enabled_rb_mask); - - ws->info.r600_has_virtual_memory = false; - if (ws->info.drm_minor >= 13) { - uint32_t ib_vm_max_size; - - ws->info.r600_has_virtual_memory = true; - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, - &ws->va_start)) - ws->info.r600_has_virtual_memory = false; - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL, - &ib_vm_max_size)) - ws->info.r600_has_virtual_memory = false; - radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL, - &ws->va_unmap_working); - } - if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", false)) - ws->info.r600_has_virtual_memory = false; - } - - /* Get max pipes, this is only needed for compute shaders. All evergreen+ - * chips have at least 2 pipes, so we use 2 as a default. 
*/ - ws->info.r600_max_quad_pipes = 2; - radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL, - &ws->info.r600_max_quad_pipes); - - /* All GPUs have at least one compute unit */ - ws->info.num_good_compute_units = 1; - radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL, - &ws->info.num_good_compute_units); - - radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL, - &ws->info.max_se); - - switch (ws->info.family) { - case CHIP_HAINAN: - case CHIP_KABINI: - ws->info.num_tcc_blocks = 2; - break; - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_BONAIRE: - case CHIP_KAVERI: - ws->info.num_tcc_blocks = 4; - break; - case CHIP_PITCAIRN: - ws->info.num_tcc_blocks = 8; - break; - case CHIP_TAHITI: - ws->info.num_tcc_blocks = 12; - break; - case CHIP_HAWAII: - ws->info.num_tcc_blocks = 16; - break; - default: - ws->info.num_tcc_blocks = 0; - break; - } - - if (!ws->info.max_se) { - switch (ws->info.family) { - default: - ws->info.max_se = 1; - break; - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_BARTS: - case CHIP_CAYMAN: - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_BONAIRE: - ws->info.max_se = 2; - break; - case CHIP_HAWAII: - ws->info.max_se = 4; - break; - } - } - - radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL, - &ws->info.max_sh_per_se); - if (ws->gen == DRV_SI) { - ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units / - (ws->info.max_se * ws->info.max_sh_per_se); - } - - radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL, - &ws->accel_working2); - if (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 2) { - fprintf(stderr, "radeon: GPU acceleration for Hawaii disabled, " - "returned accel_working2 value %u is smaller than 2. " - "Please install a newer kernel.\n", - ws->accel_working2); - return false; - } - - if (ws->info.chip_class == GFX7) { - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL, - ws->info.cik_macrotile_mode_array)) { - fprintf(stderr, "radeon: Kernel 3.13 is required for Sea Islands support.\n"); - return false; - } - } - - if (ws->info.chip_class >= GFX6) { - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL, - ws->info.si_tile_mode_array)) { - fprintf(stderr, "radeon: Kernel 3.10 is required for Southern Islands support.\n"); - return false; - } - } - - /* Hawaii with old firmware needs type2 nop packet. - * accel_working2 with value 3 indicates the new firmware. - */ - ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= GFX6 || - (ws->info.family == CHIP_HAWAII && - ws->accel_working2 < 3); - ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */ - ws->info.ib_start_alignment = 4096; - ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40; - /* HTILE is broken with 1D tiling on old kernels and GFX7. */ - ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != GFX7 || - ws->info.drm_minor >= 38; - ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48; - ws->info.has_bo_metadata = false; - ws->info.has_gpu_reset_status_query = ws->info.drm_minor >= 43; - ws->info.has_eqaa_surface_allocator = false; - ws->info.has_format_bc1_through_bc7 = ws->info.drm_minor >= 31; - ws->info.kernel_flushes_tc_l2_after_ib = true; - /* Old kernels disallowed register writes via COPY_DATA - * that are used for indirect compute dispatches. */ - ws->info.has_indirect_compute_dispatch = ws->info.chip_class == GFX7 || - (ws->info.chip_class == GFX6 && - ws->info.drm_minor >= 45); - /* GFX6 doesn't support unaligned loads. 
*/ - ws->info.has_unaligned_shader_loads = ws->info.chip_class == GFX7 && - ws->info.drm_minor >= 50; - ws->info.has_sparse_vm_mappings = false; - /* 2D tiling on GFX7 is supported since DRM 2.35.0 */ - ws->info.has_2d_tiling = ws->info.chip_class <= GFX6 || ws->info.drm_minor >= 35; - ws->info.has_read_registers_query = ws->info.drm_minor >= 42; - ws->info.max_alignment = 1024*1024; - ws->info.has_graphics = true; - ws->info.cpdma_prefetch_writes_memory = true; - ws->info.max_wave64_per_simd = 10; - ws->info.num_physical_sgprs_per_simd = 512; - ws->info.num_physical_wave64_vgprs_per_simd = 256; - /* Potential hang on Kabini: */ - ws->info.use_late_alloc = ws->info.family != CHIP_KABINI; - - ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL || - strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL; - - return true; + 256 << ((tiling_config & 0xf00) >> 8) : + 256 << ((tiling_config & 0xc0) >> 6); + + if (!ws->info.pipe_interleave_bytes) + ws->info.pipe_interleave_bytes = + ws->info.chip_class >= EVERGREEN ? 512 : 256; + + radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, + &ws->info.num_tile_pipes); + + /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the + * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) + * reports a different value (12). Fix it by setting what's in the + * GB_TILE_MODE array (8). + */ + if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) + ws->info.num_tile_pipes = 8; + + if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, + &ws->info.r600_gb_backend_map)) + ws->info.r600_gb_backend_map_valid = true; + + /* Default value. */ + ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends); + /* + * This fails (silently) on non-GCN or older kernels, overwriting the + * default enabled_rb_mask with the result of the last query. + */ + if (ws->gen >= DRV_SI) + radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, + &ws->info.enabled_rb_mask); + + ws->info.r600_has_virtual_memory = false; + if (ws->info.drm_minor >= 13) { + uint32_t ib_vm_max_size; + + ws->info.r600_has_virtual_memory = true; + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, + &ws->va_start)) + ws->info.r600_has_virtual_memory = false; + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL, + &ib_vm_max_size)) + ws->info.r600_has_virtual_memory = false; + radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL, + &ws->va_unmap_working); + } + if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", false)) + ws->info.r600_has_virtual_memory = false; + } + + /* Get max pipes, this is only needed for compute shaders. All evergreen+ + * chips have at least 2 pipes, so we use 2 as a default. 
*/ + ws->info.r600_max_quad_pipes = 2; + radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL, + &ws->info.r600_max_quad_pipes); + + /* All GPUs have at least one compute unit */ + ws->info.num_good_compute_units = 1; + radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL, + &ws->info.num_good_compute_units); + + radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL, + &ws->info.max_se); + + switch (ws->info.family) { + case CHIP_HAINAN: + case CHIP_KABINI: + ws->info.num_tcc_blocks = 2; + break; + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_BONAIRE: + case CHIP_KAVERI: + ws->info.num_tcc_blocks = 4; + break; + case CHIP_PITCAIRN: + ws->info.num_tcc_blocks = 8; + break; + case CHIP_TAHITI: + ws->info.num_tcc_blocks = 12; + break; + case CHIP_HAWAII: + ws->info.num_tcc_blocks = 16; + break; + default: + ws->info.num_tcc_blocks = 0; + break; + } + + if (!ws->info.max_se) { + switch (ws->info.family) { + default: + ws->info.max_se = 1; + break; + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + case CHIP_BARTS: + case CHIP_CAYMAN: + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_BONAIRE: + ws->info.max_se = 2; + break; + case CHIP_HAWAII: + ws->info.max_se = 4; + break; + } + } + + radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL, + &ws->info.max_sh_per_se); + if (ws->gen == DRV_SI) { + ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units / + (ws->info.max_se * ws->info.max_sh_per_se); + } + + radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL, + &ws->accel_working2); + if (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 2) { + fprintf(stderr, "radeon: GPU acceleration for Hawaii disabled, " + "returned accel_working2 value %u is smaller than 2. " + "Please install a newer kernel.\n", + ws->accel_working2); + return false; + } + + if (ws->info.chip_class == GFX7) { + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL, + ws->info.cik_macrotile_mode_array)) { + fprintf(stderr, "radeon: Kernel 3.13 is required for Sea Islands support.\n"); + return false; + } + } + + if (ws->info.chip_class >= GFX6) { + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL, + ws->info.si_tile_mode_array)) { + fprintf(stderr, "radeon: Kernel 3.10 is required for Southern Islands support.\n"); + return false; + } + } + + /* Hawaii with old firmware needs type2 nop packet. + * accel_working2 with value 3 indicates the new firmware. + */ + ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= GFX6 || + (ws->info.family == CHIP_HAWAII && + ws->accel_working2 < 3); + ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */ + ws->info.ib_start_alignment = 4096; + ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40; + /* HTILE is broken with 1D tiling on old kernels and GFX7. */ + ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != GFX7 || + ws->info.drm_minor >= 38; + ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48; + ws->info.has_bo_metadata = false; + ws->info.has_gpu_reset_status_query = ws->info.drm_minor >= 43; + ws->info.has_eqaa_surface_allocator = false; + ws->info.has_format_bc1_through_bc7 = ws->info.drm_minor >= 31; + ws->info.kernel_flushes_tc_l2_after_ib = true; + /* Old kernels disallowed register writes via COPY_DATA + * that are used for indirect compute dispatches. */ + ws->info.has_indirect_compute_dispatch = ws->info.chip_class == GFX7 || + (ws->info.chip_class == GFX6 && + ws->info.drm_minor >= 45); + /* GFX6 doesn't support unaligned loads. 
*/ + ws->info.has_unaligned_shader_loads = ws->info.chip_class == GFX7 && + ws->info.drm_minor >= 50; + ws->info.has_sparse_vm_mappings = false; + /* 2D tiling on GFX7 is supported since DRM 2.35.0 */ + ws->info.has_2d_tiling = ws->info.chip_class <= GFX6 || ws->info.drm_minor >= 35; + ws->info.has_read_registers_query = ws->info.drm_minor >= 42; + ws->info.max_alignment = 1024*1024; + ws->info.has_graphics = true; + ws->info.cpdma_prefetch_writes_memory = true; + ws->info.max_wave64_per_simd = 10; + ws->info.num_physical_sgprs_per_simd = 512; + ws->info.num_physical_wave64_vgprs_per_simd = 256; + /* Potential hang on Kabini: */ + ws->info.use_late_alloc = ws->info.family != CHIP_KABINI; + + ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL || + strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL; + + return true; } static void radeon_winsys_destroy(struct radeon_winsys *rws) { - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; - if (util_queue_is_initialized(&ws->cs_queue)) - util_queue_destroy(&ws->cs_queue); + if (util_queue_is_initialized(&ws->cs_queue)) + util_queue_destroy(&ws->cs_queue); - mtx_destroy(&ws->hyperz_owner_mutex); - mtx_destroy(&ws->cmask_owner_mutex); + mtx_destroy(&ws->hyperz_owner_mutex); + mtx_destroy(&ws->cmask_owner_mutex); - if (ws->info.r600_has_virtual_memory) - pb_slabs_deinit(&ws->bo_slabs); - pb_cache_deinit(&ws->bo_cache); + if (ws->info.r600_has_virtual_memory) + pb_slabs_deinit(&ws->bo_slabs); + pb_cache_deinit(&ws->bo_cache); - if (ws->gen >= DRV_R600) { - radeon_surface_manager_free(ws->surf_man); - } + if (ws->gen >= DRV_R600) { + radeon_surface_manager_free(ws->surf_man); + } - _mesa_hash_table_destroy(ws->bo_names, NULL); - _mesa_hash_table_destroy(ws->bo_handles, NULL); - _mesa_hash_table_destroy(ws->bo_vas, NULL); - mtx_destroy(&ws->bo_handles_mutex); - mtx_destroy(&ws->vm32.mutex); - mtx_destroy(&ws->vm64.mutex); - mtx_destroy(&ws->bo_fence_lock); + _mesa_hash_table_destroy(ws->bo_names, NULL); + _mesa_hash_table_destroy(ws->bo_handles, NULL); + _mesa_hash_table_destroy(ws->bo_vas, NULL); + mtx_destroy(&ws->bo_handles_mutex); + mtx_destroy(&ws->vm32.mutex); + mtx_destroy(&ws->vm64.mutex); + mtx_destroy(&ws->bo_fence_lock); - if (ws->fd >= 0) - close(ws->fd); + if (ws->fd >= 0) + close(ws->fd); - FREE(rws); + FREE(rws); } static void radeon_query_info(struct radeon_winsys *rws, struct radeon_info *info) { - *info = ((struct radeon_drm_winsys *)rws)->info; + *info = ((struct radeon_drm_winsys *)rws)->info; } static bool radeon_cs_request_feature(struct radeon_cmdbuf *rcs, enum radeon_feature_id fid, bool enable) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - - switch (fid) { - case RADEON_FID_R300_HYPERZ_ACCESS: - return radeon_set_fd_access(cs, &cs->ws->hyperz_owner, - &cs->ws->hyperz_owner_mutex, - RADEON_INFO_WANT_HYPERZ, "Hyper-Z", - enable); - - case RADEON_FID_R300_CMASK_ACCESS: - return radeon_set_fd_access(cs, &cs->ws->cmask_owner, - &cs->ws->cmask_owner_mutex, - RADEON_INFO_WANT_CMASK, "AA optimizations", - enable); - } - return false; + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + + switch (fid) { + case RADEON_FID_R300_HYPERZ_ACCESS: + return radeon_set_fd_access(cs, &cs->ws->hyperz_owner, + &cs->ws->hyperz_owner_mutex, + RADEON_INFO_WANT_HYPERZ, "Hyper-Z", + enable); + + case RADEON_FID_R300_CMASK_ACCESS: + return radeon_set_fd_access(cs, &cs->ws->cmask_owner, + &cs->ws->cmask_owner_mutex, + 
RADEON_INFO_WANT_CMASK, "AA optimizations", + enable); + } + return false; } uint32_t radeon_drm_get_gpu_reset_counter(struct radeon_drm_winsys *ws) { - uint64_t retval = 0; + uint64_t retval = 0; - if (!ws->info.has_gpu_reset_status_query) - return 0; + if (!ws->info.has_gpu_reset_status_query) + return 0; - radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER, - "gpu-reset-counter", (uint32_t*)&retval); - return retval; + radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER, + "gpu-reset-counter", (uint32_t*)&retval); + return retval; } static uint64_t radeon_query_value(struct radeon_winsys *rws, enum radeon_value_id value) { - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; - uint64_t retval = 0; - - switch (value) { - case RADEON_REQUESTED_VRAM_MEMORY: - return ws->allocated_vram; - case RADEON_REQUESTED_GTT_MEMORY: - return ws->allocated_gtt; - case RADEON_MAPPED_VRAM: - return ws->mapped_vram; - case RADEON_MAPPED_GTT: - return ws->mapped_gtt; - case RADEON_BUFFER_WAIT_TIME_NS: - return ws->buffer_wait_time; - case RADEON_NUM_MAPPED_BUFFERS: - return ws->num_mapped_buffers; - case RADEON_TIMESTAMP: - if (ws->info.drm_minor < 20 || ws->gen < DRV_R600) { - assert(0); - return 0; - } - - radeon_get_drm_value(ws->fd, RADEON_INFO_TIMESTAMP, "timestamp", - (uint32_t*)&retval); - return retval; - case RADEON_NUM_GFX_IBS: - return ws->num_gfx_IBs; - case RADEON_NUM_SDMA_IBS: - return ws->num_sdma_IBs; - case RADEON_NUM_BYTES_MOVED: - radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BYTES_MOVED, - "num-bytes-moved", (uint32_t*)&retval); - return retval; - case RADEON_NUM_EVICTIONS: - case RADEON_NUM_VRAM_CPU_PAGE_FAULTS: - case RADEON_VRAM_VIS_USAGE: - case RADEON_GFX_BO_LIST_COUNTER: - case RADEON_GFX_IB_SIZE_COUNTER: - return 0; /* unimplemented */ - case RADEON_VRAM_USAGE: - radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE, - "vram-usage", (uint32_t*)&retval); - return retval; - case RADEON_GTT_USAGE: - radeon_get_drm_value(ws->fd, RADEON_INFO_GTT_USAGE, - "gtt-usage", (uint32_t*)&retval); - return retval; - case RADEON_GPU_TEMPERATURE: - radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_TEMP, - "gpu-temp", (uint32_t*)&retval); - return retval; - case RADEON_CURRENT_SCLK: - radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_SCLK, - "current-gpu-sclk", (uint32_t*)&retval); - return retval; - case RADEON_CURRENT_MCLK: - radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK, - "current-gpu-mclk", (uint32_t*)&retval); - return retval; - case RADEON_CS_THREAD_TIME: - return util_queue_get_thread_time_nano(&ws->cs_queue, 0); - } - return 0; + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + uint64_t retval = 0; + + switch (value) { + case RADEON_REQUESTED_VRAM_MEMORY: + return ws->allocated_vram; + case RADEON_REQUESTED_GTT_MEMORY: + return ws->allocated_gtt; + case RADEON_MAPPED_VRAM: + return ws->mapped_vram; + case RADEON_MAPPED_GTT: + return ws->mapped_gtt; + case RADEON_BUFFER_WAIT_TIME_NS: + return ws->buffer_wait_time; + case RADEON_NUM_MAPPED_BUFFERS: + return ws->num_mapped_buffers; + case RADEON_TIMESTAMP: + if (ws->info.drm_minor < 20 || ws->gen < DRV_R600) { + assert(0); + return 0; + } + + radeon_get_drm_value(ws->fd, RADEON_INFO_TIMESTAMP, "timestamp", + (uint32_t*)&retval); + return retval; + case RADEON_NUM_GFX_IBS: + return ws->num_gfx_IBs; + case RADEON_NUM_SDMA_IBS: + return ws->num_sdma_IBs; + case RADEON_NUM_BYTES_MOVED: + radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BYTES_MOVED, + "num-bytes-moved", (uint32_t*)&retval); + 
return retval; + case RADEON_NUM_EVICTIONS: + case RADEON_NUM_VRAM_CPU_PAGE_FAULTS: + case RADEON_VRAM_VIS_USAGE: + case RADEON_GFX_BO_LIST_COUNTER: + case RADEON_GFX_IB_SIZE_COUNTER: + return 0; /* unimplemented */ + case RADEON_VRAM_USAGE: + radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE, + "vram-usage", (uint32_t*)&retval); + return retval; + case RADEON_GTT_USAGE: + radeon_get_drm_value(ws->fd, RADEON_INFO_GTT_USAGE, + "gtt-usage", (uint32_t*)&retval); + return retval; + case RADEON_GPU_TEMPERATURE: + radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_TEMP, + "gpu-temp", (uint32_t*)&retval); + return retval; + case RADEON_CURRENT_SCLK: + radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_SCLK, + "current-gpu-sclk", (uint32_t*)&retval); + return retval; + case RADEON_CURRENT_MCLK: + radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK, + "current-gpu-mclk", (uint32_t*)&retval); + return retval; + case RADEON_CS_THREAD_TIME: + return util_queue_get_thread_time_nano(&ws->cs_queue, 0); + } + return 0; } static bool radeon_read_registers(struct radeon_winsys *rws, unsigned reg_offset, unsigned num_registers, uint32_t *out) { - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; - unsigned i; + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + unsigned i; - for (i = 0; i < num_registers; i++) { - uint32_t reg = reg_offset + i*4; + for (i = 0; i < num_registers; i++) { + uint32_t reg = reg_offset + i*4; - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, NULL, ®)) - return false; - out[i] = reg; - } - return true; + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, NULL, ®)) + return false; + out[i] = reg; + } + return true; } DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true) static bool radeon_winsys_unref(struct radeon_winsys *ws) { - struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws; - bool destroy; - - /* When the reference counter drops to zero, remove the fd from the table. - * This must happen while the mutex is locked, so that - * radeon_drm_winsys_create in another thread doesn't get the winsys - * from the table when the counter drops to 0. */ - mtx_lock(&fd_tab_mutex); - - destroy = pipe_reference(&rws->reference, NULL); - if (destroy && fd_tab) { - _mesa_hash_table_remove_key(fd_tab, intptr_to_pointer(rws->fd)); - if (_mesa_hash_table_num_entries(fd_tab) == 0) { - _mesa_hash_table_destroy(fd_tab, NULL); - fd_tab = NULL; - } - } - - mtx_unlock(&fd_tab_mutex); - return destroy; + struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws; + bool destroy; + + /* When the reference counter drops to zero, remove the fd from the table. + * This must happen while the mutex is locked, so that + * radeon_drm_winsys_create in another thread doesn't get the winsys + * from the table when the counter drops to 0. 
*/ + mtx_lock(&fd_tab_mutex); + + destroy = pipe_reference(&rws->reference, NULL); + if (destroy && fd_tab) { + _mesa_hash_table_remove_key(fd_tab, intptr_to_pointer(rws->fd)); + if (_mesa_hash_table_num_entries(fd_tab) == 0) { + _mesa_hash_table_destroy(fd_tab, NULL); + fd_tab = NULL; + } + } + + mtx_unlock(&fd_tab_mutex); + return destroy; } static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws, unsigned cache) { - struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws; + struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws; - if (util_queue_is_initialized(&rws->cs_queue)) { - util_pin_thread_to_L3(rws->cs_queue.threads[0], cache, - util_cpu_caps.cores_per_L3); - } + if (util_queue_is_initialized(&rws->cs_queue)) { + util_pin_thread_to_L3(rws->cs_queue.threads[0], cache, + util_cpu_caps.cores_per_L3); + } } PUBLIC struct radeon_winsys * radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, - radeon_screen_create_t screen_create) + radeon_screen_create_t screen_create) { - struct radeon_drm_winsys *ws; - - mtx_lock(&fd_tab_mutex); - if (!fd_tab) { - fd_tab = util_hash_table_create_fd_keys(); - } - - ws = util_hash_table_get(fd_tab, intptr_to_pointer(fd)); - if (ws) { - pipe_reference(NULL, &ws->reference); - mtx_unlock(&fd_tab_mutex); - return &ws->base; - } - - ws = CALLOC_STRUCT(radeon_drm_winsys); - if (!ws) { - mtx_unlock(&fd_tab_mutex); - return NULL; - } - - ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); - - if (!do_winsys_init(ws)) - goto fail1; - - pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS, - 500000, ws->check_vm ? 1.0f : 2.0f, 0, - MIN2(ws->info.vram_size, ws->info.gart_size), - radeon_bo_destroy, - radeon_bo_can_reclaim); - - if (ws->info.r600_has_virtual_memory) { - /* There is no fundamental obstacle to using slab buffer allocation - * without GPUVM, but enabling it requires making sure that the drivers - * honor the address offset. - */ - if (!pb_slabs_init(&ws->bo_slabs, - RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2, - RADEON_MAX_SLAB_HEAPS, - ws, - radeon_bo_can_reclaim_slab, - radeon_bo_slab_alloc, - radeon_bo_slab_free)) - goto fail_cache; - - ws->info.min_alloc_size = 1 << RADEON_SLAB_MIN_SIZE_LOG2; - } else { - ws->info.min_alloc_size = ws->info.gart_page_size; - } - - if (ws->gen >= DRV_R600) { - ws->surf_man = radeon_surface_manager_new(ws->fd); - if (!ws->surf_man) - goto fail_slab; - } - - /* init reference */ - pipe_reference_init(&ws->reference, 1); - - /* Set functions. */ - ws->base.unref = radeon_winsys_unref; - ws->base.destroy = radeon_winsys_destroy; - ws->base.query_info = radeon_query_info; - ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache; - ws->base.cs_request_feature = radeon_cs_request_feature; - ws->base.query_value = radeon_query_value; - ws->base.read_registers = radeon_read_registers; - - radeon_drm_bo_init_functions(ws); - radeon_drm_cs_init_functions(ws); - radeon_surface_init_functions(ws); - - (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain); - (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain); - - ws->bo_names = util_hash_table_create_ptr_keys(); - ws->bo_handles = util_hash_table_create_ptr_keys(); - ws->bo_vas = util_hash_table_create_ptr_keys(); - (void) mtx_init(&ws->bo_handles_mutex, mtx_plain); - (void) mtx_init(&ws->vm32.mutex, mtx_plain); - (void) mtx_init(&ws->vm64.mutex, mtx_plain); - (void) mtx_init(&ws->bo_fence_lock, mtx_plain); - list_inithead(&ws->vm32.holes); - list_inithead(&ws->vm64.holes); - - /* The kernel currently returns 8MB. 
Make sure this doesn't change. */ - if (ws->va_start > 8 * 1024 * 1024) { - /* Not enough 32-bit address space. */ - radeon_winsys_destroy(&ws->base); - mtx_unlock(&fd_tab_mutex); - return NULL; - } - - ws->vm32.start = ws->va_start; - ws->vm32.end = 1ull << 32; - - /* The maximum is 8GB of virtual address space limited by the kernel. - * It's obviously not enough for bigger cards, like Hawaiis with 4GB - * and 8GB of physical memory and 4GB of GART. - * - * Older kernels set the limit to 4GB, which is even worse, so they only - * have 32-bit address space. - */ - if (ws->info.drm_minor >= 41) { - ws->vm64.start = 1ull << 32; - ws->vm64.end = 1ull << 33; - } - - /* TTM aligns the BO size to the CPU page size */ - ws->info.gart_page_size = sysconf(_SC_PAGESIZE); - ws->info.pte_fragment_size = 64 * 1024; /* GPUVM page size */ - - if (ws->num_cpus > 1 && debug_get_option_thread()) - util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0); - - /* Create the screen at the end. The winsys must be initialized - * completely. - * - * Alternatively, we could create the screen based on "ws->gen" - * and link all drivers into one binary blob. */ - ws->base.screen = screen_create(&ws->base, config); - if (!ws->base.screen) { - radeon_winsys_destroy(&ws->base); - mtx_unlock(&fd_tab_mutex); - return NULL; - } - - _mesa_hash_table_insert(fd_tab, intptr_to_pointer(ws->fd), ws); - - /* We must unlock the mutex once the winsys is fully initialized, so that - * other threads attempting to create the winsys from the same fd will - * get a fully initialized winsys and not just half-way initialized. */ - mtx_unlock(&fd_tab_mutex); - - return &ws->base; + struct radeon_drm_winsys *ws; + + mtx_lock(&fd_tab_mutex); + if (!fd_tab) { + fd_tab = util_hash_table_create_fd_keys(); + } + + ws = util_hash_table_get(fd_tab, intptr_to_pointer(fd)); + if (ws) { + pipe_reference(NULL, &ws->reference); + mtx_unlock(&fd_tab_mutex); + return &ws->base; + } + + ws = CALLOC_STRUCT(radeon_drm_winsys); + if (!ws) { + mtx_unlock(&fd_tab_mutex); + return NULL; + } + + ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + + if (!do_winsys_init(ws)) + goto fail1; + + pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS, + 500000, ws->check_vm ? 1.0f : 2.0f, 0, + MIN2(ws->info.vram_size, ws->info.gart_size), + radeon_bo_destroy, + radeon_bo_can_reclaim); + + if (ws->info.r600_has_virtual_memory) { + /* There is no fundamental obstacle to using slab buffer allocation + * without GPUVM, but enabling it requires making sure that the drivers + * honor the address offset. + */ + if (!pb_slabs_init(&ws->bo_slabs, + RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2, + RADEON_MAX_SLAB_HEAPS, + ws, + radeon_bo_can_reclaim_slab, + radeon_bo_slab_alloc, + radeon_bo_slab_free)) + goto fail_cache; + + ws->info.min_alloc_size = 1 << RADEON_SLAB_MIN_SIZE_LOG2; + } else { + ws->info.min_alloc_size = ws->info.gart_page_size; + } + + if (ws->gen >= DRV_R600) { + ws->surf_man = radeon_surface_manager_new(ws->fd); + if (!ws->surf_man) + goto fail_slab; + } + + /* init reference */ + pipe_reference_init(&ws->reference, 1); + + /* Set functions. 
*/ + ws->base.unref = radeon_winsys_unref; + ws->base.destroy = radeon_winsys_destroy; + ws->base.query_info = radeon_query_info; + ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache; + ws->base.cs_request_feature = radeon_cs_request_feature; + ws->base.query_value = radeon_query_value; + ws->base.read_registers = radeon_read_registers; + + radeon_drm_bo_init_functions(ws); + radeon_drm_cs_init_functions(ws); + radeon_surface_init_functions(ws); + + (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain); + (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain); + + ws->bo_names = util_hash_table_create_ptr_keys(); + ws->bo_handles = util_hash_table_create_ptr_keys(); + ws->bo_vas = util_hash_table_create_ptr_keys(); + (void) mtx_init(&ws->bo_handles_mutex, mtx_plain); + (void) mtx_init(&ws->vm32.mutex, mtx_plain); + (void) mtx_init(&ws->vm64.mutex, mtx_plain); + (void) mtx_init(&ws->bo_fence_lock, mtx_plain); + list_inithead(&ws->vm32.holes); + list_inithead(&ws->vm64.holes); + + /* The kernel currently returns 8MB. Make sure this doesn't change. */ + if (ws->va_start > 8 * 1024 * 1024) { + /* Not enough 32-bit address space. */ + radeon_winsys_destroy(&ws->base); + mtx_unlock(&fd_tab_mutex); + return NULL; + } + + ws->vm32.start = ws->va_start; + ws->vm32.end = 1ull << 32; + + /* The maximum is 8GB of virtual address space limited by the kernel. + * It's obviously not enough for bigger cards, like Hawaiis with 4GB + * and 8GB of physical memory and 4GB of GART. + * + * Older kernels set the limit to 4GB, which is even worse, so they only + * have 32-bit address space. + */ + if (ws->info.drm_minor >= 41) { + ws->vm64.start = 1ull << 32; + ws->vm64.end = 1ull << 33; + } + + /* TTM aligns the BO size to the CPU page size */ + ws->info.gart_page_size = sysconf(_SC_PAGESIZE); + ws->info.pte_fragment_size = 64 * 1024; /* GPUVM page size */ + + if (ws->num_cpus > 1 && debug_get_option_thread()) + util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0); + + /* Create the screen at the end. The winsys must be initialized + * completely. + * + * Alternatively, we could create the screen based on "ws->gen" + * and link all drivers into one binary blob. */ + ws->base.screen = screen_create(&ws->base, config); + if (!ws->base.screen) { + radeon_winsys_destroy(&ws->base); + mtx_unlock(&fd_tab_mutex); + return NULL; + } + + _mesa_hash_table_insert(fd_tab, intptr_to_pointer(ws->fd), ws); + + /* We must unlock the mutex once the winsys is fully initialized, so that + * other threads attempting to create the winsys from the same fd will + * get a fully initialized winsys and not just half-way initialized. 
*/ + mtx_unlock(&fd_tab_mutex); + + return &ws->base; fail_slab: - if (ws->info.r600_has_virtual_memory) - pb_slabs_deinit(&ws->bo_slabs); + if (ws->info.r600_has_virtual_memory) + pb_slabs_deinit(&ws->bo_slabs); fail_cache: - pb_cache_deinit(&ws->bo_cache); + pb_cache_deinit(&ws->bo_cache); fail1: - mtx_unlock(&fd_tab_mutex); - if (ws->surf_man) - radeon_surface_manager_free(ws->surf_man); - if (ws->fd >= 0) - close(ws->fd); - - FREE(ws); - return NULL; + mtx_unlock(&fd_tab_mutex); + if (ws->surf_man) + radeon_surface_manager_free(ws->surf_man); + if (ws->fd >= 0) + close(ws->fd); + + FREE(ws); + return NULL; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index e23a963f37c..a45478f4244 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -37,76 +37,75 @@ struct radeon_drm_cs; enum radeon_generation { - DRV_R300, - DRV_R600, - DRV_SI + DRV_R300, + DRV_R600, + DRV_SI }; #define RADEON_SLAB_MIN_SIZE_LOG2 9 #define RADEON_SLAB_MAX_SIZE_LOG2 14 struct radeon_vm_heap { - mtx_t mutex; - uint64_t start; - uint64_t end; - struct list_head holes; + mtx_t mutex; + uint64_t start; + uint64_t end; + struct list_head holes; }; struct radeon_drm_winsys { - struct radeon_winsys base; - struct pipe_reference reference; - struct pb_cache bo_cache; - struct pb_slabs bo_slabs; - - int fd; /* DRM file descriptor */ - int num_cs; /* The number of command streams created. */ - uint64_t allocated_vram; - uint64_t allocated_gtt; - uint64_t mapped_vram; - uint64_t mapped_gtt; - uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */ - uint64_t num_gfx_IBs; - uint64_t num_sdma_IBs; - uint64_t num_mapped_buffers; - uint32_t next_bo_hash; - - enum radeon_generation gen; - struct radeon_info info; - uint32_t va_start; - uint32_t va_unmap_working; - uint32_t accel_working2; - - /* List of buffer GEM names. Protected by bo_handles_mutex. */ - struct hash_table *bo_names; - /* List of buffer handles. Protectded by bo_handles_mutex. */ - struct hash_table *bo_handles; - /* List of buffer virtual memory ranges. Protectded by bo_handles_mutex. */ - struct hash_table *bo_vas; - mtx_t bo_handles_mutex; - mtx_t bo_fence_lock; - - struct radeon_vm_heap vm32; - struct radeon_vm_heap vm64; - - bool check_vm; - - struct radeon_surface_manager *surf_man; - - uint32_t num_cpus; /* Number of CPUs. */ - - struct radeon_drm_cs *hyperz_owner; - mtx_t hyperz_owner_mutex; - struct radeon_drm_cs *cmask_owner; - mtx_t cmask_owner_mutex; - - /* multithreaded command submission */ - struct util_queue cs_queue; + struct radeon_winsys base; + struct pipe_reference reference; + struct pb_cache bo_cache; + struct pb_slabs bo_slabs; + + int fd; /* DRM file descriptor */ + int num_cs; /* The number of command streams created. */ + uint64_t allocated_vram; + uint64_t allocated_gtt; + uint64_t mapped_vram; + uint64_t mapped_gtt; + uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */ + uint64_t num_gfx_IBs; + uint64_t num_sdma_IBs; + uint64_t num_mapped_buffers; + uint32_t next_bo_hash; + + enum radeon_generation gen; + struct radeon_info info; + uint32_t va_start; + uint32_t va_unmap_working; + uint32_t accel_working2; + + /* List of buffer GEM names. Protected by bo_handles_mutex. */ + struct hash_table *bo_names; + /* List of buffer handles. Protectded by bo_handles_mutex. */ + struct hash_table *bo_handles; + /* List of buffer virtual memory ranges. Protectded by bo_handles_mutex. 
*/ + struct hash_table *bo_vas; + mtx_t bo_handles_mutex; + mtx_t bo_fence_lock; + + struct radeon_vm_heap vm32; + struct radeon_vm_heap vm64; + + bool check_vm; + + struct radeon_surface_manager *surf_man; + + uint32_t num_cpus; /* Number of CPUs. */ + + struct radeon_drm_cs *hyperz_owner; + mtx_t hyperz_owner_mutex; + struct radeon_drm_cs *cmask_owner; + mtx_t cmask_owner_mutex; + + /* multithreaded command submission */ + struct util_queue cs_queue; }; -static inline struct radeon_drm_winsys * -radeon_drm_winsys(struct radeon_winsys *base) +static inline struct radeon_drm_winsys *radeon_drm_winsys(struct radeon_winsys *base) { - return (struct radeon_drm_winsys*)base; + return (struct radeon_drm_winsys*)base; } uint32_t radeon_drm_get_gpu_reset_counter(struct radeon_drm_winsys *ws); |
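The radeon_get_drm_value() helper is called throughout radeon_query_value() and radeon_read_registers() above but is defined elsewhere in radeon_drm_winsys.c. As a reference for reviewers, the sketch below shows the likely shape of such a helper on top of libdrm's DRM_RADEON_INFO ioctl; the function name get_drm_value, the error message text, and the include paths are assumptions for illustration, not the exact Mesa implementation.

/* Minimal sketch of a DRM_RADEON_INFO query helper (assumed shape, not the
 * verbatim Mesa code).  Include paths follow libdrm's pkg-config layout. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <xf86drm.h>
#include <radeon_drm.h>

static bool get_drm_value(int fd, unsigned request, const char *errname,
                          uint32_t *out)
{
   struct drm_radeon_info info;
   int retval;

   memset(&info, 0, sizeof(info));
   /* The kernel writes the queried value through this user pointer. */
   info.value = (uint64_t)(uintptr_t)out;
   info.request = request;

   retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
   if (retval) {
      if (errname)
         fprintf(stderr, "radeon: Failed to get %s, error %d\n",
                 errname, retval);
      return false;
   }
   return true;
}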
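radeon_winsys_unref() and radeon_drm_winsys_create() above share one pattern: the fd-keyed hash table, the reference count, and the final removal are all manipulated under fd_tab_mutex, so a concurrent create on the same fd can never pick up a winsys whose counter has already dropped to zero, and the lock is only released once the winsys is fully initialized. The following self-contained sketch reproduces that pattern with hypothetical stand-in types (struct ws, a linked list in place of the hash table, a plain int in place of pipe_reference); it is an illustration of the locking scheme, not the driver code itself.

/* Sketch of the fd-deduplicated, reference-counted winsys lifetime
 * (stand-in types; the real code uses a hash table and pipe_reference). */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct ws {
   int fd;
   int refcount;        /* stands in for pipe_reference */
   struct ws *next;     /* stands in for the fd-keyed hash table */
};

static pthread_mutex_t fd_tab_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct ws *fd_tab;

struct ws *ws_create(int fd)
{
   pthread_mutex_lock(&fd_tab_mutex);
   for (struct ws *w = fd_tab; w; w = w->next) {
      if (w->fd == fd) {                 /* already created for this fd: share it */
         w->refcount++;
         pthread_mutex_unlock(&fd_tab_mutex);
         return w;
      }
   }

   struct ws *w = calloc(1, sizeof(*w));
   if (w) {
      w->fd = fd;
      w->refcount = 1;
      w->next = fd_tab;
      fd_tab = w;
   }
   /* The real code keeps the lock across the whole initialization so other
    * threads never observe a half-constructed winsys. */
   pthread_mutex_unlock(&fd_tab_mutex);
   return w;
}

bool ws_unref(struct ws *w)
{
   bool destroy;

   pthread_mutex_lock(&fd_tab_mutex);
   destroy = --w->refcount == 0;
   if (destroy) {
      /* Remove from the table while still holding the lock, so a concurrent
       * ws_create() cannot hand out a dying winsys. */
      for (struct ws **p = &fd_tab; *p; p = &(*p)->next) {
         if (*p == w) {
            *p = w->next;
            break;
         }
      }
   }
   pthread_mutex_unlock(&fd_tab_mutex);

   if (destroy)
      free(w);
   return destroy;
}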
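The virtual-address setup in radeon_drm_winsys_create() splits the GPU address space into two heaps: the 32-bit heap runs from the kernel-reserved va_start (at most 8 MiB) up to 4 GiB, and on DRM 2.41+ a second heap covers 4 GiB to 8 GiB. The snippet below only restates that arithmetic with example values (struct va_heap and the constants are illustrative, not taken from the headers).

/* Illustration of the vm32/vm64 heap layout computed during winsys init. */
#include <stdint.h>
#include <stdio.h>

struct va_heap { uint64_t start, end; };

int main(void)
{
   uint64_t va_start = 8 * 1024 * 1024;   /* kernel-reserved low range, 8 MiB */
   unsigned drm_minor = 41;               /* example DRM minor version */

   struct va_heap vm32 = { va_start, 1ull << 32 };
   struct va_heap vm64 = { 0, 0 };
   if (drm_minor >= 41) {                 /* 64-bit heap only on newer kernels */
      vm64.start = 1ull << 32;
      vm64.end   = 1ull << 33;
   }

   printf("vm32: 0x%llx - 0x%llx\n", (unsigned long long)vm32.start,
          (unsigned long long)vm32.end);
   printf("vm64: 0x%llx - 0x%llx\n", (unsigned long long)vm64.start,
          (unsigned long long)vm64.end);
   return 0;
}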