diff options
author | Dave Airlie <[email protected]> | 2016-10-07 09:16:09 +1000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2016-10-07 09:16:09 +1000 |
commit | f4e499ec79147f4172f3669ae9dafd941aaeeb65 (patch) | |
tree | 4e082d4d950ffd0a676a8131179ca0e7ce94d99f /src/amd/vulkan/winsys/amdgpu | |
parent | 28ecd3eac24ce41b8a855a50f366f1985d1dc934 (diff) |
radv: add initial non-conformant radv vulkan driver
This squashes all the radv development up until now into
one for merging.
History can be found:
https://github.com/airlied/mesa/tree/semi-interesting
This requires llvm 3.9 and is in no way considered
a conformant vulkan implementation. It can run a number
of vulkan applications, and supports all GPUs using
the amdgpu kernel driver.
Thanks to Intel for providing anv and spirv->nir,
and Emil Velikov for reviewing build integration.
Parts of this are:
Reviewed-by: Nicolai Hähnle <[email protected]>
Acked-by: Edward O'Callaghan <[email protected]>
Authors: Bas Nieuwenhuizen and Dave Airlie
Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd/vulkan/winsys/amdgpu')
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 297 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h | 50 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 778 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h | 51 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c | 523 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h | 29 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 359 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h | 57 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h | 30 |
9 files changed, 2174 insertions, 0 deletions
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c new file mode 100644 index 00000000000..7319a988872 --- /dev/null +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -0,0 +1,297 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based on amdgpu winsys. + * Copyright © 2011 Marek Olšák <[email protected]> + * Copyright © 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdio.h> + +#include "radv_amdgpu_bo.h" + +#include <amdgpu.h> +#include <amdgpu_drm.h> +#include <inttypes.h> + +static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) +{ + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + + if (bo->ws->debug_all_bos) { + pthread_mutex_lock(&bo->ws->global_bo_list_lock); + LIST_DEL(&bo->global_list_item); + bo->ws->num_buffers--; + pthread_mutex_unlock(&bo->ws->global_bo_list_lock); + } + amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP); + amdgpu_va_range_free(bo->va_handle); + amdgpu_bo_free(bo->bo); + FREE(bo); +} + +static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo) +{ + struct radv_amdgpu_winsys *ws = bo->ws; + + if (bo->ws->debug_all_bos) { + pthread_mutex_lock(&ws->global_bo_list_lock); + LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list); + ws->num_buffers++; + pthread_mutex_unlock(&ws->global_bo_list_lock); + } +} + +static struct radeon_winsys_bo * +radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, + uint64_t size, + unsigned alignment, + enum radeon_bo_domain initial_domain, + unsigned flags) +{ + struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); + struct radv_amdgpu_winsys_bo *bo; + struct amdgpu_bo_alloc_request request = {0}; + amdgpu_bo_handle buf_handle; + uint64_t va = 0; + amdgpu_va_handle va_handle; + int r; + bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo); + if (!bo) { + return NULL; + } + + request.alloc_size = size; + request.phys_alignment = alignment; + + if (initial_domain & RADEON_DOMAIN_VRAM) + request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; + if (initial_domain & RADEON_DOMAIN_GTT) + request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; + + if (flags & RADEON_FLAG_CPU_ACCESS) + request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (flags & RADEON_FLAG_NO_CPU_ACCESS) + request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + if (flags & RADEON_FLAG_GTT_WC) + request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; + + r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); + if (r) { + fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); + fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size); + fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment); + fprintf(stderr, "amdgpu: domains : %u\n", initial_domain); + goto error_bo_alloc; + } + + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, + size, alignment, 0, &va, &va_handle, 0); + if (r) + goto error_va_alloc; + + r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP); + if (r) + goto error_va_map; + + bo->bo = buf_handle; + bo->va = va; + bo->va_handle = va_handle; + bo->initial_domain = initial_domain; + bo->size = size; + bo->is_shared = false; + bo->ws = ws; + radv_amdgpu_add_buffer_to_global_list(bo); + return (struct radeon_winsys_bo *)bo; +error_va_map: + amdgpu_va_range_free(va_handle); + +error_va_alloc: + amdgpu_bo_free(buf_handle); + +error_bo_alloc: + FREE(bo); + return NULL; +} + +static uint64_t radv_amdgpu_winsys_bo_get_va(struct radeon_winsys_bo *_bo) +{ + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + return bo->va; +} + +static void * +radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo) +{ + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + int ret; + void *data; + ret = amdgpu_bo_cpu_map(bo->bo, &data); + if (ret) + return NULL; + return data; +} + +static void +radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo) +{ + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + amdgpu_bo_cpu_unmap(bo->bo); +} + +static struct radeon_winsys_bo * +radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, + int fd, unsigned *stride, + unsigned *offset) +{ + struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); + struct radv_amdgpu_winsys_bo *bo; + uint64_t va; + amdgpu_va_handle va_handle; + enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd; + struct amdgpu_bo_import_result result = {0}; + struct amdgpu_bo_info info = {0}; + enum radeon_bo_domain initial = 0; + int r; + bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo); + if (!bo) + return NULL; + + r = amdgpu_bo_import(ws->dev, type, fd, &result); + if (r) + goto error; + + r = amdgpu_bo_query_info(result.buf_handle, &info); + if (r) + goto error_query; + + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, + result.alloc_size, 1 << 20, 0, &va, &va_handle, 0); + if (r) + goto error_query; + + r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP); + if (r) + goto error_va_map; + + if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM) + initial |= RADEON_DOMAIN_VRAM; + if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT) + initial |= RADEON_DOMAIN_GTT; + + bo->bo = result.buf_handle; + bo->va = va; + bo->va_handle = va_handle; + bo->initial_domain = initial; + bo->size = result.alloc_size; + bo->is_shared = true; + return (struct radeon_winsys_bo *)bo; +error_va_map: + amdgpu_va_range_free(va_handle); + +error_query: + amdgpu_bo_free(result.buf_handle); + +error: + FREE(bo); + return NULL; +} + +static bool +radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, + struct radeon_winsys_bo *_bo, + int *fd) +{ + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd; + int r; + unsigned handle; + r = amdgpu_bo_export(bo->bo, type, &handle); + if (r) + return false; + + *fd = (int)handle; + bo->is_shared = true; + return true; +} + +static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split) +{ + switch (eg_tile_split) { + case 64: return 0; + case 128: return 1; + case 256: return 2; + case 512: return 3; + default: + case 1024: return 4; + case 2048: return 5; + case 4096: return 6; + } +} + +static void +radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo, + struct radeon_bo_metadata *md) +{ + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + struct amdgpu_bo_metadata metadata = {0}; + uint32_t tiling_flags = 0; + + if (md->macrotile == RADEON_LAYOUT_TILED) + tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */ + else if (md->microtile == RADEON_LAYOUT_TILED) + tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */ + else + tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */ + + tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config); + tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw)); + tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh)); + if (md->tile_split) + tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->tile_split)); + tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea)); + tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1); + + if (md->scanout) + tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */ + else + tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */ + + metadata.tiling_info = tiling_flags; + metadata.size_metadata = md->size_metadata; + memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata)); + + amdgpu_bo_set_metadata(bo->bo, &metadata); +} + +void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws) +{ + ws->base.buffer_create = radv_amdgpu_winsys_bo_create; + ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy; + ws->base.buffer_get_va = radv_amdgpu_winsys_bo_get_va; + ws->base.buffer_map = radv_amdgpu_winsys_bo_map; + ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap; + ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd; + ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd; + ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata; +} diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h new file mode 100644 index 00000000000..59a1bb76502 --- /dev/null +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h @@ -0,0 +1,50 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based on amdgpu winsys. + * Copyright © 2011 Marek Olšák <[email protected]> + * Copyright © 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#pragma once +#include "radv_amdgpu_winsys.h" +struct radv_amdgpu_winsys_bo { + amdgpu_bo_handle bo; + amdgpu_va_handle va_handle; + + uint64_t va; + enum radeon_bo_domain initial_domain; + uint64_t size; + bool is_shared; + + struct radv_amdgpu_winsys *ws; + struct list_head global_list_item; +}; + +static inline +struct radv_amdgpu_winsys_bo *radv_amdgpu_winsys_bo(struct radeon_winsys_bo *bo) +{ + return (struct radv_amdgpu_winsys_bo *)bo; +} + +void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws); + diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c new file mode 100644 index 00000000000..dedc778f1cd --- /dev/null +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -0,0 +1,778 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdlib.h> +#include <amdgpu.h> +#include <amdgpu_drm.h> +#include <assert.h> + +#include "amdgpu_id.h" +#include "radv_radeon_winsys.h" +#include "radv_amdgpu_cs.h" +#include "radv_amdgpu_bo.h" +#include "sid.h" + +struct radv_amdgpu_cs { + struct radeon_winsys_cs base; + struct radv_amdgpu_winsys *ws; + + struct amdgpu_cs_ib_info ib; + + struct radeon_winsys_bo *ib_buffer; + uint8_t *ib_mapped; + unsigned max_num_buffers; + unsigned num_buffers; + amdgpu_bo_handle *handles; + uint8_t *priorities; + + struct radeon_winsys_bo **old_ib_buffers; + unsigned num_old_ib_buffers; + unsigned max_num_old_ib_buffers; + unsigned *ib_size_ptr; + bool failed; + bool is_chained; + + int buffer_hash_table[1024]; +}; + +static inline struct radv_amdgpu_cs * +radv_amdgpu_cs(struct radeon_winsys_cs *base) +{ + return (struct radv_amdgpu_cs*)base; +} + + +static struct radeon_winsys_fence *radv_amdgpu_create_fence() +{ + struct radv_amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence)); + return (struct radeon_winsys_fence*)fence; +} + +static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence) +{ + struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; + free(fence); +} + +static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws, + struct radeon_winsys_fence *_fence, + bool absolute, + uint64_t timeout) +{ + struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; + unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0; + int r; + uint32_t expired = 0; + /* Now use the libdrm query. */ + r = amdgpu_cs_query_fence_status(fence, + timeout, + flags, + &expired); + + if (r) { + fprintf(stderr, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n"); + return false; + } + + if (expired) { + return true; + } + return false; + +} + +static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs) +{ + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs); + if (cs->ib_buffer) + cs->ws->base.buffer_destroy(cs->ib_buffer); + else + free(cs->base.buf); + + for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) + cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); + free(cs->old_ib_buffers); + free(cs->handles); + free(cs->priorities); + free(cs); +} + +static boolean radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs, + enum ring_type ring_type) +{ + for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i) { + cs->buffer_hash_table[i] = -1; + } + return true; +} + +static struct radeon_winsys_cs * +radv_amdgpu_cs_create(struct radeon_winsys *ws, + enum ring_type ring_type) +{ + struct radv_amdgpu_cs *cs; + uint32_t ib_size = 20 * 1024 * 4; + cs = calloc(1, sizeof(struct radv_amdgpu_cs)); + if (!cs) + return NULL; + + cs->ws = radv_amdgpu_winsys(ws); + radv_amdgpu_init_cs(cs, RING_GFX); + + if (cs->ws->use_ib_bos) { + cs->ib_buffer = ws->buffer_create(ws, ib_size, 0, + RADEON_DOMAIN_GTT, + RADEON_FLAG_CPU_ACCESS); + if (!cs->ib_buffer) { + free(cs); + return NULL; + } + + cs->ib_mapped = ws->buffer_map(cs->ib_buffer); + if (!cs->ib_mapped) { + ws->buffer_destroy(cs->ib_buffer); + free(cs); + return NULL; + } + + cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va; + cs->base.buf = (uint32_t *)cs->ib_mapped; + cs->base.max_dw = ib_size / 4 - 4; + cs->ib_size_ptr = &cs->ib.size; + cs->ib.size = 0; + + ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8); + } else { + cs->base.buf = malloc(16384); + cs->base.max_dw = 4096; + if (!cs->base.buf) { + free(cs); + return NULL; + } + } + + return &cs->base; +} + +static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size) +{ + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); + uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2); + + /* max that fits in the chain size field. */ + ib_size = MIN2(ib_size, 0xfffff); + + if (cs->failed) { + cs->base.cdw = 0; + return; + } + + if (!cs->ws->use_ib_bos) { + uint32_t *new_buf = realloc(cs->base.buf, ib_size); + if (new_buf) { + cs->base.buf = new_buf; + cs->base.max_dw = ib_size / 4; + } else { + cs->failed = true; + cs->base.cdw = 0; + } + return; + } + + while (!cs->base.cdw || (cs->base.cdw & 7) != 4) + cs->base.buf[cs->base.cdw++] = 0xffff1000; + + *cs->ib_size_ptr |= cs->base.cdw + 4; + + if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) { + cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2); + cs->old_ib_buffers = realloc(cs->old_ib_buffers, + cs->max_num_old_ib_buffers * sizeof(void*)); + } + + cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer; + + cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0, + RADEON_DOMAIN_GTT, + RADEON_FLAG_CPU_ACCESS); + + if (!cs->ib_buffer) { + cs->base.cdw = 0; + cs->failed = true; + cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers]; + } + + cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer); + if (!cs->ib_mapped) { + cs->ws->base.buffer_destroy(cs->ib_buffer); + cs->base.cdw = 0; + cs->failed = true; + cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers]; + } + + cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8); + + cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0); + cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va; + cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32; + cs->ib_size_ptr = cs->base.buf + cs->base.cdw; + cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1); + + cs->base.buf = (uint32_t *)cs->ib_mapped; + cs->base.cdw = 0; + cs->base.max_dw = ib_size / 4 - 4; + +} + +static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs) +{ + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); + + if (cs->ws->use_ib_bos) { + while (!cs->base.cdw || (cs->base.cdw & 7) != 0) + cs->base.buf[cs->base.cdw++] = 0xffff1000; + + *cs->ib_size_ptr |= cs->base.cdw; + + cs->is_chained = false; + } + + return !cs->failed; +} + +static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs) +{ + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); + cs->base.cdw = 0; + cs->failed = false; + + for (unsigned i = 0; i < cs->num_buffers; ++i) { + unsigned hash = ((uintptr_t)cs->handles[i] >> 6) & + (ARRAY_SIZE(cs->buffer_hash_table) - 1); + cs->buffer_hash_table[hash] = -1; + } + + cs->num_buffers = 0; + + if (cs->ws->use_ib_bos) { + cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8); + + for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) + cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); + + cs->num_old_ib_buffers = 0; + cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va; + cs->ib_size_ptr = &cs->ib.size; + cs->ib.size = 0; + } +} + +static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs, + amdgpu_bo_handle bo) +{ + unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1); + int index = cs->buffer_hash_table[hash]; + + if (index == -1) + return -1; + + if(cs->handles[index] == bo) + return index; + + for (unsigned i = 0; i < cs->num_buffers; ++i) { + if (cs->handles[i] == bo) { + cs->buffer_hash_table[hash] = i; + return i; + } + } + return -1; +} + +static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, + amdgpu_bo_handle bo, + uint8_t priority) +{ + unsigned hash; + int index = radv_amdgpu_cs_find_buffer(cs, bo); + + if (index != -1) { + cs->priorities[index] = MAX2(cs->priorities[index], priority); + return; + } + + if (cs->num_buffers == cs->max_num_buffers) { + unsigned new_count = MAX2(1, cs->max_num_buffers * 2); + cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle)); + cs->priorities = realloc(cs->priorities, new_count * sizeof(uint8_t)); + cs->max_num_buffers = new_count; + } + + cs->handles[cs->num_buffers] = bo; + cs->priorities[cs->num_buffers] = priority; + + hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1); + cs->buffer_hash_table[hash] = cs->num_buffers; + + ++cs->num_buffers; +} + +static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs, + struct radeon_winsys_bo *_bo, + uint8_t priority) +{ + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + + radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority); +} + +static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent, + struct radeon_winsys_cs *_child) +{ + struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent); + struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child); + + for (unsigned i = 0; i < child->num_buffers; ++i) { + radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i], + child->priorities[i]); + } + + if (parent->ws->use_ib_bos) { + if (parent->base.cdw + 4 > parent->base.max_dw) + radv_amdgpu_cs_grow(&parent->base, 4); + + parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0); + parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address; + parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32; + parent->base.buf[parent->base.cdw++] = child->ib.size; + } else { + if (parent->base.cdw + child->base.cdw > parent->base.max_dw) + radv_amdgpu_cs_grow(&parent->base, child->base.cdw); + + memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw); + parent->base.cdw += child->base.cdw; + } +} + +static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, + struct radeon_winsys_cs **cs_array, + unsigned count, + struct radv_amdgpu_winsys_bo *extra_bo, + amdgpu_bo_list_handle *bo_list) +{ + int r; + if (ws->debug_all_bos) { + struct radv_amdgpu_winsys_bo *bo; + amdgpu_bo_handle *handles; + unsigned num = 0; + + pthread_mutex_lock(&ws->global_bo_list_lock); + + handles = malloc(sizeof(handles[0]) * ws->num_buffers); + if (!handles) { + pthread_mutex_unlock(&ws->global_bo_list_lock); + return -ENOMEM; + } + + LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) { + assert(num < ws->num_buffers); + handles[num++] = bo->bo; + } + + r = amdgpu_bo_list_create(ws->dev, ws->num_buffers, + handles, NULL, + bo_list); + free(handles); + pthread_mutex_unlock(&ws->global_bo_list_lock); + } else if (count == 1 && !extra_bo) { + struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0]; + r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles, + cs->priorities, bo_list); + } else { + unsigned total_buffer_count = !!extra_bo; + unsigned unique_bo_count = !!extra_bo; + for (unsigned i = 0; i < count; ++i) { + struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i]; + total_buffer_count += cs->num_buffers; + } + + amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count); + uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count); + if (!handles || !priorities) { + free(handles); + free(priorities); + return -ENOMEM; + } + + if (extra_bo) { + handles[0] = extra_bo->bo; + priorities[0] = 8; + } + + for (unsigned i = 0; i < count; ++i) { + struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i]; + for (unsigned j = 0; j < cs->num_buffers; ++j) { + bool found = false; + for (unsigned k = 0; k < unique_bo_count; ++k) { + if (handles[k] == cs->handles[j]) { + found = true; + priorities[k] = MAX2(priorities[k], + cs->priorities[j]); + break; + } + } + if (!found) { + handles[unique_bo_count] = cs->handles[j]; + priorities[unique_bo_count] = cs->priorities[j]; + ++unique_bo_count; + } + } + } + r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles, + priorities, bo_list); + + free(handles); + free(priorities); + } + return r; +} + +static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, + struct radeon_winsys_cs **cs_array, + unsigned cs_count, + struct radeon_winsys_fence *_fence) +{ + int r; + struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); + struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; + struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]); + amdgpu_bo_list_handle bo_list; + struct amdgpu_cs_request request = {0}; + + for (unsigned i = cs_count; i--;) { + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); + + if (cs->is_chained) { + *cs->ib_size_ptr -= 4; + cs->is_chained = false; + } + + if (i + 1 < cs_count) { + struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]); + assert(cs->base.cdw + 4 <= cs->base.max_dw); + + cs->is_chained = true; + *cs->ib_size_ptr += 4; + + cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0); + cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address; + cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32; + cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size; + } + } + + r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list); + if (r) { + fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); + return r; + } + + request.ip_type = AMDGPU_HW_IP_GFX; + request.number_of_ibs = 1; + request.ibs = &cs0->ib; + request.resources = bo_list; + + r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1); + if (r) { + if (r == -ENOMEM) + fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); + else + fprintf(stderr, "amdgpu: The CS has been rejected, " + "see dmesg for more information.\n"); + } + + amdgpu_bo_list_destroy(bo_list); + + if (fence) { + fence->context = ctx->ctx; + fence->ip_type = request.ip_type; + fence->ip_instance = request.ip_instance; + fence->ring = request.ring; + fence->fence = request.seq_no; + } + ctx->last_seq_no = request.seq_no; + + return r; +} + +static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, + struct radeon_winsys_cs **cs_array, + unsigned cs_count, + struct radeon_winsys_fence *_fence) +{ + int r; + struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); + struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; + amdgpu_bo_list_handle bo_list; + struct amdgpu_cs_request request; + + assert(cs_count); + + for (unsigned i = 0; i < cs_count;) { + struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]); + struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; + unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i); + + memset(&request, 0, sizeof(request)); + + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list); + if (r) { + fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); + return r; + } + + request.ip_type = AMDGPU_HW_IP_GFX; + request.resources = bo_list; + request.number_of_ibs = cnt; + request.ibs = ibs; + + for (unsigned j = 0; j < cnt; ++j) { + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); + ibs[j] = cs->ib; + + if (cs->is_chained) { + *cs->ib_size_ptr -= 4; + cs->is_chained = false; + } + } + + r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1); + if (r) { + if (r == -ENOMEM) + fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); + else + fprintf(stderr, "amdgpu: The CS has been rejected, " + "see dmesg for more information.\n"); + } + + amdgpu_bo_list_destroy(bo_list); + + if (r) + return r; + + i += cnt; + } + if (fence) { + fence->context = ctx->ctx; + fence->ip_type = request.ip_type; + fence->ip_instance = request.ip_instance; + fence->ring = request.ring; + fence->fence = request.seq_no; + } + ctx->last_seq_no = request.seq_no; + + return 0; +} + +static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, + struct radeon_winsys_cs **cs_array, + unsigned cs_count, + struct radeon_winsys_fence *_fence) +{ + int r; + struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); + struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence; + struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]); + struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws; + amdgpu_bo_list_handle bo_list; + struct amdgpu_cs_request request; + uint32_t pad_word = 0xffff1000U; + + if (radv_amdgpu_winsys(ws)->family == FAMILY_SI) + pad_word = 0x80000000; + + assert(cs_count); + + for (unsigned i = 0; i < cs_count;) { + struct amdgpu_cs_ib_info ib = {0}; + struct radeon_winsys_bo *bo = NULL; + uint32_t *ptr; + unsigned cnt = 0; + unsigned size = 0; + + while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) { + size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw; + ++cnt; + } + + assert(cnt); + + bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS); + ptr = ws->buffer_map(bo); + + for (unsigned j = 0; j < cnt; ++j) { + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); + memcpy(ptr, cs->base.buf, 4 * cs->base.cdw); + ptr += cs->base.cdw; + + } + + while(!size || (size & 7)) { + *ptr++ = pad_word; + ++size; + } + + memset(&request, 0, sizeof(request)); + + + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, + (struct radv_amdgpu_winsys_bo*)bo, &bo_list); + if (r) { + fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); + return r; + } + + ib.size = size; + ib.ib_mc_address = ws->buffer_get_va(bo); + + request.ip_type = AMDGPU_HW_IP_GFX; + request.resources = bo_list; + request.number_of_ibs = 1; + request.ibs = &ib; + + r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1); + if (r) { + if (r == -ENOMEM) + fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); + else + fprintf(stderr, "amdgpu: The CS has been rejected, " + "see dmesg for more information.\n"); + } + + amdgpu_bo_list_destroy(bo_list); + + ws->buffer_destroy(bo); + if (r) + return r; + + i += cnt; + } + if (fence) { + fence->context = ctx->ctx; + fence->ip_type = request.ip_type; + fence->ip_instance = request.ip_instance; + fence->ring = request.ring; + fence->fence = request.seq_no; + } + ctx->last_seq_no = request.seq_no; + + return 0; +} + +static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, + struct radeon_winsys_cs **cs_array, + unsigned cs_count, + bool can_patch, + struct radeon_winsys_fence *_fence) +{ + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]); + if (!cs->ws->use_ib_bos) { + return radv_amdgpu_winsys_cs_submit_sysmem(_ctx, cs_array, + cs_count, _fence); + } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) { + return radv_amdgpu_winsys_cs_submit_chained(_ctx, cs_array, + cs_count, _fence); + } else { + return radv_amdgpu_winsys_cs_submit_fallback(_ctx, cs_array, + cs_count, _fence); + } +} + +static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws) +{ + struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); + struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx); + int r; + + if (!ctx) + return NULL; + r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx); + if (r) { + fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create failed. (%i)\n", r); + goto error_create; + } + ctx->ws = ws; + return (struct radeon_winsys_ctx *)ctx; +error_create: + return NULL; +} + +static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx) +{ + struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx; + amdgpu_cs_ctx_free(ctx->ctx); + FREE(ctx); +} + +static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx) +{ + struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx; + + if (ctx->last_seq_no) { + uint32_t expired; + struct amdgpu_cs_fence fence; + + fence.context = ctx->ctx; + fence.ip_type = RING_GFX; + fence.ip_instance = 0; + fence.ring = 0; + fence.fence = ctx->last_seq_no; + + int ret = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0, + &expired); + + if (ret || !expired) + return false; + } + + return true; +} + +void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws) +{ + ws->base.ctx_create = radv_amdgpu_ctx_create; + ws->base.ctx_destroy = radv_amdgpu_ctx_destroy; + ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle; + ws->base.cs_create = radv_amdgpu_cs_create; + ws->base.cs_destroy = radv_amdgpu_cs_destroy; + ws->base.cs_grow = radv_amdgpu_cs_grow; + ws->base.cs_finalize = radv_amdgpu_cs_finalize; + ws->base.cs_reset = radv_amdgpu_cs_reset; + ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer; + ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary; + ws->base.cs_submit = radv_amdgpu_winsys_cs_submit; + ws->base.create_fence = radv_amdgpu_create_fence; + ws->base.destroy_fence = radv_amdgpu_destroy_fence; + ws->base.fence_wait = radv_amdgpu_fence_wait; +} diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h new file mode 100644 index 00000000000..230639a2580 --- /dev/null +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h @@ -0,0 +1,51 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based on amdgpu winsys. + * Copyright © 2011 Marek Olšák <[email protected]> + * Copyright © 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#pragma once + +#include <string.h> +#include <stdint.h> +#include <assert.h> +#include "r600d_common.h" +#include <amdgpu.h> + +#include "radv_radeon_winsys.h" + +#include "radv_amdgpu_winsys.h" +struct radv_amdgpu_ctx { + struct radv_amdgpu_winsys *ws; + amdgpu_context_handle ctx; + uint64_t last_seq_no; +}; + +static inline struct radv_amdgpu_ctx * +radv_amdgpu_ctx(struct radeon_winsys_ctx *base) +{ + return (struct radv_amdgpu_ctx *)base; +} + +void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c new file mode 100644 index 00000000000..a3c24115a13 --- /dev/null +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c @@ -0,0 +1,523 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based on amdgpu winsys. + * Copyright © 2011 Marek Olšák <[email protected]> + * Copyright © 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include "radv_private.h" +#include "addrlib/addrinterface.h" +#include "util/bitset.h" +#include "radv_amdgpu_winsys.h" +#include "radv_amdgpu_surface.h" +#include "sid.h" +#ifndef NO_ENTRIES +#define NO_ENTRIES 32 +#endif + +#ifndef NO_MACRO_ENTRIES +#define NO_MACRO_ENTRIES 16 +#endif + +#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND +#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A +#endif + +static int radv_amdgpu_surface_sanity(const struct radeon_surf *surf) +{ + unsigned type = RADEON_SURF_GET(surf->flags, TYPE); + + if (!(surf->flags & RADEON_SURF_HAS_TILE_MODE_INDEX)) + return -EINVAL; + + /* all dimension must be at least 1 ! */ + if (!surf->npix_x || !surf->npix_y || !surf->npix_z || + !surf->array_size) + return -EINVAL; + + if (!surf->blk_w || !surf->blk_h || !surf->blk_d) + return -EINVAL; + + switch (surf->nsamples) { + case 1: + case 2: + case 4: + case 8: + break; + default: + return -EINVAL; + } + + switch (type) { + case RADEON_SURF_TYPE_1D: + if (surf->npix_y > 1) + return -EINVAL; + /* fall through */ + case RADEON_SURF_TYPE_2D: + case RADEON_SURF_TYPE_CUBEMAP: + if (surf->npix_z > 1 || surf->array_size > 1) + return -EINVAL; + break; + case RADEON_SURF_TYPE_3D: + if (surf->array_size > 1) + return -EINVAL; + break; + case RADEON_SURF_TYPE_1D_ARRAY: + if (surf->npix_y > 1) + return -EINVAL; + /* fall through */ + case RADEON_SURF_TYPE_2D_ARRAY: + if (surf->npix_z > 1) + return -EINVAL; + break; + default: + return -EINVAL; + } + return 0; +} + +static void *ADDR_API radv_allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput) +{ + return malloc(pInput->sizeInBytes); +} + +static ADDR_E_RETURNCODE ADDR_API radv_freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput) +{ + free(pInput->pVirtAddr); + return ADDR_OK; +} + +ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family, int rev_id, + enum chip_class chip_class) +{ + ADDR_CREATE_INPUT addrCreateInput = {0}; + ADDR_CREATE_OUTPUT addrCreateOutput = {0}; + ADDR_REGISTER_VALUE regValue = {0}; + ADDR_CREATE_FLAGS createFlags = {{0}}; + ADDR_E_RETURNCODE addrRet; + + addrCreateInput.size = sizeof(ADDR_CREATE_INPUT); + addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT); + + regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3; + regValue.gbAddrConfig = amdinfo->gb_addr_cfg; + regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2; + + regValue.backendDisables = amdinfo->backend_disable[0]; + regValue.pTileConfig = amdinfo->gb_tile_mode; + regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode); + if (chip_class == SI) { + regValue.pMacroTileConfig = NULL; + regValue.noOfMacroEntries = 0; + } else { + regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode; + regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode); + } + + createFlags.value = 0; + createFlags.useTileIndex = 1; + createFlags.degradeBaseLevel = 1; + + addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND; + addrCreateInput.chipFamily = family; + addrCreateInput.chipRevision = rev_id; + addrCreateInput.createFlags = createFlags; + addrCreateInput.callbacks.allocSysMem = radv_allocSysMem; + addrCreateInput.callbacks.freeSysMem = radv_freeSysMem; + addrCreateInput.callbacks.debugPrint = 0; + addrCreateInput.regValue = regValue; + + addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput); + if (addrRet != ADDR_OK) + return NULL; + + return addrCreateOutput.hLib; +} + +static int radv_compute_level(ADDR_HANDLE addrlib, + struct radeon_surf *surf, bool is_stencil, + unsigned level, unsigned type, bool compressed, + ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut, + ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn, + ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut) +{ + struct radeon_surf_level *surf_level; + ADDR_E_RETURNCODE ret; + + AddrSurfInfoIn->mipLevel = level; + AddrSurfInfoIn->width = u_minify(surf->npix_x, level); + AddrSurfInfoIn->height = u_minify(surf->npix_y, level); + + if (type == RADEON_SURF_TYPE_3D) + AddrSurfInfoIn->numSlices = u_minify(surf->npix_z, level); + else if (type == RADEON_SURF_TYPE_CUBEMAP) + AddrSurfInfoIn->numSlices = 6; + else + AddrSurfInfoIn->numSlices = surf->array_size; + + if (level > 0) { + /* Set the base level pitch. This is needed for calculation + * of non-zero levels. */ + if (is_stencil) + AddrSurfInfoIn->basePitch = surf->stencil_level[0].nblk_x; + else + AddrSurfInfoIn->basePitch = surf->level[0].nblk_x; + + /* Convert blocks to pixels for compressed formats. */ + if (compressed) + AddrSurfInfoIn->basePitch *= surf->blk_w; + } + + ret = AddrComputeSurfaceInfo(addrlib, + AddrSurfInfoIn, + AddrSurfInfoOut); + if (ret != ADDR_OK) { + return ret; + } + + surf_level = is_stencil ? &surf->stencil_level[level] : &surf->level[level]; + surf_level->offset = align64(surf->bo_size, AddrSurfInfoOut->baseAlign); + surf_level->slice_size = AddrSurfInfoOut->sliceSize; + surf_level->pitch_bytes = AddrSurfInfoOut->pitch * (is_stencil ? 1 : surf->bpe); + surf_level->npix_x = u_minify(surf->npix_x, level); + surf_level->npix_y = u_minify(surf->npix_y, level); + surf_level->npix_z = u_minify(surf->npix_z, level); + surf_level->nblk_x = AddrSurfInfoOut->pitch; + surf_level->nblk_y = AddrSurfInfoOut->height; + if (type == RADEON_SURF_TYPE_3D) + surf_level->nblk_z = AddrSurfInfoOut->depth; + else + surf_level->nblk_z = 1; + + switch (AddrSurfInfoOut->tileMode) { + case ADDR_TM_LINEAR_ALIGNED: + surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED; + break; + case ADDR_TM_1D_TILED_THIN1: + surf_level->mode = RADEON_SURF_MODE_1D; + break; + case ADDR_TM_2D_TILED_THIN1: + surf_level->mode = RADEON_SURF_MODE_2D; + break; + default: + assert(0); + } + + if (is_stencil) + surf->stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex; + else + surf->tiling_index[level] = AddrSurfInfoOut->tileIndex; + + surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize; + + /* Clear DCC fields at the beginning. */ + surf_level->dcc_offset = 0; + surf_level->dcc_enabled = false; + + /* The previous level's flag tells us if we can use DCC for this level. */ + if (AddrSurfInfoIn->flags.dccCompatible && + (level == 0 || AddrDccOut->subLvlCompressible)) { + AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize; + AddrDccIn->tileMode = AddrSurfInfoOut->tileMode; + AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo; + AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex; + AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; + + ret = AddrComputeDccInfo(addrlib, + AddrDccIn, + AddrDccOut); + + if (ret == ADDR_OK) { + surf_level->dcc_offset = surf->dcc_size; + surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize; + surf_level->dcc_enabled = true; + surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize; + surf->dcc_alignment = MAX(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign); + } + } + + return 0; +} + +static void radv_set_micro_tile_mode(struct radeon_surf *surf, + struct radeon_info *info) +{ + uint32_t tile_mode = info->si_tile_mode_array[surf->tiling_index[0]]; + + if (info->chip_class >= CIK) + surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); + else + surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); +} + + +static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws, + struct radeon_surf *surf) +{ + struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); + unsigned level, mode, type; + bool compressed; + ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; + ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; + ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; + ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; + ADDR_TILEINFO AddrTileInfoIn = {0}; + ADDR_TILEINFO AddrTileInfoOut = {0}; + int r; + + r = radv_amdgpu_surface_sanity(surf); + if (r) + return r; + + AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT); + AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); + AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); + AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); + AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; + + type = RADEON_SURF_GET(surf->flags, TYPE); + mode = RADEON_SURF_GET(surf->flags, MODE); + compressed = surf->blk_w == 4 && surf->blk_h == 4; + + /* MSAA and FMASK require 2D tiling. */ + if (surf->nsamples > 1 || + (surf->flags & RADEON_SURF_FMASK)) + mode = RADEON_SURF_MODE_2D; + + /* DB doesn't support linear layouts. */ + if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && + mode < RADEON_SURF_MODE_1D) + mode = RADEON_SURF_MODE_1D; + + /* Set the requested tiling mode. */ + switch (mode) { + case RADEON_SURF_MODE_LINEAR_ALIGNED: + AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED; + break; + case RADEON_SURF_MODE_1D: + AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1; + break; + case RADEON_SURF_MODE_2D: + AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1; + break; + default: + assert(0); + } + + /* The format must be set correctly for the allocation of compressed + * textures to work. In other cases, setting the bpp is sufficient. */ + if (compressed) { + switch (surf->bpe) { + case 8: + AddrSurfInfoIn.format = ADDR_FMT_BC1; + break; + case 16: + AddrSurfInfoIn.format = ADDR_FMT_BC3; + break; + default: + assert(0); + } + } + else { + AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8; + } + + AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples; + AddrSurfInfoIn.tileIndex = -1; + + /* Set the micro tile type. */ + if (surf->flags & RADEON_SURF_SCANOUT) + AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE; + else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) + AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER; + else + AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE; + + AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); + AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; + AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP; + AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0; + AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0; + AddrSurfInfoIn.flags.degrade4Space = 1; + + /* DCC notes: + * - If we add MSAA support, keep in mind that CB can't decompress 8bpp + * with samples >= 4. + * - Mipmapped array textures have low performance (discovered by a closed + * driver team). + */ + AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + !(surf->flags & RADEON_SURF_DISABLE_DCC) && + !compressed && AddrDccIn.numSamples <= 1 && + ((surf->array_size == 1 && surf->npix_z == 1) || + surf->last_level == 0); + + AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0; + AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth; + + /* noStencil = 0 can result in a depth part that is incompatible with + * mipmapped texturing. So set noStencil = 1 when mipmaps are requested (in + * this case, we may end up setting stencil_adjusted). + * + * TODO: update addrlib to a newer version, remove this, and + * use flags.matchStencilTileCfg = 1 as an alternative fix. + */ + if (surf->last_level > 0) + AddrSurfInfoIn.flags.noStencil = 1; + + /* Set preferred macrotile parameters. This is usually required + * for shared resources. This is for 2D tiling only. */ + if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && + surf->bankw && surf->bankh && surf->mtilea && surf->tile_split) { + /* If any of these parameters are incorrect, the calculation + * will fail. */ + AddrTileInfoIn.banks = surf->num_banks; + AddrTileInfoIn.bankWidth = surf->bankw; + AddrTileInfoIn.bankHeight = surf->bankh; + AddrTileInfoIn.macroAspectRatio = surf->mtilea; + AddrTileInfoIn.tileSplitBytes = surf->tile_split; + AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */ + AddrSurfInfoIn.flags.degrade4Space = 0; + AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn; + + /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set + * the tile index, because we are expected to know it if + * we know the other parameters. + * + * This is something that can easily be fixed in Addrlib. + * For now, just figure it out here. + * Note that only 2D_TILE_THIN1 is handled here. + */ + assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); + assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1); + + if (ws->info.chip_class == SI) { + if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) { + if (surf->bpe == 2) + AddrSurfInfoIn.tileIndex = 11; /* 16bpp */ + else + AddrSurfInfoIn.tileIndex = 12; /* 32bpp */ + } else { + if (surf->bpe == 1) + AddrSurfInfoIn.tileIndex = 14; /* 8bpp */ + else if (surf->bpe == 2) + AddrSurfInfoIn.tileIndex = 15; /* 16bpp */ + else if (surf->bpe == 4) + AddrSurfInfoIn.tileIndex = 16; /* 32bpp */ + else + AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */ + } + } else { + if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) + AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */ + else + AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */ + } + } + + surf->bo_size = 0; + surf->dcc_size = 0; + surf->dcc_alignment = 1; + + /* Calculate texture layout information. */ + for (level = 0; level <= surf->last_level; level++) { + r = radv_compute_level(ws->addrlib, surf, false, level, type, compressed, + &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); + if (r) + return r; + + if (level == 0) { + surf->bo_alignment = AddrSurfInfoOut.baseAlign; + surf->pipe_config = AddrSurfInfoOut.pTileInfo->pipeConfig - 1; + radv_set_micro_tile_mode(surf, &ws->info); + + /* For 2D modes only. */ + if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) { + surf->bankw = AddrSurfInfoOut.pTileInfo->bankWidth; + surf->bankh = AddrSurfInfoOut.pTileInfo->bankHeight; + surf->mtilea = AddrSurfInfoOut.pTileInfo->macroAspectRatio; + surf->tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes; + surf->num_banks = AddrSurfInfoOut.pTileInfo->banks; + surf->macro_tile_index = AddrSurfInfoOut.macroModeIndex; + } else { + surf->macro_tile_index = 0; + } + } + } + + /* Calculate texture layout information for stencil. */ + if (surf->flags & RADEON_SURF_SBUFFER) { + AddrSurfInfoIn.bpp = 8; + AddrSurfInfoIn.flags.depth = 0; + AddrSurfInfoIn.flags.stencil = 1; + /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */ + AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split; + + for (level = 0; level <= surf->last_level; level++) { + r = radv_compute_level(ws->addrlib, surf, true, level, type, compressed, + &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); + if (r) + return r; + + /* DB uses the depth pitch for both stencil and depth. */ + if (surf->stencil_level[level].nblk_x != surf->level[level].nblk_x) + surf->stencil_adjusted = true; + + if (level == 0) { + /* For 2D modes only. */ + if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) { + surf->stencil_tile_split = + AddrSurfInfoOut.pTileInfo->tileSplitBytes; + } + } + } + } + + /* Recalculate the whole DCC miptree size including disabled levels. + * This is what addrlib does, but calling addrlib would be a lot more + * complicated. + */ +#if 0 + if (surf->dcc_size && surf->last_level > 0) { + surf->dcc_size = align64(surf->bo_size >> 8, + ws->info.pipe_interleave_bytes * + ws->info.num_tile_pipes); + } +#endif + return 0; +} + +static int radv_amdgpu_winsys_surface_best(struct radeon_winsys *rws, + struct radeon_surf *surf) +{ + return 0; +} + +void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws) +{ + ws->base.surface_init = radv_amdgpu_winsys_surface_init; + ws->base.surface_best = radv_amdgpu_winsys_surface_best; +} diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h new file mode 100644 index 00000000000..acc12af3d08 --- /dev/null +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h @@ -0,0 +1,29 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#pragma once + +#include <amdgpu.h> + +void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws); +ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family, int rev_id, enum chip_class chip_class); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c new file mode 100644 index 00000000000..94505367e23 --- /dev/null +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -0,0 +1,359 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * based on amdgpu winsys. + * Copyright © 2011 Marek Olšák <[email protected]> + * Copyright © 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "radv_amdgpu_winsys.h" +#include "radv_amdgpu_winsys_public.h" +#include "radv_amdgpu_surface.h" +#include "amdgpu_id.h" +#include "xf86drm.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <amdgpu_drm.h> +#include <assert.h> +#include "radv_amdgpu_cs.h" +#include "radv_amdgpu_bo.h" +#include "radv_amdgpu_surface.h" +#define CIK_TILE_MODE_COLOR_2D 14 + +#define CIK__GB_TILE_MODE__PIPE_CONFIG(x) (((x) >> 6) & 0x1f) +#define CIK__PIPE_CONFIG__ADDR_SURF_P2 0 +#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16 4 +#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16 5 +#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32 6 +#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32 7 +#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16 8 +#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 9 +#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16 10 +#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16 11 +#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16 12 +#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32 13 +#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32 14 +#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16 16 +#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16 17 + +static unsigned radv_cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) +{ + unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D]; + + switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) { + case CIK__PIPE_CONFIG__ADDR_SURF_P2: + return 2; + case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16: + case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16: + case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32: + case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32: + return 4; + case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16: + case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16: + case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16: + case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16: + case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16: + case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32: + case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32: + return 8; + case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16: + case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16: + return 16; + default: + fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n"); + assert(!"this should never occur"); + return 2; + } +} + +static const char * +get_chip_name(enum radeon_family family) +{ + switch (family) { + case CHIP_TAHITI: return "AMD RADV TAHITI"; + case CHIP_PITCAIRN: return "AMD RADV PITCAIRN"; + case CHIP_VERDE: return "AMD RADV CAPE VERDE"; + case CHIP_OLAND: return "AMD RADV OLAND"; + case CHIP_HAINAN: return "AMD RADV HAINAN"; + case CHIP_BONAIRE: return "AMD RADV BONAIRE"; + case CHIP_KAVERI: return "AMD RADV KAVERI"; + case CHIP_KABINI: return "AMD RADV KABINI"; + case CHIP_HAWAII: return "AMD RADV HAWAII"; + case CHIP_MULLINS: return "AMD RADV MULLINS"; + case CHIP_TONGA: return "AMD RADV TONGA"; + case CHIP_ICELAND: return "AMD RADV ICELAND"; + case CHIP_CARRIZO: return "AMD RADV CARRIZO"; + case CHIP_FIJI: return "AMD RADV FIJI"; + case CHIP_POLARIS10: return "AMD RADV POLARIS10"; + case CHIP_POLARIS11: return "AMD RADV POLARIS11"; + case CHIP_STONEY: return "AMD RADV STONEY"; + default: return "AMD RADV unknown"; + } +} + + +static bool +do_winsys_init(struct radv_amdgpu_winsys *ws, int fd) +{ + struct amdgpu_buffer_size_alignments alignment_info = {}; + struct amdgpu_heap_info vram, gtt; + struct drm_amdgpu_info_hw_ip dma = {}; + drmDevicePtr devinfo; + int r; + int i, j; + /* Get PCI info. */ + r = drmGetDevice(fd, &devinfo); + if (r) { + fprintf(stderr, "amdgpu: drmGetDevice failed.\n"); + goto fail; + } + ws->info.pci_domain = devinfo->businfo.pci->domain; + ws->info.pci_bus = devinfo->businfo.pci->bus; + ws->info.pci_dev = devinfo->businfo.pci->dev; + ws->info.pci_func = devinfo->businfo.pci->func; + drmFreeDevice(&devinfo); + + /* Query hardware and driver information. */ + r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n"); + goto fail; + } + + r = amdgpu_query_buffer_size_alignment(ws->dev, &alignment_info); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n"); + goto fail; + } + + r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n"); + goto fail; + } + + r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, >t); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n"); + goto fail; + } + + r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_DMA, 0, &dma); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n"); + goto fail; + } + ws->info.pci_id = ws->amdinfo.asic_id; /* TODO: is this correct? */ + ws->info.vce_harvest_config = ws->amdinfo.vce_harvest_config; + + switch (ws->info.pci_id) { +#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; break; +#include "pci_ids/radeonsi_pci_ids.h" +#undef CHIPSET + default: + fprintf(stderr, "amdgpu: Invalid PCI ID.\n"); + goto fail; + } + + if (ws->info.family >= CHIP_TONGA) + ws->info.chip_class = VI; + else if (ws->info.family >= CHIP_BONAIRE) + ws->info.chip_class = CIK; + else if (ws->info.family >= CHIP_TAHITI) + ws->info.chip_class = SI; + else { + fprintf(stderr, "amdgpu: Unknown family.\n"); + goto fail; + } + + /* family and rev_id are for addrlib */ + switch (ws->info.family) { + case CHIP_TAHITI: + ws->family = FAMILY_SI; + ws->rev_id = SI_TAHITI_P_A0; + break; + case CHIP_PITCAIRN: + ws->family = FAMILY_SI; + ws->rev_id = SI_PITCAIRN_PM_A0; + break; + case CHIP_VERDE: + ws->family = FAMILY_SI; + ws->rev_id = SI_CAPEVERDE_M_A0; + break; + case CHIP_OLAND: + ws->family = FAMILY_SI; + ws->rev_id = SI_OLAND_M_A0; + break; + case CHIP_HAINAN: + ws->family = FAMILY_SI; + ws->rev_id = SI_HAINAN_V_A0; + break; + case CHIP_BONAIRE: + ws->family = FAMILY_CI; + ws->rev_id = CI_BONAIRE_M_A0; + break; + case CHIP_KAVERI: + ws->family = FAMILY_KV; + ws->rev_id = KV_SPECTRE_A0; + break; + case CHIP_KABINI: + ws->family = FAMILY_KV; + ws->rev_id = KB_KALINDI_A0; + break; + case CHIP_HAWAII: + ws->family = FAMILY_CI; + ws->rev_id = CI_HAWAII_P_A0; + break; + case CHIP_MULLINS: + ws->family = FAMILY_KV; + ws->rev_id = ML_GODAVARI_A0; + break; + case CHIP_TONGA: + ws->family = FAMILY_VI; + ws->rev_id = VI_TONGA_P_A0; + break; + case CHIP_ICELAND: + ws->family = FAMILY_VI; + ws->rev_id = VI_ICELAND_M_A0; + break; + case CHIP_CARRIZO: + ws->family = FAMILY_CZ; + ws->rev_id = CARRIZO_A0; + break; + case CHIP_STONEY: + ws->family = FAMILY_CZ; + ws->rev_id = STONEY_A0; + break; + case CHIP_FIJI: + ws->family = FAMILY_VI; + ws->rev_id = VI_FIJI_P_A0; + break; + case CHIP_POLARIS10: + ws->family = FAMILY_VI; + ws->rev_id = VI_POLARIS10_P_A0; + break; + case CHIP_POLARIS11: + ws->family = FAMILY_VI; + ws->rev_id = VI_POLARIS11_M_A0; + break; + default: + fprintf(stderr, "amdgpu: Unknown family.\n"); + goto fail; + } + + ws->addrlib = radv_amdgpu_addr_create(&ws->amdinfo, ws->family, ws->rev_id, ws->info.chip_class); + if (!ws->addrlib) { + fprintf(stderr, "amdgpu: Cannot create addrlib.\n"); + goto fail; + } + /* Set hardware information. */ + ws->info.name = get_chip_name(ws->info.family); + ws->info.gart_size = gtt.heap_size; + ws->info.vram_size = vram.heap_size; + /* convert the shader clock from KHz to MHz */ + ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000; + ws->info.max_se = ws->amdinfo.num_shader_engines; + ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine; + ws->info.has_uvd = 0; + ws->info.vce_fw_version = 0; + ws->info.has_userptr = TRUE; + ws->info.num_render_backends = ws->amdinfo.rb_pipes; + ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq; + ws->info.num_tile_pipes = radv_cik_get_num_tile_pipes(&ws->amdinfo); + ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7); + ws->info.has_virtual_memory = TRUE; + ws->info.has_sdma = dma.available_rings != 0; + + /* Get the number of good compute units. */ + ws->info.num_good_compute_units = 0; + for (i = 0; i < ws->info.max_se; i++) + for (j = 0; j < ws->info.max_sh_per_se; j++) + ws->info.num_good_compute_units += + util_bitcount(ws->amdinfo.cu_bitmap[i][j]); + + memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode, + sizeof(ws->amdinfo.gb_tile_mode)); + ws->info.enabled_rb_mask = ws->amdinfo.enabled_rb_pipes_mask; + + memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode, + sizeof(ws->amdinfo.gb_macro_tile_mode)); + + ws->info.gart_page_size = alignment_info.size_remote; + + if (ws->info.chip_class == SI) + ws->info.gfx_ib_pad_with_type2 = TRUE; + + ws->use_ib_bos = ws->family >= FAMILY_CI; + return true; +fail: + return false; +} + +static void radv_amdgpu_winsys_query_info(struct radeon_winsys *rws, + struct radeon_info *info) +{ + *info = ((struct radv_amdgpu_winsys *)rws)->info; +} + +static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws) +{ + struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws; + + AddrDestroy(ws->addrlib); + amdgpu_device_deinitialize(ws->dev); + FREE(rws); +} + +struct radeon_winsys * +radv_amdgpu_winsys_create(int fd) +{ + uint32_t drm_major, drm_minor, r; + amdgpu_device_handle dev; + struct radv_amdgpu_winsys *ws; + + r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev); + if (r) + return NULL; + + ws = calloc(1, sizeof(struct radv_amdgpu_winsys)); + if (!ws) + return NULL; + + + ws->dev = dev; + ws->info.drm_major = drm_major; + ws->info.drm_minor = drm_minor; + if (!do_winsys_init(ws, fd)) + goto fail; + + ws->debug_all_bos = getenv("RADV_DEBUG_ALL_BOS") ? true : false; + LIST_INITHEAD(&ws->global_bo_list); + pthread_mutex_init(&ws->global_bo_list_lock, NULL); + ws->base.query_info = radv_amdgpu_winsys_query_info; + ws->base.destroy = radv_amdgpu_winsys_destroy; + radv_amdgpu_bo_init_functions(ws); + radv_amdgpu_cs_init_functions(ws); + radv_amdgpu_surface_init_functions(ws); + return &ws->base; +fail: + return NULL; +} diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h new file mode 100644 index 00000000000..b79495d9952 --- /dev/null +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h @@ -0,0 +1,57 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * based on amdgpu winsys. + * Copyright © 2011 Marek Olšák <[email protected]> + * Copyright © 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#pragma once + +#include "radv_radeon_winsys.h" +#include "addrlib/addrinterface.h" +#include <amdgpu.h> +#include "util/list.h" + +struct radv_amdgpu_winsys { + struct radeon_winsys base; + amdgpu_device_handle dev; + + struct radeon_info info; + struct amdgpu_gpu_info amdinfo; + ADDR_HANDLE addrlib; + + uint32_t rev_id; + unsigned family; + + bool debug_all_bos; + pthread_mutex_t global_bo_list_lock; + struct list_head global_bo_list; + unsigned num_buffers; + + bool use_ib_bos; +}; + +static inline struct radv_amdgpu_winsys * +radv_amdgpu_winsys(struct radeon_winsys *base) +{ + return (struct radv_amdgpu_winsys*)base; +} diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h new file mode 100644 index 00000000000..cf066011c26 --- /dev/null +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based on amdgpu winsys. + * Copyright © 2011 Marek Olšák <[email protected]> + * Copyright © 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#pragma once + +struct radeon_winsys *radv_amdgpu_winsys_create(int fd); |