diff options
author | Jonathan Marek <[email protected]> | 2020-01-21 21:12:57 -0500 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-04-24 17:42:01 +0000 |
commit | c3ef0275c49845f91a3f5d97088954a6d9b877d2 (patch) | |
tree | a69cfca239d8cbc9948aa25beec77a338e36a340 | |
parent | aa3624b8ab7815e7ac54ba656d4e8ffa6ae25e03 (diff) |
turnip: add adreno 650
The tile width alignment is 96 pixels, with a GMEM alignment of 0x6000 bytes.
Signed-off-by: Jonathan Marek <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4608>
-rw-r--r-- | src/freedreno/vulkan/tu_cmd_buffer.c | 27 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_device.c | 9 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_pass.c | 27 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_private.h | 5 |
4 files changed, 46 insertions, 22 deletions
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index a9e075ec225..10577c75985 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -113,10 +113,9 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other) static void tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling, const struct tu_device *dev, - uint32_t pixels) + const struct tu_render_pass *pass) { - const uint32_t tile_align_w = 64; /* note: 32 when no input attachments */ - const uint32_t tile_align_h = 16; + const uint32_t tile_align_w = pass->tile_align_w; const uint32_t max_tile_width = 1024; /* note: don't offset the tiling config by render_area.offset, @@ -139,43 +138,43 @@ tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling, .height = 1, }; tiling->tile0.extent = (VkExtent2D) { - .width = align(ra_width, tile_align_w), - .height = align(ra_height, tile_align_h), + .width = util_align_npot(ra_width, tile_align_w), + .height = align(ra_height, TILE_ALIGN_H), }; if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) { /* start with 2x2 tiles */ tiling->tile_count.width = 2; tiling->tile_count.height = 2; - tiling->tile0.extent.width = align(DIV_ROUND_UP(ra_width, 2), tile_align_w); - tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), tile_align_h); + tiling->tile0.extent.width = util_align_npot(DIV_ROUND_UP(ra_width, 2), tile_align_w); + tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), TILE_ALIGN_H); } /* do not exceed max tile width */ while (tiling->tile0.extent.width > max_tile_width) { tiling->tile_count.width++; tiling->tile0.extent.width = - align(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w); + util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w); } /* will force to sysmem, don't bother trying to have a valid tile config * TODO: just skip all GMEM stuff when sysmem is 
forced? */ - if (!pixels) + if (!pass->gmem_pixels) return; /* do not exceed gmem size */ - while (tiling->tile0.extent.width * tiling->tile0.extent.height > pixels) { + while (tiling->tile0.extent.width * tiling->tile0.extent.height > pass->gmem_pixels) { if (tiling->tile0.extent.width > MAX2(tile_align_w, tiling->tile0.extent.height)) { tiling->tile_count.width++; tiling->tile0.extent.width = - align(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w); + util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w); } else { /* if this assert fails then layout is impossible.. */ - assert(tiling->tile0.extent.height > tile_align_h); + assert(tiling->tile0.extent.height > TILE_ALIGN_H); tiling->tile_count.height++; tiling->tile0.extent.height = - align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), tile_align_h); + align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), TILE_ALIGN_H); } } } @@ -1378,7 +1377,7 @@ tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd, tiling->render_area = *render_area; tiling->force_sysmem = false; - tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass->gmem_pixels); + tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass); tu_tiling_config_update_pipe_layout(tiling, dev); tu_tiling_config_update_pipes(tiling, dev); } diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index ea1ee8ee8aa..30cc1442dd4 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -267,6 +267,7 @@ tu_physical_device_init(struct tu_physical_device *device, case 618: device->ccu_offset_gmem = 0x7c000; /* 0x7e000 in some cases? 
*/ device->ccu_offset_bypass = 0x10000; + device->tile_align_w = 64; device->magic.PC_UNKNOWN_9805 = 0x0; device->magic.SP_UNKNOWN_A0F8 = 0x0; break; @@ -274,9 +275,17 @@ tu_physical_device_init(struct tu_physical_device *device, case 640: device->ccu_offset_gmem = 0xf8000; device->ccu_offset_bypass = 0x20000; + device->tile_align_w = 64; device->magic.PC_UNKNOWN_9805 = 0x1; device->magic.SP_UNKNOWN_A0F8 = 0x1; break; + case 650: + device->ccu_offset_gmem = 0x114000; + device->ccu_offset_bypass = 0x30000; + device->tile_align_w = 96; + device->magic.PC_UNKNOWN_9805 = 0x2; + device->magic.SP_UNKNOWN_A0F8 = 0x2; + break; default: result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "device %s is unsupported", device->name); diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c index 7d537973e5e..c86d7c81ff9 100644 --- a/src/freedreno/vulkan/tu_pass.c +++ b/src/freedreno/vulkan/tu_pass.c @@ -36,20 +36,32 @@ static void update_samples(struct tu_subpass *subpass, subpass->samples = samples; } -#define GMEM_ALIGN 0x4000 - static void create_render_pass_common(struct tu_render_pass *pass, const struct tu_physical_device *phys_dev) { + uint32_t block_align_shift = 4; /* log2(gmem_align/(tile_align_w*tile_align_h)) */ + uint32_t tile_align_w = phys_dev->tile_align_w; + uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * TILE_ALIGN_H; + /* calculate total bytes per pixel */ uint32_t cpp_total = 0; for (uint32_t i = 0; i < pass->attachment_count; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; - if (att->gmem_offset >= 0) + if (att->gmem_offset >= 0) { cpp_total += att->cpp; + /* texture pitch must be aligned to 64, use a tile_align_w that is + * a multiple of 64 for cpp==1 attachment to work as input attachment + */ + if (att->cpp == 1 && tile_align_w % 64 != 0) { + tile_align_w *= 2; + block_align_shift -= 1; + } + } } + pass->tile_align_w = tile_align_w; + /* no gmem attachments */ if (cpp_total == 0) { /* 
any value non-zero value so tiling config works with no attachments */ @@ -64,7 +76,7 @@ create_render_pass_common(struct tu_render_pass *pass, * result: nblocks = {12, 52}, pixels = 196608 * optimal: nblocks = {13, 51}, pixels = 208896 */ - uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / GMEM_ALIGN; + uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align; uint32_t offset = 0, pixels = ~0u; for (uint32_t i = 0; i < pass->attachment_count; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; @@ -73,14 +85,13 @@ create_render_pass_common(struct tu_render_pass *pass, att->gmem_offset = offset; - /* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */ - uint32_t align = MAX2(1, att->cpp / 16); + uint32_t align = MAX2(1, att->cpp >> block_align_shift); uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align); gmem_blocks -= nblocks; cpp_total -= att->cpp; - offset += nblocks * GMEM_ALIGN; - pixels = MIN2(pixels, nblocks * GMEM_ALIGN / att->cpp); + offset += nblocks * gmem_align; + pixels = MIN2(pixels, nblocks * gmem_align / att->cpp); } pass->gmem_pixels = pixels; diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 062bcf98786..ff5f25dd0b8 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -324,6 +324,10 @@ struct tu_physical_device uint64_t gmem_base; uint32_t ccu_offset_gmem; uint32_t ccu_offset_bypass; + /* alignment for size of tiles */ + uint32_t tile_align_w; +#define TILE_ALIGN_H 16 + /* gmem store/load granularity */ #define GMEM_ALIGN_W 16 #define GMEM_ALIGN_H 4 @@ -1607,6 +1611,7 @@ struct tu_render_pass uint32_t attachment_count; uint32_t subpass_count; uint32_t gmem_pixels; + uint32_t tile_align_w; struct tu_subpass_attachment *subpass_attachments; struct tu_render_pass_attachment *attachments; struct tu_subpass subpasses[0]; |