aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/freedreno/vulkan/tu_clear_blit.c 8
-rw-r--r-- src/freedreno/vulkan/tu_cmd_buffer.c 293
-rw-r--r-- src/freedreno/vulkan/tu_device.c 3
-rw-r--r-- src/freedreno/vulkan/tu_private.h 52
-rw-r--r-- src/freedreno/vulkan/tu_util.c 130
5 files changed, 215 insertions, 271 deletions
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index e93ef73c141..114786432eb 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -2321,10 +2321,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
uint32_t a,
uint32_t gmem_a)
{
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
- const VkRect2D *render_area = &tiling->render_area;
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
+ const VkRect2D *render_area = &cmd->state.render_area;
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
- struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
+ struct tu_image_view *iview = fb->attachments[a].attachment;
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
if (!dst->store)
@@ -2377,7 +2377,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
A6XX_SP_PS_2D_SRC_HI(),
- A6XX_SP_PS_2D_SRC_PITCH(.pitch = tiling->tile0.extent.width * src->cpp));
+ A6XX_SP_PS_2D_SRC_PITCH(.pitch = fb->tile0.width * src->cpp));
/* sync GMEM writes with CACHE. */
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 871a5b8d35d..fb24e17be0d 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -109,177 +109,29 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
}
static void
-tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
- const struct tu_device *dev,
- const struct tu_render_pass *pass)
-{
- const uint32_t tile_align_w = pass->tile_align_w;
- const uint32_t max_tile_width = 1024;
-
- /* note: don't offset the tiling config by render_area.offset,
- * because binning pass can't deal with it
- * this means we might end up with more tiles than necessary,
- * but load/store/etc are still scissored to the render_area
- */
- tiling->tile0.offset = (VkOffset2D) {};
-
- const uint32_t ra_width =
- tiling->render_area.extent.width +
- (tiling->render_area.offset.x - tiling->tile0.offset.x);
- const uint32_t ra_height =
- tiling->render_area.extent.height +
- (tiling->render_area.offset.y - tiling->tile0.offset.y);
-
- /* start from 1 tile */
- tiling->tile_count = (VkExtent2D) {
- .width = 1,
- .height = 1,
- };
- tiling->tile0.extent = (VkExtent2D) {
- .width = util_align_npot(ra_width, tile_align_w),
- .height = align(ra_height, TILE_ALIGN_H),
- };
-
- if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
- /* start with 2x2 tiles */
- tiling->tile_count.width = 2;
- tiling->tile_count.height = 2;
- tiling->tile0.extent.width = util_align_npot(DIV_ROUND_UP(ra_width, 2), tile_align_w);
- tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), TILE_ALIGN_H);
- }
-
- /* do not exceed max tile width */
- while (tiling->tile0.extent.width > max_tile_width) {
- tiling->tile_count.width++;
- tiling->tile0.extent.width =
- util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
- }
-
- /* will force to sysmem, don't bother trying to have a valid tile config
- * TODO: just skip all GMEM stuff when sysmem is forced?
- */
- if (!pass->gmem_pixels)
- return;
-
- /* do not exceed gmem size */
- while (tiling->tile0.extent.width * tiling->tile0.extent.height > pass->gmem_pixels) {
- if (tiling->tile0.extent.width > MAX2(tile_align_w, tiling->tile0.extent.height)) {
- tiling->tile_count.width++;
- tiling->tile0.extent.width =
- util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
- } else {
- /* if this assert fails then layout is impossible.. */
- assert(tiling->tile0.extent.height > TILE_ALIGN_H);
- tiling->tile_count.height++;
- tiling->tile0.extent.height =
- align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), TILE_ALIGN_H);
- }
- }
-}
-
-static void
-tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
- const struct tu_device *dev)
-{
- const uint32_t max_pipe_count = 32; /* A6xx */
-
- /* start from 1 tile per pipe */
- tiling->pipe0 = (VkExtent2D) {
- .width = 1,
- .height = 1,
- };
- tiling->pipe_count = tiling->tile_count;
-
- while (tiling->pipe_count.width * tiling->pipe_count.height > max_pipe_count) {
- if (tiling->pipe0.width < tiling->pipe0.height) {
- tiling->pipe0.width += 1;
- tiling->pipe_count.width =
- DIV_ROUND_UP(tiling->tile_count.width, tiling->pipe0.width);
- } else {
- tiling->pipe0.height += 1;
- tiling->pipe_count.height =
- DIV_ROUND_UP(tiling->tile_count.height, tiling->pipe0.height);
- }
- }
-}
-
-static void
-tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
- const struct tu_device *dev)
-{
- const uint32_t max_pipe_count = 32; /* A6xx */
- const uint32_t used_pipe_count =
- tiling->pipe_count.width * tiling->pipe_count.height;
- const VkExtent2D last_pipe = {
- .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
- .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
- };
-
- assert(used_pipe_count <= max_pipe_count);
- assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
-
- for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
- for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
- const uint32_t pipe_x = tiling->pipe0.width * x;
- const uint32_t pipe_y = tiling->pipe0.height * y;
- const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
- ? last_pipe.width
- : tiling->pipe0.width;
- const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
- ? last_pipe.height
- : tiling->pipe0.height;
- const uint32_t n = tiling->pipe_count.width * y + x;
-
- tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
- A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
- A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
- A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
- tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
- }
- }
-
- memset(tiling->pipe_config + used_pipe_count, 0,
- sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
-}
-
-static void
-tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
- const struct tu_device *dev,
+tu_tiling_config_get_tile(const struct tu_framebuffer *fb,
uint32_t tx,
uint32_t ty,
- struct tu_tile *tile)
+ uint32_t *pipe,
+ uint32_t *slot)
{
/* find the pipe and the slot for tile (tx, ty) */
- const uint32_t px = tx / tiling->pipe0.width;
- const uint32_t py = ty / tiling->pipe0.height;
- const uint32_t sx = tx - tiling->pipe0.width * px;
- const uint32_t sy = ty - tiling->pipe0.height * py;
+ const uint32_t px = tx / fb->pipe0.width;
+ const uint32_t py = ty / fb->pipe0.height;
+ const uint32_t sx = tx - fb->pipe0.width * px;
+ const uint32_t sy = ty - fb->pipe0.height * py;
/* last pipe has different width */
const uint32_t pipe_width =
- MIN2(tiling->pipe0.width,
- tiling->tile_count.width - px * tiling->pipe0.width);
+ MIN2(fb->pipe0.width,
+ fb->tile_count.width - px * fb->pipe0.width);
- assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
- assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
- assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);
+ assert(tx < fb->tile_count.width && ty < fb->tile_count.height);
+ assert(px < fb->pipe_count.width && py < fb->pipe_count.height);
+ assert(sx < fb->pipe0.width && sy < fb->pipe0.height);
/* convert to 1D indices */
- tile->pipe = tiling->pipe_count.width * py + px;
- tile->slot = pipe_width * sy + sx;
-
- /* get the blit area for the tile */
- tile->begin = (VkOffset2D) {
- .x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
- .y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
- };
- tile->end.x =
- (tx == tiling->tile_count.width - 1)
- ? tiling->render_area.offset.x + tiling->render_area.extent.width
- : tile->begin.x + tiling->tile0.extent.width;
- tile->end.y =
- (ty == tiling->tile_count.height - 1)
- ? tiling->render_area.offset.y + tiling->render_area.extent.height
- : tile->begin.y + tiling->tile0.extent.height;
+ *pipe = fb->pipe_count.width * py + px;
+ *slot = pipe_width * sy + sx;
}
void
@@ -602,7 +454,7 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
static void
tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align)
{
- const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
+ const VkRect2D *render_area = &cmd->state.render_area;
uint32_t x1 = render_area->offset.x;
uint32_t y1 = render_area->offset.y;
uint32_t x2 = x1 + render_area->extent.width - 1;
@@ -706,7 +558,7 @@ tu_cs_emit_sds_ib(struct tu_cs *cs, uint32_t id, struct tu_cs_entry entry)
static bool
use_hw_binning(struct tu_cmd_buffer *cmd)
{
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
/* XFB commands are emitted for BINNING || SYSMEM, which makes it incompatible
* with non-hw binning GMEM rendering. this is required because some of the
@@ -721,7 +573,7 @@ use_hw_binning(struct tu_cmd_buffer *cmd)
if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN))
return true;
- return (tiling->tile_count.width * tiling->tile_count.height) > 2;
+ return (fb->tile_count.width * fb->tile_count.height) > 2;
}
static bool
@@ -740,24 +592,29 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd)
if (cmd->has_tess)
return true;
- return cmd->state.tiling_config.force_sysmem;
+ return false;
}
static void
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- const struct tu_tile *tile)
+ uint32_t tx, uint32_t ty)
{
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
+ uint32_t pipe, slot;
+
+ tu_tiling_config_get_tile(fb, tx, ty, &pipe, &slot);
+
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_YIELD));
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
- const uint32_t x1 = tile->begin.x;
- const uint32_t y1 = tile->begin.y;
- const uint32_t x2 = tile->end.x - 1;
- const uint32_t y2 = tile->end.y - 1;
+ const uint32_t x1 = fb->tile0.width * tx;
+ const uint32_t y1 = fb->tile0.height * ty;
+ const uint32_t x2 = x1 + fb->tile0.width - 1;
+ const uint32_t y2 = y1 + fb->tile0.height - 1;
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
tu6_emit_window_offset(cs, x1, y1);
@@ -771,11 +628,11 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
tu_cs_emit(cs, 0x0);
tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5, 7);
- tu_cs_emit(cs, cmd->state.tiling_config.pipe_sizes[tile->pipe] |
- CP_SET_BIN_DATA5_0_VSC_N(tile->slot));
- tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + tile->pipe * cmd->vsc_draw_strm_pitch);
- tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + (tile->pipe * 4) + (32 * cmd->vsc_draw_strm_pitch));
- tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + (tile->pipe * cmd->vsc_prim_strm_pitch));
+ tu_cs_emit(cs, fb->pipe_sizes[pipe] |
+ CP_SET_BIN_DATA5_0_VSC_N(slot));
+ tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * cmd->vsc_draw_strm_pitch);
+ tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * 4 + 32 * cmd->vsc_draw_strm_pitch);
+ tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + pipe * cmd->vsc_prim_strm_pitch);
tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
tu_cs_emit(cs, 0x0);
@@ -801,7 +658,7 @@ tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
struct tu_image_view *dst = fb->attachments[a].attachment;
struct tu_image_view *src = fb->attachments[gmem_a].attachment;
- tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.tiling_config.render_area);
+ tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.render_area);
}
static void
@@ -1009,21 +866,20 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
static void
update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
tu_cs_emit_regs(cs,
- A6XX_VSC_BIN_SIZE(.width = tiling->tile0.extent.width,
- .height = tiling->tile0.extent.height),
+ A6XX_VSC_BIN_SIZE(.width = fb->tile0.width,
+ .height = fb->tile0.height),
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = &cmd->vsc_draw_strm,
.bo_offset = 32 * cmd->vsc_draw_strm_pitch));
tu_cs_emit_regs(cs,
- A6XX_VSC_BIN_COUNT(.nx = tiling->tile_count.width,
- .ny = tiling->tile_count.height));
+ A6XX_VSC_BIN_COUNT(.nx = fb->tile_count.width,
+ .ny = fb->tile_count.height));
tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
- for (unsigned i = 0; i < 32; i++)
- tu_cs_emit(cs, tiling->pipe_config[i]);
+ tu_cs_emit_array(cs, fb->pipe_config, 32);
tu_cs_emit_regs(cs,
A6XX_VSC_PRIM_STRM_ADDRESS(.bo = &cmd->vsc_prim_strm),
@@ -1039,9 +895,9 @@ update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
static void
emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
const uint32_t used_pipe_count =
- tiling->pipe_count.width * tiling->pipe_count.height;
+ fb->pipe_count.width * fb->pipe_count.height;
/* Clear vsc_scratch: */
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
@@ -1078,14 +934,9 @@ static void
tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
struct tu_physical_device *phys_dev = cmd->device->physical_device;
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-
- uint32_t x1 = tiling->tile0.offset.x;
- uint32_t y1 = tiling->tile0.offset.y;
- uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
- uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
- tu6_emit_window_scissor(cs, x1, y1, x2, y2);
+ tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
@@ -1213,7 +1064,7 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
dst[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
dst[2] =
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
- A6XX_TEX_CONST_2_PITCH(cmd->state.tiling_config.tile0.extent.width * att->cpp);
+ A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * att->cpp);
dst[3] = 0;
dst[4] = cmd->device->physical_device->gmem_base + att->gmem_offset;
dst[5] = A6XX_TEX_CONST_5_DEPTH(1);
@@ -1282,8 +1133,7 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
}
static void
-tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
- const struct VkRect2D *renderArea)
+tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
const struct tu_framebuffer *fb = cmd->state.framebuffer;
@@ -1348,14 +1198,12 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_GMEM);
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
if (use_hw_binning(cmd)) {
/* enable stream-out during binning pass: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
- tu6_emit_bin_size(cs,
- tiling->tile0.extent.width,
- tiling->tile0.extent.height,
+ tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
tu6_emit_render_cntl(cmd, cmd->state.subpass, cs, true);
@@ -1365,9 +1213,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
/* and disable stream-out for draw pass: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=true));
- tu6_emit_bin_size(cs,
- tiling->tile0.extent.width,
- tiling->tile0.extent.height,
+ tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);
tu_cs_emit_regs(cs,
@@ -1383,10 +1229,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
/* no binning pass, so enable stream-out for draw pass:: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
- tu6_emit_bin_size(cs,
- tiling->tile0.extent.width,
- tiling->tile0.extent.height,
- 0x6000000);
+ tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height, 0x6000000);
}
tu_cs_sanity_check(cs);
@@ -1395,9 +1238,9 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
static void
tu6_render_tile(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
- const struct tu_tile *tile)
+ uint32_t tx, uint32_t ty)
{
- tu6_emit_tile_select(cmd, cs, tile);
+ tu6_emit_tile_select(cmd, cs, tx, ty);
tu_cs_emit_call(cs, &cmd->draw_cs);
@@ -1429,19 +1272,16 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
static void
tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
{
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
if (use_hw_binning(cmd))
cmd->use_vsc_data = true;
tu6_tile_render_begin(cmd, &cmd->cs);
- for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
- for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
- struct tu_tile tile;
- tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
- tu6_render_tile(cmd, &cmd->cs, &tile);
- }
+ for (uint32_t y = 0; y < fb->tile_count.height; y++) {
+ for (uint32_t x = 0; x < fb->tile_count.width; x++)
+ tu6_render_tile(cmd, &cmd->cs, x, y);
}
tu6_tile_render_end(cmd, &cmd->cs);
@@ -1450,9 +1290,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
static void
tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd)
{
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-
- tu6_sysmem_render_begin(cmd, &cmd->cs, &tiling->render_area);
+ tu6_sysmem_render_begin(cmd, &cmd->cs);
tu_cs_emit_call(&cmd->cs, &cmd->draw_cs);
@@ -1478,21 +1316,6 @@ tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &sub_cs);
}
-static void
-tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
- const VkRect2D *render_area)
-{
- const struct tu_device *dev = cmd->device;
- struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-
- tiling->render_area = *render_area;
- tiling->force_sysmem = false;
-
- tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass);
- tu_tiling_config_update_pipe_layout(tiling, dev);
- tu_tiling_config_update_pipes(tiling, dev);
-}
-
static VkResult
tu_create_cmd_buffer(struct tu_device *device,
struct tu_cmd_pool *pool,
@@ -2791,8 +2614,8 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
cmd->state.pass = pass;
cmd->state.subpass = pass->subpasses;
cmd->state.framebuffer = fb;
+ cmd->state.render_area = pRenderPassBegin->renderArea;
- tu_cmd_update_tiling_config(cmd, &pRenderPassBegin->renderArea);
tu_cmd_prepare_tile_store_ib(cmd);
/* Note: because this is external, any flushes will happen before draw_cs
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index ec2bae0b392..716c168d2e4 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -2271,6 +2271,7 @@ tu_CreateFramebuffer(VkDevice _device,
VkFramebuffer *pFramebuffer)
{
TU_FROM_HANDLE(tu_device, device, _device);
+ TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
struct tu_framebuffer *framebuffer;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
@@ -2292,6 +2293,8 @@ tu_CreateFramebuffer(VkDevice _device,
framebuffer->attachments[i].attachment = iview;
}
+ tu_framebuffer_tiling_config(framebuffer, device, pass);
+
*pFramebuffer = tu_framebuffer_to_handle(framebuffer);
return VK_SUCCESS;
}
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 0d59c3c0659..12e5b0739a6 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -654,36 +654,6 @@ struct tu_descriptor_state
uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
};
-struct tu_tile
-{
- uint8_t pipe;
- uint8_t slot;
- VkOffset2D begin;
- VkOffset2D end;
-};
-
-struct tu_tiling_config
-{
- VkRect2D render_area;
-
- /* position and size of the first tile */
- VkRect2D tile0;
- /* number of tiles */
- VkExtent2D tile_count;
-
- /* size of the first VSC pipe */
- VkExtent2D pipe0;
- /* number of VSC pipes */
- VkExtent2D pipe_count;
-
- /* pipe register values */
- uint32_t pipe_config[MAX_VSC_PIPES];
- uint32_t pipe_sizes[MAX_VSC_PIPES];
-
- /* Whether sysmem rendering must be used */
- bool force_sysmem;
-};
-
enum tu_cmd_dirty_bits
{
TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
@@ -859,8 +829,7 @@ struct tu_cmd_state
const struct tu_render_pass *pass;
const struct tu_subpass *subpass;
const struct tu_framebuffer *framebuffer;
-
- struct tu_tiling_config tiling_config;
+ VkRect2D render_area;
struct tu_cs_entry tile_store_ib;
@@ -1389,10 +1358,29 @@ struct tu_framebuffer
uint32_t height;
uint32_t layers;
+ /* size of the first tile */
+ VkExtent2D tile0;
+ /* number of tiles */
+ VkExtent2D tile_count;
+
+ /* size of the first VSC pipe */
+ VkExtent2D pipe0;
+ /* number of VSC pipes */
+ VkExtent2D pipe_count;
+
+ /* pipe register values */
+ uint32_t pipe_config[MAX_VSC_PIPES];
+ uint32_t pipe_sizes[MAX_VSC_PIPES];
+
uint32_t attachment_count;
struct tu_attachment_info attachments[0];
};
+void
+tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
+ const struct tu_device *device,
+ const struct tu_render_pass *pass);
+
struct tu_subpass_barrier {
VkPipelineStageFlags src_stage_mask;
VkAccessFlags src_access_mask;
diff --git a/src/freedreno/vulkan/tu_util.c b/src/freedreno/vulkan/tu_util.c
index 9a0e5cce4c9..ba1e4d53cd6 100644
--- a/src/freedreno/vulkan/tu_util.c
+++ b/src/freedreno/vulkan/tu_util.c
@@ -116,3 +116,133 @@ __vk_errorf(struct tu_instance *instance,
return error;
}
+
+static void /* Choose fb->tile0 (tile size) and fb->tile_count (tile grid) from fb->width/height and the pass limits. */
+tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
+ const struct tu_device *dev,
+ const struct tu_render_pass *pass)
+{
+ const uint32_t tile_align_w = pass->tile_align_w;
+ const uint32_t max_tile_width = 1024; /* cap applied to fb->tile0.width below */
+
+ /* start from 1 tile sized to the whole framebuffer, aligned to tile_align_w x TILE_ALIGN_H */
+ fb->tile_count = (VkExtent2D) {
+ .width = 1,
+ .height = 1,
+ };
+ fb->tile0 = (VkExtent2D) {
+ .width = util_align_npot(fb->width, tile_align_w),
+ .height = align(fb->height, TILE_ALIGN_H),
+ };
+
+ if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
+ /* TU_DEBUG_FORCEBIN is set: start with 2x2 tiles instead of a single tile */
+ fb->tile_count.width = 2;
+ fb->tile_count.height = 2;
+ fb->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w);
+ fb->tile0.height = align(DIV_ROUND_UP(fb->height, 2), TILE_ALIGN_H);
+ }
+
+ /* do not exceed max tile width: add tile columns until tile0.width fits */
+ while (fb->tile0.width > max_tile_width) {
+ fb->tile_count.width++;
+ fb->tile0.width =
+ util_align_npot(DIV_ROUND_UP(fb->width, fb->tile_count.width), tile_align_w);
+ }
+
+ /* pass->gmem_pixels == 0: will force to sysmem, don't bother trying to have a valid tile config
+ * (tile0 is left as computed above and is not checked against any GMEM budget)
+ * TODO: just skip all GMEM stuff when sysmem is forced?
+ */
+ if (!pass->gmem_pixels)
+ return;
+
+ /* do not exceed gmem size: shrink tile0 until width * height <= pass->gmem_pixels */
+ while (fb->tile0.width * fb->tile0.height > pass->gmem_pixels) {
+ if (fb->tile0.width > MAX2(tile_align_w, fb->tile0.height)) { /* width dominates: add a tile column */
+ fb->tile_count.width++;
+ fb->tile0.width =
+ util_align_npot(DIV_ROUND_UP(fb->width, fb->tile_count.width), tile_align_w);
+ } else { /* otherwise add a tile row */
+ /* if this assert fails, tile0.height is already at TILE_ALIGN_H or below and the layout is impossible */
+ assert(fb->tile0.height > TILE_ALIGN_H);
+ fb->tile_count.height++;
+ fb->tile0.height =
+ align(DIV_ROUND_UP(fb->height, fb->tile_count.height), TILE_ALIGN_H);
+ }
+ }
+}
+
+static void /* Group the tile grid into at most max_pipe_count VSC pipes: sets fb->pipe0 and fb->pipe_count. */
+tu_tiling_config_update_pipe_layout(struct tu_framebuffer *fb,
+ const struct tu_device *dev) /* dev is not referenced in this function */
+{
+ const uint32_t max_pipe_count = 32; /* A6xx */
+
+ /* start from 1 tile per pipe, i.e. as many pipes as there are tiles */
+ fb->pipe0 = (VkExtent2D) {
+ .width = 1,
+ .height = 1,
+ };
+ fb->pipe_count = fb->tile_count;
+
+ while (fb->pipe_count.width * fb->pipe_count.height > max_pipe_count) { /* grow pipe0 until the pipe grid fits */
+ if (fb->pipe0.width < fb->pipe0.height) { /* grow the smaller pipe0 dimension; a tie grows the height */
+ fb->pipe0.width += 1;
+ fb->pipe_count.width =
+ DIV_ROUND_UP(fb->tile_count.width, fb->pipe0.width);
+ } else {
+ fb->pipe0.height += 1;
+ fb->pipe_count.height =
+ DIV_ROUND_UP(fb->tile_count.height, fb->pipe0.height);
+ }
+ }
+}
+
+static void /* Fill fb->pipe_config[] and fb->pipe_sizes[] for every used pipe from pipe0/pipe_count/tile_count. */
+tu_tiling_config_update_pipes(struct tu_framebuffer *fb,
+ const struct tu_device *dev) /* dev is not referenced in this function */
+{
+ const uint32_t max_pipe_count = 32; /* A6xx */
+ const uint32_t used_pipe_count =
+ fb->pipe_count.width * fb->pipe_count.height;
+ const VkExtent2D last_pipe = { /* tiles held by the last pipe column/row: between 1 and the pipe0 size */
+ .width = (fb->tile_count.width - 1) % fb->pipe0.width + 1,
+ .height = (fb->tile_count.height - 1) % fb->pipe0.height + 1,
+ };
+
+ assert(used_pipe_count <= max_pipe_count);
+ assert(max_pipe_count <= ARRAY_SIZE(fb->pipe_config));
+
+ for (uint32_t y = 0; y < fb->pipe_count.height; y++) {
+ for (uint32_t x = 0; x < fb->pipe_count.width; x++) {
+ const uint32_t pipe_x = fb->pipe0.width * x; /* pipe origin, counted in tiles */
+ const uint32_t pipe_y = fb->pipe0.height * y;
+ const uint32_t pipe_w = (x == fb->pipe_count.width - 1) /* only the last column uses last_pipe.width */
+ ? last_pipe.width
+ : fb->pipe0.width;
+ const uint32_t pipe_h = (y == fb->pipe_count.height - 1) /* only the last row uses last_pipe.height */
+ ? last_pipe.height
+ : fb->pipe0.height;
+ const uint32_t n = fb->pipe_count.width * y + x; /* 1D pipe index: y * pipe_count.width + x */
+
+ fb->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
+ A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
+ A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
+ A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
+ fb->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h); /* tile count of pipe n, stored in the VSC_SIZE field */
+ }
+ }
+
+ memset(fb->pipe_config + used_pipe_count, 0, /* zero pipe_config for unused pipes; pipe_sizes is not cleared here */
+ sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
+}
+
+void /* Compute the tile grid and VSC pipe tables stored in fb; each step reads fields written by the one before it. */
+tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
+ const struct tu_device *device,
+ const struct tu_render_pass *pass)
+{
+ tu_tiling_config_update_tile_layout(fb, device, pass); /* writes fb->tile0 and fb->tile_count */
+ tu_tiling_config_update_pipe_layout(fb, device); /* reads tile_count; writes fb->pipe0 and fb->pipe_count */
+ tu_tiling_config_update_pipes(fb, device); /* reads the above; writes fb->pipe_config and fb->pipe_sizes */
+}