summaryrefslogtreecommitdiffstats
path: root/src/freedreno
diff options
context:
space:
mode:
authorHyunjun Ko <[email protected]>2020-02-25 10:08:25 +0900
committerMarge Bot <[email protected]>2020-03-12 03:10:16 +0000
commit9ff1959ca5d24afe48bec20edf7e3d059d254134 (patch)
treee8445c0611b8dca3bc419fad7234d59c068dcaff /src/freedreno
parent374406a7c420d266f920461f904864a94dc1b8c8 (diff)
turnip: Implement stream-out emit and vkApis for transform feedback
1. Implement vkCmdBindTransformFeedbackBuffersEXT, vkCmdBeginTransformFeedbackEXT and vkCmdEndTransformFeedbackEXT (counter buffers are not handled yet). 2. Implement the stream-out emit function, mostly taken from fd6_emit.c. v2: Replace emit_pkt4 funcs with emit_regs. v3: Don't copy the stream-output state from tu_pipeline. v4: Write zero to VPC_SO_CNTL/VPC_SO_BUF_CNTL in tu6_init_hw. Signed-off-by: Hyunjun Ko <[email protected]> Reviewed-by: Jonathan Marek <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3942>
Diffstat (limited to 'src/freedreno')
-rw-r--r--src/freedreno/vulkan/tu_cmd_buffer.c179
-rw-r--r--src/freedreno/vulkan/tu_extensions.py1
-rw-r--r--src/freedreno/vulkan/tu_private.h1
3 files changed, 148 insertions, 33 deletions
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 74997b31933..4367f8e4dfb 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -798,7 +798,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
tu6_emit_window_offset(cmd, cs, x1, y1);
tu_cs_emit_regs(cs,
- A6XX_VPC_SO_OVERRIDE(.so_disable = true));
+ A6XX_VPC_SO_OVERRIDE(.so_disable = false));
if (use_hw_binning(cmd)) {
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
@@ -1144,38 +1144,12 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
- tu_cs_emit_regs(cs,
- A6XX_VPC_SO_BUFFER_BASE(0),
- A6XX_VPC_SO_BUFFER_SIZE(0));
-
- tu_cs_emit_regs(cs,
- A6XX_VPC_SO_FLUSH_BASE(0));
-
- tu_cs_emit_regs(cs,
- A6XX_VPC_SO_BUF_CNTL(0));
-
- tu_cs_emit_regs(cs,
- A6XX_VPC_SO_BUFFER_OFFSET(0, 0));
-
- tu_cs_emit_regs(cs,
- A6XX_VPC_SO_BUFFER_BASE(1, 0),
- A6XX_VPC_SO_BUFFER_SIZE(1, 0));
-
- tu_cs_emit_regs(cs,
- A6XX_VPC_SO_BUFFER_OFFSET(1, 0),
- A6XX_VPC_SO_FLUSH_BASE(1, 0),
- A6XX_VPC_SO_BUFFER_BASE(2, 0),
- A6XX_VPC_SO_BUFFER_SIZE(2, 0));
-
- tu_cs_emit_regs(cs,
- A6XX_VPC_SO_BUFFER_OFFSET(2, 0),
- A6XX_VPC_SO_FLUSH_BASE(2, 0),
- A6XX_VPC_SO_BUFFER_BASE(3, 0),
- A6XX_VPC_SO_BUFFER_SIZE(3, 0));
-
- tu_cs_emit_regs(cs,
- A6XX_VPC_SO_BUFFER_OFFSET(3, 0),
- A6XX_VPC_SO_FLUSH_BASE(3, 0));
+ /* Disable stream-out by default. */
+ tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 4);
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL);
+ tu_cs_emit(cs, 0);
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_BUF_CNTL);
+ tu_cs_emit(cs, 0);
tu_cs_emit_regs(cs,
A6XX_SP_HS_CTRL_REG0(0));
@@ -1577,6 +1551,9 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
if (use_hw_binning(cmd)) {
+ /* enable stream-out during binning pass: */
+ tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
+
tu6_emit_bin_size(cs,
tiling->tile0.extent.width,
tiling->tile0.extent.height,
@@ -1586,6 +1563,9 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu6_emit_binning_pass(cmd, cs);
+ /* and disable stream-out for draw pass: */
+ tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=true));
+
tu6_emit_bin_size(cs,
tiling->tile0.extent.width,
tiling->tile0.extent.height,
@@ -1601,6 +1581,9 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
tu_cs_emit(cs, 0x1);
} else {
+ /* no binning pass, so enable stream-out for draw pass: */
+ tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
+
tu6_emit_bin_size(cs,
tiling->tile0.extent.width,
tiling->tile0.extent.height,
@@ -2173,6 +2156,56 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS;
}
+/**
+ * Record transform feedback (stream-out) buffer bindings in the command
+ * buffer state.
+ *
+ * For every i in [0, bindingCount) the slot firstBinding + i stores
+ * pBuffers[i], pOffsets[i] and the resolved capture size, and is marked in
+ * streamout_enabled.  Slots bound with a non-zero offset are additionally
+ * marked in streamout_reset.  TU_CMD_DIRTY_STREAMOUT_BUFFERS is raised so
+ * that the stream-out state is re-emitted.
+ *
+ * Per the Vulkan specification pSizes may be NULL, and any element may be
+ * VK_WHOLE_SIZE; both mean "from pOffsets[i] to the end of the buffer", so
+ * the array must not be dereferenced unconditionally.
+ */
+void
+tu_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer,
+                                      uint32_t firstBinding,
+                                      uint32_t bindingCount,
+                                      const VkBuffer *pBuffers,
+                                      const VkDeviceSize *pOffsets,
+                                      const VkDeviceSize *pSizes)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   assert(firstBinding + bindingCount <= IR3_MAX_SO_BUFFERS);
+
+   for (uint32_t i = 0; i < bindingCount; i++) {
+      uint32_t idx = firstBinding + i;
+      TU_FROM_HANDLE(tu_buffer, buf, pBuffers[i]);
+
+      /* A NULL array or a VK_WHOLE_SIZE entry selects the remainder of the
+       * buffer past the binding offset.
+       */
+      VkDeviceSize size = pSizes ? pSizes[i] : VK_WHOLE_SIZE;
+      if (size == VK_WHOLE_SIZE)
+         size = buf->size - pOffsets[i];
+
+      if (pOffsets[i] != 0)
+         cmd->state.streamout_reset |= 1u << idx;
+
+      cmd->state.streamout_buf.buffers[idx] = buf;
+      cmd->state.streamout_buf.offsets[idx] = pOffsets[i];
+      cmd->state.streamout_buf.sizes[idx] = size;
+
+      cmd->state.streamout_enabled |= 1u << idx;
+   }
+
+   cmd->state.dirty |= TU_CMD_DIRTY_STREAMOUT_BUFFERS;
+}
+
+/**
+ * vkCmdBeginTransformFeedbackEXT entry point.
+ *
+ * Currently this only checks that the requested counter buffer range fits
+ * within IR3_MAX_SO_BUFFERS; the counter buffers themselves are not
+ * consumed and no command buffer state is modified.
+ */
+void
+tu_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer,
+                                uint32_t firstCounterBuffer,
+                                uint32_t counterBufferCount,
+                                const VkBuffer *pCounterBuffers,
+                                const VkDeviceSize *pCounterBufferOffsets)
+{
+   /* TODO do something with counter buffer? */
+   assert(firstCounterBuffer + counterBufferCount <= IR3_MAX_SO_BUFFERS);
+}
+
+/**
+ * vkCmdEndTransformFeedbackEXT entry point.
+ *
+ * Checks that the counter buffer range fits within IR3_MAX_SO_BUFFERS and
+ * clears the per-buffer stream-out enable mask of the command buffer.  The
+ * counter buffers themselves are not consumed yet.
+ *
+ * NOTE(review): no dirty flag is raised here, so nothing in this function
+ * by itself requests a re-emit of the stream-out registers -- confirm that
+ * this is intended.
+ */
+void
+tu_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer,
+                              uint32_t firstCounterBuffer,
+                              uint32_t counterBufferCount,
+                              const VkBuffer *pCounterBuffers,
+                              const VkDeviceSize *pCounterBufferOffsets)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   /* TODO do something with counter buffer? */
+   assert(firstCounterBuffer + counterBufferCount <= IR3_MAX_SO_BUFFERS);
+
+   cmd->state.streamout_enabled = 0;
+}
+
void
tu_CmdPushConstants(VkCommandBuffer commandBuffer,
VkPipelineLayout layout,
@@ -3374,6 +3407,67 @@ tu6_emit_border_color(struct tu_cmd_buffer *cmd,
return VK_SUCCESS;
}
+/**
+ * Emit the stream-out (transform feedback) state into \p cs.
+ *
+ * For every bound stream-out buffer this writes VPC_SO_BUFFER_BASE,
+ * VPC_SO_BUFFER_SIZE, VPC_SO_BUFFER_OFFSET and VPC_SO_FLUSH_BASE.  The
+ * offset register is either programmed directly (when the slot is marked in
+ * streamout_reset, which is then cleared) or loaded via CP_MEM_TO_REG from
+ * the slot's location in the command buffer's scratch BO.
+ *
+ * Afterwards a CP_CONTEXT_REG_BUNCH packet either programs the pipeline's
+ * stream-out configuration and sets VPC_SO_CNTL.ENABLE (when any buffer is
+ * enabled) or zeroes VPC_SO_CNTL and VPC_SO_BUF_CNTL.
+ *
+ * NOTE(review): streamout_buf.sizes[] is not consulted here; the size
+ * register is programmed from the buffer's own size.
+ */
+static void
+tu6_emit_streamout(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   struct tu_streamout_state *so = &cmd->state.pipeline->streamout;
+
+   for (unsigned idx = 0; idx < IR3_MAX_SO_BUFFERS; idx++) {
+      struct tu_buffer *buffer = cmd->state.streamout_buf.buffers[idx];
+      if (buffer == NULL)
+         continue;
+
+      tu_cs_emit_regs(cs, A6XX_VPC_SO_BUFFER_BASE(idx, .bo = buffer->bo,
+                                                  .bo_offset = buffer->bo_offset));
+      tu_cs_emit_regs(cs, A6XX_VPC_SO_BUFFER_SIZE(idx, buffer->size));
+
+      if (!(cmd->state.streamout_reset & (1 << idx))) {
+         /* Load the offset from this slot's location in the scratch BO. */
+         tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
+         tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(idx)) |
+                        CP_MEM_TO_REG_0_SHIFT_BY_2 | CP_MEM_TO_REG_0_UNK31 |
+                        CP_MEM_TO_REG_0_CNT(0));
+         tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_FLUSH * (idx + 1));
+      } else {
+         /* NOTE(review): the bound offset is scaled by the pipeline's
+          * per-buffer stride before being written -- confirm the units
+          * expected by VPC_SO_BUFFER_OFFSET.
+          */
+         uint32_t start = cmd->state.streamout_buf.offsets[idx];
+         start *= so->stride[idx];
+
+         tu_cs_emit_regs(cs, A6XX_VPC_SO_BUFFER_OFFSET(idx, start));
+         cmd->state.streamout_reset &= ~(1 << idx);
+      }
+
+      tu_cs_emit_regs(cs, A6XX_VPC_SO_FLUSH_BASE(idx, .bo = &cmd->scratch_bo,
+                                                 .bo_offset = VSC_FLUSH * (idx + 1)));
+   }
+
+   if (!cmd->state.streamout_enabled) {
+      /* Nothing enabled: turn stream-out off. */
+      tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 4);
+      tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL);
+      tu_cs_emit(cs, 0);
+      tu_cs_emit(cs, REG_A6XX_VPC_SO_BUF_CNTL);
+      tu_cs_emit(cs, 0);
+      return;
+   }
+
+   /* 6 fixed register/value pairs (BUF_CNTL, 4x NCOMP, CNTL) = 12 dwords,
+    * plus one pair per VPC_SO_PROG entry.
+    */
+   tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 12 + (2 * so->prog_count));
+   tu_cs_emit(cs, REG_A6XX_VPC_SO_BUF_CNTL);
+   tu_cs_emit(cs, so->vpc_so_buf_cntl);
+   for (unsigned n = 0; n < 4; n++) {
+      tu_cs_emit(cs, REG_A6XX_VPC_SO_NCOMP(n));
+      tu_cs_emit(cs, so->ncomp[n]);
+   }
+   tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL);
+   tu_cs_emit(cs, A6XX_VPC_SO_CNTL_ENABLE);
+   for (unsigned p = 0; p < so->prog_count; p++) {
+      tu_cs_emit(cs, REG_A6XX_VPC_SO_PROG);
+      tu_cs_emit(cs, so->prog[p]);
+   }
+}
+
static VkResult
tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
@@ -3505,6 +3599,9 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
};
}
+ if (cmd->state.dirty & TU_CMD_DIRTY_STREAMOUT_BUFFERS)
+ tu6_emit_streamout(cmd, cs);
+
if (cmd->state.dirty &
(TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DESCRIPTOR_SETS)) {
bool needs_border = false;
@@ -3623,6 +3720,15 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
}
}
}
+ if (cmd->state.dirty & TU_CMD_DIRTY_STREAMOUT_BUFFERS) {
+ for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
+ const struct tu_buffer *buf = cmd->state.streamout_buf.buffers[i];
+ if (buf) {
+ tu_bo_list_add(&cmd->bo_list, buf->bo,
+ MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
+ }
+ }
+ }
/* Fragment shader state overwrites compute shader state, so flag the
* compute pipeline for re-emit.
@@ -3742,6 +3848,13 @@ tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw)
else
tu6_emit_draw_direct(cmd, cs, draw);
+ if (cmd->state.streamout_enabled) {
+ for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
+ if (cmd->state.streamout_enabled & (1 << i))
+ tu6_emit_event_write(cmd, cs, FLUSH_SO_0 + i, false);
+ }
+ }
+
cmd->wait_for_idle = true;
tu_cs_sanity_check(cs);
diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py
index 3e1a7cf6625..efeb8dc0c89 100644
--- a/src/freedreno/vulkan/tu_extensions.py
+++ b/src/freedreno/vulkan/tu_extensions.py
@@ -77,6 +77,7 @@ EXTENSIONS = [
Extension('VK_KHR_external_memory_fd', 1, True),
Extension('VK_EXT_external_memory_dma_buf', 1, True),
Extension('VK_EXT_image_drm_format_modifier', 1, False),
+ Extension('VK_EXT_transform_feedback', 1, False),
]
class VkVersion:
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index d04fe99aad1..661529fa537 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -984,6 +984,7 @@ struct tu_cmd_buffer
uint32_t scratch_seqno;
#define VSC_OVERFLOW 0x8
#define VSC_SCRATCH 0x10
+#define VSC_FLUSH 0x20
struct tu_bo vsc_data;
struct tu_bo vsc_data2;