summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2017-01-20 11:06:52 +1000
committerDave Airlie <[email protected]>2017-01-31 09:30:19 +1000
commit1fa5b755c23f97d6a92465b08d460bb4406e44dd (patch)
treed1c7eb48846f24e5c1ed7ce9c648493dc017e7c8
parent8f41fe4389a820d243651a31c7e44ba57735b3ff (diff)
radv: emit geometry ring size and pointers via preamble (v2)
This uses the scratch infrastructure to handle the esgs and gsvs rings. (this replaces the old code that did this with patching). v2: fix correct ring sizes, reset sizes (Bas) Reviewed-by: Bas Nieuwenhuizen <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
-rw-r--r--src/amd/vulkan/radv_cmd_buffer.c31
-rw-r--r--src/amd/vulkan/radv_device.c202
-rw-r--r--src/amd/vulkan/radv_private.h8
3 files changed, 230 insertions, 11 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 3d1542d4663..9bc50ad0929 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1457,12 +1457,17 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->scratch_size_needed = 0;
cmd_buffer->compute_scratch_size_needed = 0;
+ cmd_buffer->esgs_ring_size_needed = 0;
+ cmd_buffer->gsvs_ring_size_needed = 0;
+
if (cmd_buffer->upload.upload_bo)
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
cmd_buffer->upload.upload_bo, 8);
cmd_buffer->upload.offset = 0;
cmd_buffer->record_fail = false;
+
+ cmd_buffer->ring_offsets_idx = -1;
}
VkResult radv_ResetCommandBuffer(
@@ -1649,6 +1654,7 @@ VkResult radv_EndCommandBuffer(
if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
si_emit_cache_flush(cmd_buffer);
+
if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
cmd_buffer->record_fail)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -1735,6 +1741,20 @@ void radv_CmdBindPipeline(
radv_dynamic_state_copy(&cmd_buffer->state.dynamic,
&pipeline->dynamic_state,
pipeline->dynamic_state_mask);
+
+ if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
+ cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
+ if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
+ cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
+ AC_UD_SCRATCH_RING_OFFSETS);
+ if (cmd_buffer->ring_offsets_idx == -1)
+ cmd_buffer->ring_offsets_idx = loc->sgpr_idx;
+ else if (loc->sgpr_idx != -1)
+ assert(loc->sgpr_idx != cmd_buffer->ring_offsets_idx);
+ }
break;
default:
assert(!"invalid bind point");
@@ -1887,6 +1907,17 @@ void radv_CmdExecuteCommands(
primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed,
secondary->compute_scratch_size_needed);
+ if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
+ primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
+ if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
+ primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
+
+ if (secondary->ring_offsets_idx != -1) {
+ if (primary->ring_offsets_idx == -1)
+ primary->ring_offsets_idx = secondary->ring_offsets_idx;
+ else
+ assert(secondary->ring_offsets_idx == primary->ring_offsets_idx);
+ }
primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
}
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index b7978bb16a2..984bd75bb80 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -764,6 +764,10 @@ radv_queue_finish(struct radv_queue *queue)
queue->device->ws->buffer_destroy(queue->descriptor_bo);
if (queue->scratch_bo)
queue->device->ws->buffer_destroy(queue->scratch_bo);
+ if (queue->esgs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
+ if (queue->gsvs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
if (queue->compute_scratch_bo)
queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}
@@ -1046,24 +1050,118 @@ static void radv_dump_trace(struct radv_device *device,
fclose(f);
}
+static void
+fill_geom_rings(struct radv_queue *queue,
+ uint32_t *map,
+ uint32_t esgs_ring_size,
+ struct radeon_winsys_bo *esgs_ring_bo,
+ uint32_t gsvs_ring_size,
+ struct radeon_winsys_bo *gsvs_ring_bo)
+{
+ uint64_t esgs_va, gsvs_va;
+ esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
+ gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
+ uint32_t *desc = &map[4];
+
+ /* stride 0, num records - size, add tid, swizzle, elsize4,
+ index stride 64 */
+ desc[0] = esgs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
+ S_008F04_STRIDE(0) |
+ S_008F04_SWIZZLE_ENABLE(true);
+ desc[2] = esgs_ring_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(1) |
+ S_008F0C_INDEX_STRIDE(3) |
+ S_008F0C_ADD_TID_ENABLE(true);
+
+ desc += 4;
+ /* GS entry for ES->GS ring */
+ /* stride 0, num records - size, elsize0,
+ index stride 0 */
+ desc[0] = esgs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
+ S_008F04_STRIDE(0) |
+ S_008F04_SWIZZLE_ENABLE(false);
+ desc[2] = esgs_ring_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(0) |
+ S_008F0C_INDEX_STRIDE(0) |
+ S_008F0C_ADD_TID_ENABLE(false);
+
+ desc += 4;
+ /* VS entry for GS->VS ring */
+ /* stride 0, num records - size, elsize0,
+ index stride 0 */
+ desc[0] = gsvs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+ S_008F04_STRIDE(0) |
+ S_008F04_SWIZZLE_ENABLE(false);
+ desc[2] = gsvs_ring_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(0) |
+ S_008F0C_INDEX_STRIDE(0) |
+ S_008F0C_ADD_TID_ENABLE(false);
+ desc += 4;
+
+ /* stride gsvs_itemsize, num records 64
+ elsize 4, index stride 16 */
+ /* shader will patch stride and desc[2] */
+ desc[0] = gsvs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+ S_008F04_STRIDE(0) |
+ S_008F04_SWIZZLE_ENABLE(true);
+ desc[2] = 0;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(1) |
+ S_008F0C_INDEX_STRIDE(1) |
+ S_008F0C_ADD_TID_ENABLE(true);
+}
+
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
uint32_t scratch_size,
uint32_t compute_scratch_size,
+ uint32_t esgs_ring_size,
+ uint32_t gsvs_ring_size,
struct radeon_winsys_cs **preamble_cs)
{
struct radeon_winsys_bo *scratch_bo = NULL;
struct radeon_winsys_bo *descriptor_bo = NULL;
struct radeon_winsys_bo *compute_scratch_bo = NULL;
+ struct radeon_winsys_bo *esgs_ring_bo = NULL;
+ struct radeon_winsys_bo *gsvs_ring_bo = NULL;
struct radeon_winsys_cs *cs = NULL;
- if (!scratch_size && !compute_scratch_size) {
+ if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) {
*preamble_cs = NULL;
return VK_SUCCESS;
}
if (scratch_size <= queue->scratch_size &&
- compute_scratch_size <= queue->compute_scratch_size) {
+ compute_scratch_size <= queue->compute_scratch_size &&
+ esgs_ring_size <= queue->esgs_ring_size &&
+ gsvs_ring_size <= queue->gsvs_ring_size) {
*preamble_cs = queue->preamble_cs;
return VK_SUCCESS;
}
@@ -1091,9 +1189,43 @@ radv_get_preamble_cs(struct radv_queue *queue,
} else
compute_scratch_bo = queue->compute_scratch_bo;
- if (scratch_bo != queue->scratch_bo) {
+ if (esgs_ring_size > queue->esgs_ring_size) {
+ esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
+ esgs_ring_size,
+ 4096,
+ RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_CPU_ACCESS);
+ if (!esgs_ring_bo)
+ goto fail;
+ } else {
+ esgs_ring_bo = queue->esgs_ring_bo;
+ esgs_ring_size = queue->esgs_ring_size;
+ }
+
+ if (gsvs_ring_size > queue->gsvs_ring_size) {
+ gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
+ gsvs_ring_size,
+ 4096,
+ RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_CPU_ACCESS);
+ if (!gsvs_ring_bo)
+ goto fail;
+ } else {
+ gsvs_ring_bo = queue->gsvs_ring_bo;
+ gsvs_ring_size = queue->gsvs_ring_size;
+ }
+
+ if (scratch_bo != queue->scratch_bo ||
+ esgs_ring_bo != queue->esgs_ring_bo ||
+ gsvs_ring_bo != queue->gsvs_ring_bo) {
+ uint32_t size = 0;
+ if (gsvs_ring_bo || esgs_ring_bo)
+ size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
+ else if (scratch_bo)
+ size = 8; /* 2 dword */
+
descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
- 8,
+ size,
4096,
RADEON_DOMAIN_VRAM,
RADEON_FLAG_CPU_ACCESS);
@@ -1111,22 +1243,49 @@ radv_get_preamble_cs(struct radv_queue *queue,
if (scratch_bo)
queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
+ if (esgs_ring_bo)
+ queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
+
+ if (gsvs_ring_bo)
+ queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
+
if (descriptor_bo)
queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
if (descriptor_bo != queue->descriptor_bo) {
- uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
- uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(1);
-
uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
- map[0] = scratch_va;
- map[1] = rsrc1;
+ if (scratch_bo) {
+ uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
+ uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
+ S_008F04_SWIZZLE_ENABLE(1);
+ map[0] = scratch_va;
+ map[1] = rsrc1;
+ }
+
+ if (esgs_ring_bo || gsvs_ring_bo)
+ fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
queue->device->ws->buffer_unmap(descriptor_bo);
}
+ if (esgs_ring_bo || gsvs_ring_bo) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+
+ if (queue->device->physical_device->rad_info.chip_class >= CIK) {
+ radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
+ radeon_emit(cs, esgs_ring_size >> 8);
+ radeon_emit(cs, gsvs_ring_size >> 8);
+ } else {
+ radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
+ radeon_emit(cs, esgs_ring_size >> 8);
+ radeon_emit(cs, gsvs_ring_size >> 8);
+ }
+ }
+
if (descriptor_bo) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
R_00B130_SPI_SHADER_USER_DATA_VS_0,
@@ -1178,6 +1337,20 @@ radv_get_preamble_cs(struct radv_queue *queue,
queue->compute_scratch_size = compute_scratch_size;
}
+ if (esgs_ring_bo != queue->esgs_ring_bo) {
+ if (queue->esgs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
+ queue->esgs_ring_bo = esgs_ring_bo;
+ queue->esgs_ring_size = esgs_ring_size;
+ }
+
+ if (gsvs_ring_bo != queue->gsvs_ring_bo) {
+ if (queue->gsvs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
+ queue->gsvs_ring_bo = gsvs_ring_bo;
+ queue->gsvs_ring_size = gsvs_ring_size;
+ }
+
if (descriptor_bo != queue->descriptor_bo) {
if (queue->descriptor_bo)
queue->device->ws->buffer_destroy(queue->descriptor_bo);
@@ -1196,6 +1369,10 @@ fail:
queue->device->ws->buffer_destroy(scratch_bo);
if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
queue->device->ws->buffer_destroy(compute_scratch_bo);
+ if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
+ queue->device->ws->buffer_destroy(esgs_ring_bo);
+ if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
+ queue->device->ws->buffer_destroy(gsvs_ring_bo);
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
@@ -1213,6 +1390,7 @@ VkResult radv_QueueSubmit(
uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
uint32_t scratch_size = 0;
uint32_t compute_scratch_size = 0;
+ uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
struct radeon_winsys_cs *preamble_cs = NULL;
VkResult result;
@@ -1226,10 +1404,12 @@ VkResult radv_QueueSubmit(
scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
compute_scratch_size = MAX2(compute_scratch_size,
cmd_buffer->compute_scratch_size_needed);
+ esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
+ gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
}
}
- result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, &preamble_cs);
+ result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, esgs_ring_size, gsvs_ring_size, &preamble_cs);
if (result != VK_SUCCESS)
return result;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3a0318b9fc2..57aa9ead9b7 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -470,10 +470,14 @@ struct radv_queue {
uint32_t scratch_size;
uint32_t compute_scratch_size;
+ uint32_t esgs_ring_size;
+ uint32_t gsvs_ring_size;
struct radeon_winsys_bo *scratch_bo;
struct radeon_winsys_bo *descriptor_bo;
struct radeon_winsys_bo *compute_scratch_bo;
+ struct radeon_winsys_bo *esgs_ring_bo;
+ struct radeon_winsys_bo *gsvs_ring_bo;
struct radeon_winsys_cs *preamble_cs;
};
@@ -742,6 +746,10 @@ struct radv_cmd_buffer {
uint32_t scratch_size_needed;
uint32_t compute_scratch_size_needed;
+ uint32_t esgs_ring_size_needed;
+ uint32_t gsvs_ring_size_needed;
+
+ int ring_offsets_idx; /* just used for verification */
};
struct radv_image;