Diffstat (limited to 'src/amd/vulkan')
-rw-r--r-- | src/amd/vulkan/radv_cmd_buffer.c |  31
-rw-r--r-- | src/amd/vulkan/radv_device.c     | 202
-rw-r--r-- | src/amd/vulkan/radv_private.h    |   8
3 files changed, 230 insertions, 11 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 3d1542d4663..9bc50ad0929 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1457,12 +1457,17 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
 	cmd_buffer->scratch_size_needed = 0;
 	cmd_buffer->compute_scratch_size_needed = 0;
+	cmd_buffer->esgs_ring_size_needed = 0;
+	cmd_buffer->gsvs_ring_size_needed = 0;
+
 	if (cmd_buffer->upload.upload_bo)
 		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
 						      cmd_buffer->upload.upload_bo, 8);
 	cmd_buffer->upload.offset = 0;
 
 	cmd_buffer->record_fail = false;
+
+	cmd_buffer->ring_offsets_idx = -1;
 }
 
 VkResult radv_ResetCommandBuffer(
@@ -1649,6 +1654,7 @@ VkResult radv_EndCommandBuffer(
 	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
 		si_emit_cache_flush(cmd_buffer);
+
 	if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
 	    cmd_buffer->record_fail)
 		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -1735,6 +1741,20 @@ void radv_CmdBindPipeline(
 		radv_dynamic_state_copy(&cmd_buffer->state.dynamic,
 					&pipeline->dynamic_state,
 					pipeline->dynamic_state_mask);
+
+		if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
+			cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
+		if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
+			cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
+
+		if (radv_pipeline_has_gs(pipeline)) {
+			struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
+									     AC_UD_SCRATCH_RING_OFFSETS);
+			if (cmd_buffer->ring_offsets_idx == -1)
+				cmd_buffer->ring_offsets_idx = loc->sgpr_idx;
+			else if (loc->sgpr_idx != -1)
+				assert(loc->sgpr_idx == cmd_buffer->ring_offsets_idx);
+		}
 		break;
 	default:
 		assert(!"invalid bind point");
@@ -1887,6 +1907,17 @@ void radv_CmdExecuteCommands(
 		primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed,
 							    secondary->compute_scratch_size_needed);
 
+		if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
+			primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
+		if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
+			primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
+
+		if (secondary->ring_offsets_idx != -1) {
+			if (primary->ring_offsets_idx == -1)
+				primary->ring_offsets_idx = secondary->ring_offsets_idx;
+			else
+				assert(secondary->ring_offsets_idx == primary->ring_offsets_idx);
+		}
 		primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
 	}
 
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index b7978bb16a2..984bd75bb80 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -764,6 +764,10 @@ radv_queue_finish(struct radv_queue *queue)
 		queue->device->ws->buffer_destroy(queue->descriptor_bo);
 	if (queue->scratch_bo)
 		queue->device->ws->buffer_destroy(queue->scratch_bo);
+	if (queue->esgs_ring_bo)
+		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
+	if (queue->gsvs_ring_bo)
+		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
 	if (queue->compute_scratch_bo)
 		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
 }
@@ -1046,24 +1050,118 @@ static void radv_dump_trace(struct radv_device *device,
 	fclose(f);
 }
 
+static void
+fill_geom_rings(struct radv_queue *queue,
+		uint32_t *map,
+		uint32_t esgs_ring_size,
+		struct radeon_winsys_bo *esgs_ring_bo,
+		uint32_t gsvs_ring_size,
+		struct radeon_winsys_bo *gsvs_ring_bo)
+{
+	uint64_t esgs_va, gsvs_va;
+	esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
+	gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
+	uint32_t *desc = &map[4];
+
+	/* stride 0, num records - size, add tid, swizzle, elsize4,
+	   index stride 64 */
+	desc[0] = esgs_va;
+	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
+		S_008F04_STRIDE(0) |
+		S_008F04_SWIZZLE_ENABLE(true);
+	desc[2] = esgs_ring_size;
+	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+		S_008F0C_ELEMENT_SIZE(1) |
+		S_008F0C_INDEX_STRIDE(3) |
+		S_008F0C_ADD_TID_ENABLE(true);
+
+	desc += 4;
+	/* GS entry for ES->GS ring */
+	/* stride 0, num records - size, elsize0,
+	   index stride 0 */
+	desc[0] = esgs_va;
+	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
+		S_008F04_STRIDE(0) |
+		S_008F04_SWIZZLE_ENABLE(false);
+	desc[2] = esgs_ring_size;
+	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+		S_008F0C_ELEMENT_SIZE(0) |
+		S_008F0C_INDEX_STRIDE(0) |
+		S_008F0C_ADD_TID_ENABLE(false);
+
+	desc += 4;
+	/* VS entry for GS->VS ring */
+	/* stride 0, num records - size, elsize0,
+	   index stride 0 */
+	desc[0] = gsvs_va;
+	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+		S_008F04_STRIDE(0) |
+		S_008F04_SWIZZLE_ENABLE(false);
+	desc[2] = gsvs_ring_size;
+	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+		S_008F0C_ELEMENT_SIZE(0) |
+		S_008F0C_INDEX_STRIDE(0) |
+		S_008F0C_ADD_TID_ENABLE(false);
+	desc += 4;
+
+	/* stride gsvs_itemsize, num records 64
+	   elsize 4, index stride 16 */
+	/* shader will patch stride and desc[2] */
+	desc[0] = gsvs_va;
+	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+		S_008F04_STRIDE(0) |
+		S_008F04_SWIZZLE_ENABLE(true);
+	desc[2] = 0;
+	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+		S_008F0C_ELEMENT_SIZE(1) |
+		S_008F0C_INDEX_STRIDE(1) |
+		S_008F0C_ADD_TID_ENABLE(true);
+}
+
 static VkResult
 radv_get_preamble_cs(struct radv_queue *queue,
 		     uint32_t scratch_size,
 		     uint32_t compute_scratch_size,
+		     uint32_t esgs_ring_size,
+		     uint32_t gsvs_ring_size,
 		     struct radeon_winsys_cs **preamble_cs)
 {
 	struct radeon_winsys_bo *scratch_bo = NULL;
 	struct radeon_winsys_bo *descriptor_bo = NULL;
 	struct radeon_winsys_bo *compute_scratch_bo = NULL;
+	struct radeon_winsys_bo *esgs_ring_bo = NULL;
+	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
 	struct radeon_winsys_cs *cs = NULL;
 
-	if (!scratch_size && !compute_scratch_size) {
+	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) {
 		*preamble_cs = NULL;
 		return VK_SUCCESS;
 	}
 
 	if (scratch_size <= queue->scratch_size &&
-	    compute_scratch_size <= queue->compute_scratch_size) {
+	    compute_scratch_size <= queue->compute_scratch_size &&
+	    esgs_ring_size <= queue->esgs_ring_size &&
+	    gsvs_ring_size <= queue->gsvs_ring_size) {
 		*preamble_cs = queue->preamble_cs;
 		return VK_SUCCESS;
 	}
@@ -1091,9 +1189,43 @@ radv_get_preamble_cs(struct radv_queue *queue,
 	} else
 		compute_scratch_bo = queue->compute_scratch_bo;
 
-	if (scratch_bo != queue->scratch_bo) {
+	if (esgs_ring_size > queue->esgs_ring_size) {
+		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
+								esgs_ring_size,
+								4096,
+								RADEON_DOMAIN_VRAM,
+								RADEON_FLAG_NO_CPU_ACCESS);
+		if (!esgs_ring_bo)
+			goto fail;
+	} else {
+		esgs_ring_bo = queue->esgs_ring_bo;
+		esgs_ring_size = queue->esgs_ring_size;
+	}
+
+	if (gsvs_ring_size > queue->gsvs_ring_size) {
+		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
+								gsvs_ring_size,
+								4096,
+								RADEON_DOMAIN_VRAM,
+								RADEON_FLAG_NO_CPU_ACCESS);
+		if (!gsvs_ring_bo)
+			goto fail;
+	} else {
+		gsvs_ring_bo = queue->gsvs_ring_bo;
+		gsvs_ring_size = queue->gsvs_ring_size;
+	}
+
+	if (scratch_bo != queue->scratch_bo ||
+	    esgs_ring_bo != queue->esgs_ring_bo ||
+	    gsvs_ring_bo != queue->gsvs_ring_bo) {
+		uint32_t size = 0;
+		if (gsvs_ring_bo || esgs_ring_bo)
+			size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
+		else if (scratch_bo)
+			size = 8; /* 2 dword */
+
 		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
-								 8,
+								 size,
 								 4096,
 								 RADEON_DOMAIN_VRAM,
 								 RADEON_FLAG_CPU_ACCESS);
@@ -1111,22 +1243,49 @@ radv_get_preamble_cs(struct radv_queue *queue,
 	if (scratch_bo)
 		queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
 
+	if (esgs_ring_bo)
+		queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
+
+	if (gsvs_ring_bo)
+		queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
+
 	if (descriptor_bo)
 		queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
 
 	if (descriptor_bo != queue->descriptor_bo) {
-		uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
-		uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
-				 S_008F04_SWIZZLE_ENABLE(1);
-		uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
-		map[0] = scratch_va;
-		map[1] = rsrc1;
+		uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
+
+		if (scratch_bo) {
+			uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
+			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
+					 S_008F04_SWIZZLE_ENABLE(1);
+			map[0] = scratch_va;
+			map[1] = rsrc1;
+		}
+
+		if (esgs_ring_bo || gsvs_ring_bo)
+			fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
 
 		queue->device->ws->buffer_unmap(descriptor_bo);
 	}
 
+	if (esgs_ring_bo || gsvs_ring_bo) {
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+
+		if (queue->device->physical_device->rad_info.chip_class >= CIK) {
+			radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
+			radeon_emit(cs, esgs_ring_size >> 8);
+			radeon_emit(cs, gsvs_ring_size >> 8);
+		} else {
+			radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
+			radeon_emit(cs, esgs_ring_size >> 8);
+			radeon_emit(cs, gsvs_ring_size >> 8);
+		}
+	}
+
 	if (descriptor_bo) {
 		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
 				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
@@ -1178,6 +1337,20 @@ radv_get_preamble_cs(struct radv_queue *queue,
 		queue->compute_scratch_size = compute_scratch_size;
 	}
 
+	if (esgs_ring_bo != queue->esgs_ring_bo) {
+		if (queue->esgs_ring_bo)
+			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
+		queue->esgs_ring_bo = esgs_ring_bo;
+		queue->esgs_ring_size = esgs_ring_size;
+	}
+
+	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
+		if (queue->gsvs_ring_bo)
+			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
+		queue->gsvs_ring_bo = gsvs_ring_bo;
+		queue->gsvs_ring_size = gsvs_ring_size;
+	}
+
 	if (descriptor_bo != queue->descriptor_bo) {
 		if (queue->descriptor_bo)
 			queue->device->ws->buffer_destroy(queue->descriptor_bo);
@@ -1196,6 +1369,10 @@ fail:
 		queue->device->ws->buffer_destroy(scratch_bo);
 	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
 		queue->device->ws->buffer_destroy(compute_scratch_bo);
+	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
+		queue->device->ws->buffer_destroy(esgs_ring_bo);
+	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
+		queue->device->ws->buffer_destroy(gsvs_ring_bo);
 	return VK_ERROR_OUT_OF_DEVICE_MEMORY;
 }
 
@@ -1213,6 +1390,7 @@ VkResult radv_QueueSubmit(
 	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
 	uint32_t scratch_size = 0;
 	uint32_t compute_scratch_size = 0;
+	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
 	struct radeon_winsys_cs *preamble_cs = NULL;
 	VkResult result;
 
@@ -1226,10 +1404,12 @@ VkResult radv_QueueSubmit(
 			scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
 			compute_scratch_size = MAX2(compute_scratch_size,
 						    cmd_buffer->compute_scratch_size_needed);
+			esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
+			gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
 		}
 	}
 
-	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, &preamble_cs);
+	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, esgs_ring_size, gsvs_ring_size, &preamble_cs);
 	if (result != VK_SUCCESS)
 		return result;
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3a0318b9fc2..57aa9ead9b7 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -470,10 +470,14 @@ struct radv_queue {
 
 	uint32_t scratch_size;
 	uint32_t compute_scratch_size;
+	uint32_t esgs_ring_size;
+	uint32_t gsvs_ring_size;
 
 	struct radeon_winsys_bo *scratch_bo;
 	struct radeon_winsys_bo *descriptor_bo;
 	struct radeon_winsys_bo *compute_scratch_bo;
+	struct radeon_winsys_bo *esgs_ring_bo;
+	struct radeon_winsys_bo *gsvs_ring_bo;
 	struct radeon_winsys_cs *preamble_cs;
 };
 
@@ -742,6 +746,10 @@ struct radv_cmd_buffer {
 
 	uint32_t scratch_size_needed;
 	uint32_t compute_scratch_size_needed;
+	uint32_t esgs_ring_size_needed;
+	uint32_t gsvs_ring_size_needed;
+
+	int ring_offsets_idx; /* just used for verification */
 };
 
 struct radv_image;