diff options
Diffstat (limited to 'src/vulkan/anv_aub.c')
-rw-r--r-- | src/vulkan/anv_aub.c | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/src/vulkan/anv_aub.c b/src/vulkan/anv_aub.c new file mode 100644 index 00000000000..97f124a0aad --- /dev/null +++ b/src/vulkan/anv_aub.c @@ -0,0 +1,310 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/mman.h> + +#include <drm.h> +#include <i915_drm.h> + +#include "anv_private.h" +#include "anv_aub.h" + +struct anv_aub_writer { + FILE *file; + uint32_t offset; + int gen; +}; + +static void +aub_out(struct anv_aub_writer *writer, uint32_t data) +{ + fwrite(&data, 1, 4, writer->file); +} + +static void +aub_out_data(struct anv_aub_writer *writer, const void *data, size_t size) +{ + fwrite(data, 1, size, writer->file); +} + +static struct anv_aub_writer * +get_anv_aub_writer(struct anv_device *device) +{ + struct anv_aub_writer *writer = device->aub_writer; + int entry = 0x200003; + int i; + int gtt_size = 0x10000; + const char *filename; + + if (geteuid() != getuid()) + return NULL; + + if (writer) + return writer; + + writer = malloc(sizeof(*writer)); + if (writer == NULL) + return NULL; + + filename = "intel.aub"; + writer->gen = device->info.gen; + writer->file = fopen(filename, "w+"); + if (!writer->file) { + free(writer); + return NULL; + } + + /* Start allocating objects from just after the GTT. */ + writer->offset = gtt_size; + + /* Start with a (required) version packet. */ + aub_out(writer, CMD_AUB_HEADER | (13 - 2)); + aub_out(writer, + (4 << AUB_HEADER_MAJOR_SHIFT) | + (0 << AUB_HEADER_MINOR_SHIFT)); + for (i = 0; i < 8; i++) { + aub_out(writer, 0); /* app name */ + } + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* comment len */ + + /* Set up the GTT. The max we can handle is 256M */ + aub_out(writer, CMD_AUB_TRACE_HEADER_BLOCK | ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT_ENTRY | + AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, 0); /* subtype */ + aub_out(writer, 0); /* offset */ + aub_out(writer, gtt_size); /* size */ + if (writer->gen >= 8) + aub_out(writer, 0); + for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { + aub_out(writer, entry); + } + + return device->aub_writer = writer; +} + +void +anv_aub_writer_destroy(struct anv_aub_writer *writer) +{ + fclose(writer->file); + free(writer); +} + + +/** + * Break up large objects into multiple writes. Otherwise a 128kb VBO + * would overflow the 16 bits of size field in the packet header and + * everything goes badly after that. + */ +static void +aub_write_trace_block(struct anv_aub_writer *writer, uint32_t type, + void *virtual, uint32_t size, uint32_t gtt_offset) +{ + uint32_t block_size; + uint32_t offset; + uint32_t subtype = 0; + static const char null_block[8 * 4096]; + + for (offset = 0; offset < size; offset += block_size) { + block_size = size - offset; + + if (block_size > 8 * 4096) + block_size = 8 * 4096; + + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | + type | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, subtype); + aub_out(writer, gtt_offset + offset); + aub_out(writer, align_u32(block_size, 4)); + if (writer->gen >= 8) + aub_out(writer, 0); + + if (virtual) + aub_out_data(writer, (char *) virtual + offset, block_size); + else + aub_out_data(writer, null_block, block_size); + + /* Pad to a multiple of 4 bytes. */ + aub_out_data(writer, null_block, -block_size & 3); + } +} + +/* + * Make a ringbuffer on fly and dump it + */ +static void +aub_build_dump_ringbuffer(struct anv_aub_writer *writer, + uint32_t batch_offset, uint32_t offset, + int ring_flag) +{ + uint32_t ringbuffer[4096]; + int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ + int ring_count = 0; + + if (ring_flag == I915_EXEC_BSD) + ring = AUB_TRACE_TYPE_RING_PRB1; + else if (ring_flag == I915_EXEC_BLT) + ring = AUB_TRACE_TYPE_RING_PRB2; + + /* Make a ring buffer to execute our batchbuffer. */ + memset(ringbuffer, 0, sizeof(ringbuffer)); + if (writer->gen >= 8) { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); + ringbuffer[ring_count++] = batch_offset; + ringbuffer[ring_count++] = 0; + } else { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; + ringbuffer[ring_count++] = batch_offset; + } + + /* Write out the ring. This appears to trigger execution of + * the ring in the simulator. + */ + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); + aub_out(writer, 0); /* general/surface subtype */ + aub_out(writer, offset); + aub_out(writer, ring_count * 4); + if (writer->gen >= 8) + aub_out(writer, 0); + + /* FIXME: Need some flush operations here? */ + aub_out_data(writer, ringbuffer, ring_count * 4); +} + +struct aub_bo { + uint32_t offset; + void *map; + void *relocated; +}; + +static void +relocate_bo(struct anv_bo *bo, struct drm_i915_gem_relocation_entry *relocs, + size_t num_relocs, struct aub_bo *bos) +{ + struct aub_bo *aub_bo = &bos[bo->index]; + struct drm_i915_gem_relocation_entry *reloc; + uint32_t *dw; + + aub_bo->relocated = malloc(bo->size); + memcpy(aub_bo->relocated, aub_bo->map, bo->size); + for (size_t i = 0; i < num_relocs; i++) { + reloc = &relocs[i]; + assert(reloc->offset < bo->size); + dw = aub_bo->relocated + reloc->offset; + *dw = bos[reloc->target_handle].offset + reloc->delta; + } +} + +void +anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_aub_writer *writer; + struct anv_bo *bo; + uint32_t ring_flag = 0; + uint32_t offset; + struct aub_bo *aub_bos; + + writer = get_anv_aub_writer(device); + if (writer == NULL) + return; + + aub_bos = malloc(cmd_buffer->exec2_bo_count * sizeof(aub_bos[0])); + offset = writer->offset; + for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) { + bo = cmd_buffer->exec2_bos[i]; + if (bo->map) + aub_bos[i].map = bo->map; + else + aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size); + aub_bos[i].relocated = aub_bos[i].map; + aub_bos[i].offset = offset; + offset = align_u32(offset + bo->size + 4095, 4096); + } + + struct anv_batch_bo *first_bbo; + for (struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; + bbo != NULL; bbo = bbo->prev_batch_bo) { + /* Keep stashing the current BO until we get to the beginning */ + first_bbo = bbo; + + /* Handle relocations for this batch BO */ + relocate_bo(&bbo->bo, &batch->relocs.relocs[bbo->first_reloc], + bbo->num_relocs, aub_bos); + } + assert(first_bbo->prev_batch_bo == NULL); + + for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; + bbo != NULL; bbo = bbo->prev_batch_bo) { + + /* Handle relocations for this surface state BO */ + relocate_bo(&bbo->bo, + &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], + bbo->num_relocs, aub_bos); + } + + for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) { + bo = cmd_buffer->exec2_bos[i]; + if (i == cmd_buffer->exec2_bo_count - 1) { + assert(bo == &first_bbo->bo); + aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, + aub_bos[i].relocated, + first_bbo->length, aub_bos[i].offset); + } else { + aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE, + aub_bos[i].relocated, + bo->size, aub_bos[i].offset); + } + if (aub_bos[i].relocated != aub_bos[i].map) + free(aub_bos[i].relocated); + if (aub_bos[i].map != bo->map) + anv_gem_munmap(aub_bos[i].map, bo->size); + } + + /* Dump ring buffer */ + aub_build_dump_ringbuffer(writer, aub_bos[first_bbo->bo.index].offset, + offset, ring_flag); + + free(aub_bos); + + fflush(writer->file); +} |