/* * Copyright © 2016 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util/macros.h" #include "common/gen_decoder.h" #include "intel_aub.h" #include "gen_disasm.h" /* Below is the only command missing from intel_aub.h in libdrm * So, reuse intel_aub.h from libdrm and #define the * AUB_MI_BATCH_BUFFER_END as below */ #define AUB_MI_BATCH_BUFFER_END (0x0500 << 16) #define CSI "\e[" #define BLUE_HEADER CSI "0;44m" #define GREEN_HEADER CSI "1;42m" #define NORMAL CSI "0m" /* options */ static bool option_full_decode = true; static bool option_print_offsets = true; static enum { COLOR_AUTO, COLOR_ALWAYS, COLOR_NEVER } option_color; /* state */ uint16_t pci_id = 0; char *input_file = NULL, *xml_path = NULL; struct gen_spec *spec; struct gen_disasm *disasm; uint64_t gtt_size, gtt_end; void *gtt; uint64_t general_state_base; uint64_t surface_state_base; uint64_t dynamic_state_base; uint64_t instruction_base; uint64_t instruction_bound; FILE *outfile; static inline uint32_t field(uint32_t value, int start, int end) { uint32_t mask; mask = ~0U >> (31 - end + start); return (value >> start) & mask; } struct brw_instruction; static inline int valid_offset(uint32_t offset) { return offset < gtt_end; } static void decode_group(struct gen_group *strct, const uint32_t *p, int starting_dword) { uint64_t offset = option_print_offsets ? (void *) p - gtt : 0; gen_print_group(outfile, strct, offset, p, option_color == COLOR_ALWAYS); } static void dump_binding_table(struct gen_spec *spec, uint32_t offset) { uint32_t *pointers, i; uint64_t start; struct gen_group *surface_state; surface_state = gen_spec_find_struct(spec, "RENDER_SURFACE_STATE"); if (surface_state == NULL) { fprintf(outfile, "did not find RENDER_SURFACE_STATE info\n"); return; } start = surface_state_base + offset; pointers = gtt + start; for (i = 0; i < 16; i++) { if (pointers[i] == 0) continue; start = pointers[i] + surface_state_base; if (!valid_offset(start)) { fprintf(outfile, "pointer %u: %08x \n", i, pointers[i]); continue; } else { fprintf(outfile, "pointer %u: %08x\n", i, pointers[i]); } decode_group(surface_state, gtt + start, 0); } } static void handle_3dstate_index_buffer(struct gen_spec *spec, uint32_t *p) { void *start; uint32_t length, i, type, size; start = gtt + p[2]; type = (p[1] >> 8) & 3; size = 1 << type; length = p[4] / size; if (length > 10) length = 10; fprintf(outfile, "\t"); for (i = 0; i < length; i++) { switch (type) { case 0: fprintf(outfile, "%3d ", ((uint8_t *)start)[i]); break; case 1: fprintf(outfile, "%3d ", ((uint16_t *)start)[i]); break; case 2: fprintf(outfile, "%3d ", ((uint32_t *)start)[i]); break; } } if (length < p[4] / size) fprintf(outfile, "...\n"); else fprintf(outfile, "\n"); } static inline uint64_t get_address(struct gen_spec *spec, uint32_t *p) { /* Addresses are always guaranteed to be page-aligned and sometimes * hardware packets have extra stuff stuffed in the bottom 12 bits. */ uint64_t addr = p[0] & ~0xfffu; if (gen_spec_get_gen(spec) >= gen_make_gen(8,0)) { /* On Broadwell and above, we have 48-bit addresses which consume two * dwords. Some packets require that these get stored in a "canonical * form" which means that bit 47 is sign-extended through the upper * bits. In order to correctly handle those aub dumps, we need to mask * off the top 16 bits. */ addr |= ((uint64_t)p[1] & 0xffff) << 32; } return addr; } static inline uint64_t get_offset(uint32_t *p, uint32_t start, uint32_t end) { assert(start <= end); assert(end < 64); uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; uint64_t offset = p[0]; if (end >= 32) offset |= (uint64_t) p[1] << 32; return offset & mask; } static void handle_state_base_address(struct gen_spec *spec, uint32_t *p) { if (gen_spec_get_gen(spec) >= gen_make_gen(8,0)) { if (p[1] & 1) general_state_base = get_address(spec, &p[1]); if (p[4] & 1) surface_state_base = get_address(spec, &p[4]); if (p[6] & 1) dynamic_state_base = get_address(spec, &p[6]); if (p[10] & 1) instruction_base = get_address(spec, &p[10]); if (p[15] & 1) instruction_bound = p[15] & 0xfff; } else { if (p[2] & 1) surface_state_base = get_address(spec, &p[2]); if (p[3] & 1) dynamic_state_base = get_address(spec, &p[3]); if (p[5] & 1) instruction_base = get_address(spec, &p[5]); if (p[9] & 1) instruction_bound = get_address(spec, &p[9]); } } static void dump_samplers(struct gen_spec *spec, uint32_t offset) { uint32_t i; uint64_t start; struct gen_group *sampler_state; sampler_state = gen_spec_find_struct(spec, "SAMPLER_STATE"); start = dynamic_state_base + offset; for (i = 0; i < 4; i++) { fprintf(outfile, "sampler state %d\n", i); decode_group(sampler_state, gtt + start + i * 16, 0); } } static void handle_media_interface_descriptor_load(struct gen_spec *spec, uint32_t *p) { int i, length = p[2] / 32; struct gen_group *descriptor_structure; uint32_t *descriptors; uint64_t start; struct brw_instruction *insns; descriptor_structure = gen_spec_find_struct(spec, "INTERFACE_DESCRIPTOR_DATA"); if (descriptor_structure == NULL) { fprintf(outfile, "did not find INTERFACE_DESCRIPTOR_DATA info\n"); return; } start = dynamic_state_base + p[3]; descriptors = gtt + start; for (i = 0; i < length; i++, descriptors += 8) { fprintf(outfile, "descriptor %u: %08x\n", i, *descriptors); decode_group(descriptor_structure, descriptors, 0); start = instruction_base + descriptors[0]; if (!valid_offset(start)) { fprintf(outfile, "kernel: %08"PRIx64" \n", start); continue; } else { fprintf(outfile, "kernel: %08"PRIx64"\n", start); } insns = (struct brw_instruction *) (gtt + start); gen_disasm_disassemble(disasm, insns, 0, stdout); dump_samplers(spec, descriptors[3] & ~0x1f); dump_binding_table(spec, descriptors[4] & ~0x1f); } } /* Heuristic to determine whether a uint32_t is probably actually a float * (http://stackoverflow.com/a/2953466) */ static bool probably_float(uint32_t bits) { int exp = ((bits & 0x7f800000U) >> 23) - 127; uint32_t mant = bits & 0x007fffff; /* +- 0.0 */ if (exp == -127 && mant == 0) return true; /* +- 1 billionth to 1 billion */ if (-30 <= exp && exp <= 30) return true; /* some value with only a few binary digits */ if ((mant & 0x0000ffff) == 0) return true; return false; } static void handle_3dstate_vertex_buffers(struct gen_spec *spec, uint32_t *p) { uint32_t *end, *s, *dw, *dwend; uint64_t offset; int n, i, count, stride; end = (p[0] & 0xff) + p + 2; for (s = &p[1], n = 0; s < end; s += 4, n++) { if (gen_spec_get_gen(spec) >= gen_make_gen(8, 0)) { offset = *(uint64_t *) &s[1]; dwend = gtt + offset + s[3]; } else { offset = s[1]; dwend = gtt + s[2] + 1; } stride = field(s[0], 0, 11); count = 0; fprintf(outfile, "vertex buffer %d, size %d\n", n, s[3]); for (dw = gtt + offset, i = 0; dw < dwend && i < 256; dw++) { if (count == 0 && count % (8 * 4) == 0) fprintf(outfile, " "); if (probably_float(*dw)) fprintf(outfile, " %8.2f", *(float *) dw); else fprintf(outfile, " 0x%08x", *dw); i++; count += 4; if (count == stride) { fprintf(outfile, "\n"); count = 0; } else if (count % (8 * 4) == 0) { fprintf(outfile, "\n"); } else { fprintf(outfile, " "); } } if (count > 0 && count % (8 * 4) != 0) fprintf(outfile, "\n"); } } static void handle_3dstate_vs(struct gen_spec *spec, uint32_t *p) { uint64_t start; struct brw_instruction *insns; int vs_enable; if (gen_spec_get_gen(spec) >= gen_make_gen(8, 0)) { start = get_offset(&p[1], 6, 63); vs_enable = p[7] & 1; } else { start = get_offset(&p[1], 6, 31); vs_enable = p[5] & 1; } if (vs_enable) { fprintf(outfile, "instruction_base %08"PRIx64", start %08"PRIx64"\n", instruction_base, start); insns = (struct brw_instruction *) (gtt + instruction_base + start); gen_disasm_disassemble(disasm, insns, 0, stdout); } } static void handle_3dstate_hs(struct gen_spec *spec, uint32_t *p) { uint64_t start; struct brw_instruction *insns; int hs_enable; if (gen_spec_get_gen(spec) >= gen_make_gen(8, 0)) { start = get_offset(&p[3], 6, 63); } else { start = get_offset(&p[3], 6, 31); } hs_enable = p[2] & 0x80000000; if (hs_enable) { fprintf(outfile, "instruction_base %08"PRIx64", start %08"PRIx64"\n", instruction_base, start); insns = (struct brw_instruction *) (gtt + instruction_base + start); gen_disasm_disassemble(disasm, insns, 0, stdout); } } static void handle_3dstate_constant(struct gen_spec *spec, uint32_t *p) { int i, j, length; uint32_t *dw; float *f; for (i = 0; i < 4; i++) { length = (p[1 + i / 2] >> (i & 1) * 16) & 0xffff; f = (float *) (gtt + p[3 + i * 2] + dynamic_state_base); dw = (uint32_t *) f; for (j = 0; j < length * 8; j++) { if (probably_float(dw[j])) fprintf(outfile, " %04.3f", f[j]); else fprintf(outfile, " 0x%08x", dw[j]); if ((j & 7) == 7) fprintf(outfile, "\n"); } } } static void handle_3dstate_ps(struct gen_spec *spec, uint32_t *p) { uint32_t mask = ~((1 << 6) - 1); uint64_t start; struct brw_instruction *insns; static const char unused[] = "unused"; static const char *pixel_type[3] = {"8 pixel", "16 pixel", "32 pixel"}; const char *k0, *k1, *k2; uint32_t k_mask, k1_offset, k2_offset; if (gen_spec_get_gen(spec) >= gen_make_gen(8, 0)) { k_mask = p[6] & 7; k1_offset = 8; k2_offset = 10; } else { k_mask = p[4] & 7; k1_offset = 6; k2_offset = 7; } #define DISPATCH_8 1 #define DISPATCH_16 2 #define DISPATCH_32 4 switch (k_mask) { case DISPATCH_8: k0 = pixel_type[0]; k1 = unused; k2 = unused; break; case DISPATCH_16: k0 = pixel_type[1]; k1 = unused; k2 = unused; break; case DISPATCH_8 | DISPATCH_16: k0 = pixel_type[0]; k1 = unused; k2 = pixel_type[1]; break; case DISPATCH_32: k0 = pixel_type[2]; k1 = unused; k2 = unused; break; case DISPATCH_16 | DISPATCH_32: k0 = unused; k1 = pixel_type[2]; k2 = pixel_type[1]; break; case DISPATCH_8 | DISPATCH_16 | DISPATCH_32: k0 = pixel_type[0]; k1 = pixel_type[2]; k2 = pixel_type[1]; break; default: k0 = unused; k1 = unused; k2 = unused; break; } start = instruction_base + (p[1] & mask); fprintf(outfile, " Kernel[0] %s\n", k0); if (k0 != unused) { insns = (struct brw_instruction *) (gtt + start); gen_disasm_disassemble(disasm, insns, 0, stdout); } start = instruction_base + (p[k1_offset] & mask); fprintf(outfile, " Kernel[1] %s\n", k1); if (k1 != unused) { insns = (struct brw_instruction *) (gtt + start); gen_disasm_disassemble(disasm, insns, 0, stdout); } start = instruction_base + (p[k2_offset] & mask); fprintf(outfile, " Kernel[2] %s\n", k2); if (k2 != unused) { insns = (struct brw_instruction *) (gtt + start); gen_disasm_disassemble(disasm, insns, 0, stdout); } } static void handle_3dstate_binding_table_pointers(struct gen_spec *spec, uint32_t *p) { dump_binding_table(spec, p[1]); } static void handle_3dstate_sampler_state_pointers(struct gen_spec *spec, uint32_t *p) { dump_samplers(spec, p[1]); } static void handle_3dstate_viewport_state_pointers_cc(struct gen_spec *spec, uint32_t *p) { uint64_t start; struct gen_group *cc_viewport; cc_viewport = gen_spec_find_struct(spec, "CC_VIEWPORT"); start = dynamic_state_base + (p[1] & ~0x1fu); for (uint32_t i = 0; i < 4; i++) { fprintf(outfile, "viewport %d\n", i); decode_group(cc_viewport, gtt + start + i * 8, 0); } } static void handle_3dstate_viewport_state_pointers_sf_clip(struct gen_spec *spec, uint32_t *p) { uint64_t start; struct gen_group *sf_clip_viewport; sf_clip_viewport = gen_spec_find_struct(spec, "SF_CLIP_VIEWPORT"); start = dynamic_state_base + (p[1] & ~0x3fu); for (uint32_t i = 0; i < 4; i++) { fprintf(outfile, "viewport %d\n", i); decode_group(sf_clip_viewport, gtt + start + i * 64, 0); } } static void handle_3dstate_blend_state_pointers(struct gen_spec *spec, uint32_t *p) { uint64_t start; struct gen_group *blend_state; blend_state = gen_spec_find_struct(spec, "BLEND_STATE"); start = dynamic_state_base + (p[1] & ~0x3fu); decode_group(blend_state, gtt + start, 0); } static void handle_3dstate_cc_state_pointers(struct gen_spec *spec, uint32_t *p) { uint64_t start; struct gen_group *cc_state; cc_state = gen_spec_find_struct(spec, "COLOR_CALC_STATE"); start = dynamic_state_base + (p[1] & ~0x3fu); decode_group(cc_state, gtt + start, 0); } static void handle_3dstate_scissor_state_pointers(struct gen_spec *spec, uint32_t *p) { uint64_t start; struct gen_group *scissor_rect; scissor_rect = gen_spec_find_struct(spec, "SCISSOR_RECT"); start = dynamic_state_base + (p[1] & ~0x1fu); decode_group(scissor_rect, gtt + start, 0); } static void handle_load_register_imm(struct gen_spec *spec, uint32_t *p) { struct gen_group *reg = gen_spec_find_register(spec, p[1]); if (reg != NULL) { fprintf(outfile, "register %s (0x%x): 0x%x\n", reg->name, reg->register_offset, p[2]); decode_group(reg, &p[2], 0); } } #define ARRAY_LENGTH(a) (sizeof (a) / sizeof (a)[0]) #define STATE_BASE_ADDRESS 0x61010000 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x70020000 #define _3DSTATE_INDEX_BUFFER 0x780a0000 #define _3DSTATE_VERTEX_BUFFERS 0x78080000 #define _3DSTATE_VS 0x78100000 #define _3DSTATE_GS 0x78110000 #define _3DSTATE_HS 0x781b0000 #define _3DSTATE_DS 0x781d0000 #define _3DSTATE_CONSTANT_VS 0x78150000 #define _3DSTATE_CONSTANT_GS 0x78160000 #define _3DSTATE_CONSTANT_PS 0x78170000 #define _3DSTATE_CONSTANT_HS 0x78190000 #define _3DSTATE_CONSTANT_DS 0x781A0000 #define _3DSTATE_PS 0x78200000 #define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x78260000 #define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x78270000 #define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x78280000 #define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x78290000 #define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782a0000 #define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782b0000 #define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782e0000 #define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782f0000 #define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x78230000 #define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP 0x78210000 #define _3DSTATE_BLEND_STATE_POINTERS 0x78240000 #define _3DSTATE_CC_STATE_POINTERS 0x780e0000 #define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f0000 #define _MI_LOAD_REGISTER_IMM 0x11000000 struct custom_handler { uint32_t opcode; void (*handle)(struct gen_spec *spec, uint32_t *p); } custom_handlers[] = { { STATE_BASE_ADDRESS, handle_state_base_address }, { MEDIA_INTERFACE_DESCRIPTOR_LOAD, handle_media_interface_descriptor_load }, { _3DSTATE_VERTEX_BUFFERS, handle_3dstate_vertex_buffers }, { _3DSTATE_INDEX_BUFFER, handle_3dstate_index_buffer }, { _3DSTATE_VS, handle_3dstate_vs }, { _3DSTATE_GS, handle_3dstate_vs }, { _3DSTATE_DS, handle_3dstate_vs }, { _3DSTATE_HS, handle_3dstate_hs }, { _3DSTATE_CONSTANT_VS, handle_3dstate_constant }, { _3DSTATE_CONSTANT_GS, handle_3dstate_constant }, { _3DSTATE_CONSTANT_PS, handle_3dstate_constant }, { _3DSTATE_CONSTANT_HS, handle_3dstate_constant }, { _3DSTATE_CONSTANT_DS, handle_3dstate_constant }, { _3DSTATE_PS, handle_3dstate_ps }, { _3DSTATE_BINDING_TABLE_POINTERS_VS, handle_3dstate_binding_table_pointers }, { _3DSTATE_BINDING_TABLE_POINTERS_HS, handle_3dstate_binding_table_pointers }, { _3DSTATE_BINDING_TABLE_POINTERS_DS, handle_3dstate_binding_table_pointers }, { _3DSTATE_BINDING_TABLE_POINTERS_GS, handle_3dstate_binding_table_pointers }, { _3DSTATE_BINDING_TABLE_POINTERS_PS, handle_3dstate_binding_table_pointers }, { _3DSTATE_SAMPLER_STATE_POINTERS_VS, handle_3dstate_sampler_state_pointers }, { _3DSTATE_SAMPLER_STATE_POINTERS_GS, handle_3dstate_sampler_state_pointers }, { _3DSTATE_SAMPLER_STATE_POINTERS_PS, handle_3dstate_sampler_state_pointers }, { _3DSTATE_VIEWPORT_STATE_POINTERS_CC, handle_3dstate_viewport_state_pointers_cc }, { _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, handle_3dstate_viewport_state_pointers_sf_clip }, { _3DSTATE_BLEND_STATE_POINTERS, handle_3dstate_blend_state_pointers }, { _3DSTATE_CC_STATE_POINTERS, handle_3dstate_cc_state_pointers }, { _3DSTATE_SCISSOR_STATE_POINTERS, handle_3dstate_scissor_state_pointers }, { _MI_LOAD_REGISTER_IMM, handle_load_register_imm } }; static void parse_commands(struct gen_spec *spec, uint32_t *cmds, int size, int engine) { uint32_t *p, *end = cmds + size / 4; int length, i; struct gen_group *inst; for (p = cmds; p < end; p += length) { inst = gen_spec_find_instruction(spec, p); length = gen_group_get_length(inst, p); assert(inst == NULL || length > 0); length = MAX2(1, length); if (inst == NULL) { fprintf(outfile, "unknown instruction %08x\n", p[0]); continue; } const char *color, *reset_color = NORMAL; uint64_t offset; if (option_full_decode) { if ((p[0] & 0xffff0000) == AUB_MI_BATCH_BUFFER_START || (p[0] & 0xffff0000) == AUB_MI_BATCH_BUFFER_END) color = GREEN_HEADER; else color = BLUE_HEADER; } else color = NORMAL; if (option_color == COLOR_NEVER) { color = ""; reset_color = ""; } if (option_print_offsets) offset = (void *) p - gtt; else offset = 0; fprintf(outfile, "%s0x%08"PRIx64": 0x%08x: %-80s%s\n", color, offset, p[0], gen_group_get_name(inst), reset_color); if (option_full_decode) { decode_group(inst, p, 0); for (i = 0; i < ARRAY_LENGTH(custom_handlers); i++) { if (gen_group_get_opcode(inst) == custom_handlers[i].opcode) { custom_handlers[i].handle(spec, p); break; } } } if ((p[0] & 0xffff0000) == AUB_MI_BATCH_BUFFER_START) { uint64_t start = get_address(spec, &p[1]); if (p[0] & (1 << 22)) { /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts * like a subroutine call. Commands that come afterwards get * processed once the 2nd level batch buffer returns with * MI_BATCH_BUFFER_END. */ parse_commands(spec, gtt + start, gtt_end - start, engine); } else { /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts * like a goto. Nothing after it will ever get processed. In * order to prevent the recursion from growing, we just reset the * loop and continue; */ p = gtt + start; /* We don't know where secondaries end so use the GTT end */ end = gtt + gtt_end; length = 0; continue; } } else if ((p[0] & 0xffff0000) == AUB_MI_BATCH_BUFFER_END) { break; } } } #define GEN_ENGINE_RENDER 1 #define GEN_ENGINE_BLITTER 2 static void handle_trace_block(uint32_t *p) { int operation = p[1] & AUB_TRACE_OPERATION_MASK; int type = p[1] & AUB_TRACE_TYPE_MASK; int address_space = p[1] & AUB_TRACE_ADDRESS_SPACE_MASK; uint64_t offset = p[3]; uint32_t size = p[4]; int header_length = p[0] & 0xffff; uint32_t *data = p + header_length + 2; int engine = GEN_ENGINE_RENDER; if (gen_spec_get_gen(spec) >= gen_make_gen(8,0)) offset += (uint64_t) p[5] << 32; switch (operation) { case AUB_TRACE_OP_DATA_WRITE: if (address_space != AUB_TRACE_MEMTYPE_GTT) break; if (gtt_size < offset + size) { fprintf(stderr, "overflow gtt space: %s\n", strerror(errno)); exit(EXIT_FAILURE); } memcpy((char *) gtt + offset, data, size); if (gtt_end < offset + size) gtt_end = offset + size; break; case AUB_TRACE_OP_COMMAND_WRITE: switch (type) { case AUB_TRACE_TYPE_RING_PRB0: engine = GEN_ENGINE_RENDER; break; case AUB_TRACE_TYPE_RING_PRB2: engine = GEN_ENGINE_BLITTER; break; default: fprintf(outfile, "command write to unknown ring %d\n", type); break; } parse_commands(spec, data, size, engine); gtt_end = 0; break; } } static void handle_trace_header(uint32_t *p) { /* The intel_aubdump tool from IGT is kind enough to put a PCI-ID= tag in * the AUB header comment. If the user hasn't specified a hardware * generation, try to use the one from the AUB file. */ uint32_t *end = p + (p[0] & 0xffff) + 2; int aub_pci_id = 0; if (end > &p[12] && p[12] > 0) sscanf((char *)&p[13], "PCI-ID=%i", &aub_pci_id); if (pci_id == 0) pci_id = aub_pci_id; struct gen_device_info devinfo; if (!gen_get_device_info(pci_id, &devinfo)) { fprintf(stderr, "can't find device information: pci_id=0x%x\n", pci_id); exit(EXIT_FAILURE); } if (xml_path == NULL) spec = gen_spec_load(&devinfo); else spec = gen_spec_load_from_path(&devinfo, xml_path); disasm = gen_disasm_create(pci_id); if (spec == NULL || disasm == NULL) exit(EXIT_FAILURE); fprintf(outfile, "%sAubinator: Intel AUB file decoder.%-80s%s\n", GREEN_HEADER, "", NORMAL); if (input_file) fprintf(outfile, "File name: %s\n", input_file); if (aub_pci_id) fprintf(outfile, "PCI ID: 0x%x\n", aub_pci_id); char app_name[33]; strncpy(app_name, (char *)&p[2], 32); app_name[32] = 0; fprintf(outfile, "Application name: %s\n", app_name); fprintf(outfile, "Decoding as: %s\n", gen_get_device_name(pci_id)); /* Throw in a new line before the first batch */ fprintf(outfile, "\n"); } struct aub_file { FILE *stream; uint32_t *map, *end, *cursor; uint32_t *mem_end; }; static struct aub_file * aub_file_open(const char *filename) { struct aub_file *file; struct stat sb; int fd; file = calloc(1, sizeof *file); fd = open(filename, O_RDONLY); if (fd == -1) { fprintf(stderr, "open %s failed: %s\n", filename, strerror(errno)); exit(EXIT_FAILURE); } if (fstat(fd, &sb) == -1) { fprintf(stderr, "stat failed: %s\n", strerror(errno)); exit(EXIT_FAILURE); } file->map = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); if (file->map == MAP_FAILED) { fprintf(stderr, "mmap failed: %s\n", strerror(errno)); exit(EXIT_FAILURE); } file->cursor = file->map; file->end = file->map + sb.st_size / 4; return file; } static struct aub_file * aub_file_stdin(void) { struct aub_file *file; file = calloc(1, sizeof *file); file->stream = stdin; return file; } #define TYPE(dw) (((dw) >> 29) & 7) #define OPCODE(dw) (((dw) >> 23) & 0x3f) #define SUBOPCODE(dw) (((dw) >> 16) & 0x7f) #define MAKE_HEADER(type, opcode, subopcode) \ (((type) << 29) | ((opcode) << 23) | ((subopcode) << 16)) #define TYPE_AUB 0x7 /* Classic AUB opcodes */ #define OPCODE_AUB 0x01 #define SUBOPCODE_HEADER 0x05 #define SUBOPCODE_BLOCK 0x41 #define SUBOPCODE_BMP 0x1e /* Newer version AUB opcode */ #define OPCODE_NEW_AUB 0x2e #define SUBOPCODE_VERSION 0x00 #define SUBOPCODE_REG_WRITE 0x03 #define SUBOPCODE_MEM_POLL 0x05 #define SUBOPCODE_MEM_WRITE 0x06 #define MAKE_GEN(major, minor) ( ((major) << 8) | (minor) ) struct { const char *name; uint32_t gen; } device_map[] = { { "bwr", MAKE_GEN(4, 0) }, { "cln", MAKE_GEN(4, 0) }, { "blc", MAKE_GEN(4, 0) }, { "ctg", MAKE_GEN(4, 0) }, { "el", MAKE_GEN(4, 0) }, { "il", MAKE_GEN(4, 0) }, { "sbr", MAKE_GEN(6, 0) }, { "ivb", MAKE_GEN(7, 0) }, { "lrb2", MAKE_GEN(0, 0) }, { "hsw", MAKE_GEN(7, 5) }, { "vlv", MAKE_GEN(7, 0) }, { "bdw", MAKE_GEN(8, 0) }, { "skl", MAKE_GEN(9, 0) }, { "chv", MAKE_GEN(8, 0) }, { "bxt", MAKE_GEN(9, 0) }, { "cnl", MAKE_GEN(10, 0) }, }; enum { AUB_ITEM_DECODE_OK, AUB_ITEM_DECODE_FAILED, AUB_ITEM_DECODE_NEED_MORE_DATA, }; static int aub_file_decode_batch(struct aub_file *file) { uint32_t *p, h, device, data_type, *new_cursor; int header_length, bias; if (file->end - file->cursor < 1) return AUB_ITEM_DECODE_NEED_MORE_DATA; p = file->cursor; h = *p; header_length = h & 0xffff; switch (OPCODE(h)) { case OPCODE_AUB: bias = 2; break; case OPCODE_NEW_AUB: bias = 1; break; default: fprintf(outfile, "unknown opcode %d at %td/%td\n", OPCODE(h), file->cursor - file->map, file->end - file->map); return AUB_ITEM_DECODE_FAILED; } new_cursor = p + header_length + bias; if ((h & 0xffff0000) == MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_BLOCK)) { if (file->end - file->cursor < 4) return AUB_ITEM_DECODE_NEED_MORE_DATA; new_cursor += p[4] / 4; } if (new_cursor > file->end) return AUB_ITEM_DECODE_NEED_MORE_DATA; switch (h & 0xffff0000) { case MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_HEADER): handle_trace_header(p); break; case MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_BLOCK): handle_trace_block(p); break; case MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_BMP): break; case MAKE_HEADER(TYPE_AUB, OPCODE_NEW_AUB, SUBOPCODE_VERSION): fprintf(outfile, "version block: dw1 %08x\n", p[1]); device = (p[1] >> 8) & 0xff; fprintf(outfile, " device %s\n", device_map[device].name); break; case MAKE_HEADER(TYPE_AUB, OPCODE_NEW_AUB, SUBOPCODE_REG_WRITE): fprintf(outfile, "register write block: (dwords %d)\n", h & 0xffff); fprintf(outfile, " reg 0x%x, data 0x%x\n", p[1], p[5]); break; case MAKE_HEADER(TYPE_AUB, OPCODE_NEW_AUB, SUBOPCODE_MEM_WRITE): fprintf(outfile, "memory write block (dwords %d):\n", h & 0xffff); fprintf(outfile, " address 0x%"PRIx64"\n", *(uint64_t *) &p[1]); data_type = (p[3] >> 20) & 0xff; if (data_type != 0) fprintf(outfile, " data type 0x%x\n", data_type); fprintf(outfile, " address space 0x%x\n", (p[3] >> 28) & 0xf); break; case MAKE_HEADER(TYPE_AUB, OPCODE_NEW_AUB, SUBOPCODE_MEM_POLL): fprintf(outfile, "memory poll block (dwords %d):\n", h & 0xffff); break; default: fprintf(outfile, "unknown block type=0x%x, opcode=0x%x, " "subopcode=0x%x (%08x)\n", TYPE(h), OPCODE(h), SUBOPCODE(h), h); break; } file->cursor = new_cursor; return AUB_ITEM_DECODE_OK; } static int aub_file_more_stuff(struct aub_file *file) { return file->cursor < file->end || (file->stream && !feof(file->stream)); } #define AUB_READ_BUFFER_SIZE (4096) #define MAX(a, b) ((a) < (b) ? (b) : (a)) static void aub_file_data_grow(struct aub_file *file) { size_t old_size = (file->mem_end - file->map) * 4; size_t new_size = MAX(old_size * 2, AUB_READ_BUFFER_SIZE); uint32_t *new_start = realloc(file->map, new_size); file->cursor = new_start + (file->cursor - file->map); file->end = new_start + (file->end - file->map); file->map = new_start; file->mem_end = file->map + (new_size / 4); } static bool aub_file_data_load(struct aub_file *file) { size_t r; if (file->stream == NULL) return false; /* First remove any consumed data */ if (file->cursor > file->map) { memmove(file->map, file->cursor, (file->end - file->cursor) * 4); file->end -= file->cursor - file->map; file->cursor = file->map; } /* Then load some new data in */ if ((file->mem_end - file->end) < (AUB_READ_BUFFER_SIZE / 4)) aub_file_data_grow(file); r = fread(file->end, 1, (file->mem_end - file->end) * 4, file->stream); file->end += r / 4; return r != 0; } static void setup_pager(void) { int fds[2]; pid_t pid; if (!isatty(1)) return; if (pipe(fds) == -1) return; pid = fork(); if (pid == -1) return; if (pid == 0) { close(fds[1]); dup2(fds[0], 0); execlp("less", "less", "-FRSi", NULL); } close(fds[0]); dup2(fds[1], 1); close(fds[1]); } static void print_help(const char *progname, FILE *file) { fprintf(file, "Usage: %s [OPTION]... [FILE]\n" "Decode aub file contents from either FILE or the standard input.\n\n" "A valid --gen option must be provided.\n\n" " --help display this help and exit\n" " --gen=platform decode for given platform (ivb, byt, hsw, bdw, chv, skl, kbl, bxt or cnl)\n" " --headers decode only command headers\n" " --color[=WHEN] colorize the output; WHEN can be 'auto' (default\n" " if omitted), 'always', or 'never'\n" " --no-pager don't launch pager\n" " --no-offsets don't print instruction offsets\n" " --xml=DIR load hardware xml description from directory DIR\n", progname); } int main(int argc, char *argv[]) { struct aub_file *file; int c, i; bool help = false, pager = true; const struct { const char *name; int pci_id; } gens[] = { { "ilk", 0x0046 }, /* Intel(R) Ironlake Mobile */ { "snb", 0x0126 }, /* Intel(R) Sandybridge Mobile GT2 */ { "ivb", 0x0166 }, /* Intel(R) Ivybridge Mobile GT2 */ { "hsw", 0x0416 }, /* Intel(R) Haswell Mobile GT2 */ { "byt", 0x0155 }, /* Intel(R) Bay Trail */ { "bdw", 0x1616 }, /* Intel(R) HD Graphics 5500 (Broadwell GT2) */ { "chv", 0x22B3 }, /* Intel(R) HD Graphics (Cherryview) */ { "skl", 0x1912 }, /* Intel(R) HD Graphics 530 (Skylake GT2) */ { "kbl", 0x591D }, /* Intel(R) Kabylake GT2 */ { "bxt", 0x0A84 }, /* Intel(R) HD Graphics (Broxton) */ { "cnl", 0x5A52 }, /* Intel(R) HD Graphics (Cannonlake) */ }; const struct option aubinator_opts[] = { { "help", no_argument, (int *) &help, true }, { "no-pager", no_argument, (int *) &pager, false }, { "no-offsets", no_argument, (int *) &option_print_offsets, false }, { "gen", required_argument, NULL, 'g' }, { "headers", no_argument, (int *) &option_full_decode, false }, { "color", required_argument, NULL, 'c' }, { "xml", required_argument, NULL, 'x' }, { NULL, 0, NULL, 0 } }; outfile = stdout; i = 0; while ((c = getopt_long(argc, argv, "", aubinator_opts, &i)) != -1) { switch (c) { case 'g': for (i = 0; i < ARRAY_SIZE(gens); i++) { if (!strcmp(optarg, gens[i].name)) { pci_id = gens[i].pci_id; break; } } if (i == ARRAY_SIZE(gens)) { fprintf(stderr, "can't parse gen: '%s', expected ivb, byt, hsw, " "bdw, chv, skl, kbl or bxt\n", optarg); exit(EXIT_FAILURE); } break; case 'c': if (optarg == NULL || strcmp(optarg, "always") == 0) option_color = COLOR_ALWAYS; else if (strcmp(optarg, "never") == 0) option_color = COLOR_NEVER; else if (strcmp(optarg, "auto") == 0) option_color = COLOR_AUTO; else { fprintf(stderr, "invalid value for --color: %s", optarg); exit(EXIT_FAILURE); } break; case 'x': xml_path = strdup(optarg); break; default: break; } } if (help || argc == 1) { print_help(argv[0], stderr); exit(0); } if (optind < argc) input_file = argv[optind]; /* Do this before we redirect stdout to pager. */ if (option_color == COLOR_AUTO) option_color = isatty(1) ? COLOR_ALWAYS : COLOR_NEVER; if (isatty(1) && pager) setup_pager(); if (input_file == NULL) file = aub_file_stdin(); else file = aub_file_open(input_file); /* mmap a terabyte for our gtt space. */ gtt_size = 1ull << 40; gtt = mmap(NULL, gtt_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); if (gtt == MAP_FAILED) { fprintf(stderr, "failed to alloc gtt space: %s\n", strerror(errno)); exit(EXIT_FAILURE); } while (aub_file_more_stuff(file)) { switch (aub_file_decode_batch(file)) { case AUB_ITEM_DECODE_OK: break; case AUB_ITEM_DECODE_NEED_MORE_DATA: if (!file->stream) { file->cursor = file->end; break; } if (aub_file_more_stuff(file) && !aub_file_data_load(file)) { fprintf(stderr, "failed to load data from stdin\n"); exit(EXIT_FAILURE); } break; default: fprintf(stderr, "failed to parse aubdump data\n"); exit(EXIT_FAILURE); } } fflush(stdout); /* close the stdout which is opened to write the output */ close(1); free(xml_path); wait(NULL); return EXIT_SUCCESS; }