diff options
-rw-r--r-- | src/gallium/drivers/r600/Makefile.am | 5 | ||||
-rw-r--r-- | src/gallium/drivers/r600/Makefile.sources | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/eg_debug.c | 359 | ||||
-rw-r--r-- | src/gallium/drivers/r600/egd_tables.py | 310 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 29 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreend.h | 15 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 39 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 3 |
11 files changed, 785 insertions, 7 deletions
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am index 21762d838d0..44fd51d216a 100644 --- a/src/gallium/drivers/r600/Makefile.am +++ b/src/gallium/drivers/r600/Makefile.am @@ -1,6 +1,11 @@ include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc +egd_tables.h: $(srcdir)/egd_tables.py $(srcdir)/evergreend.h + $(AM_V_at)$(MKDIR_P) $(@D) + $(AM_V_GEN) $(PYTHON2) $(srcdir)/egd_tables.py $(srcdir)/evergreend.h > $@ + +BUILT_SOURCES = $(R600_GENERATED_FILES) AM_CFLAGS = \ $(GALLIUM_DRIVER_CFLAGS) \ $(RADEON_CFLAGS) \ diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources index 8bf8083bbab..2f206520fa7 100644 --- a/src/gallium/drivers/r600/Makefile.sources +++ b/src/gallium/drivers/r600/Makefile.sources @@ -2,6 +2,7 @@ C_SOURCES = \ compute_memory_pool.c \ compute_memory_pool.h \ eg_asm.c \ + eg_debug.c \ eg_sq.h \ evergreen_compute.c \ evergreen_compute.h \ @@ -64,3 +65,6 @@ CXX_SOURCES = \ sb/sb_shader.h \ sb/sb_ssa_builder.cpp \ sb/sb_valtable.cpp + +R600_GENERATED_FILES = \ + egd_tables.h
\ No newline at end of file diff --git a/src/gallium/drivers/r600/eg_debug.c b/src/gallium/drivers/r600/eg_debug.c new file mode 100644 index 00000000000..32a4f232d90 --- /dev/null +++ b/src/gallium/drivers/r600/eg_debug.c @@ -0,0 +1,359 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Marek Olšák <[email protected]> + */ +#include "r600_pipe.h" +#include "evergreend.h" + +#include "egd_tables.h" + +#define AC_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000) +#define AC_GET_TRACE_POINT_ID(x) ((x) & 0xffff) + +/* Parsed IBs are difficult to read without colors. Use "less -R file" to + * read them, or use "aha -b -f file" to convert them to html. + */ +#define COLOR_RESET "\033[0m" +#define COLOR_RED "\033[31m" +#define COLOR_GREEN "\033[1;32m" +#define COLOR_YELLOW "\033[1;33m" +#define COLOR_CYAN "\033[1;36m" + +#define INDENT_PKT 8 + +typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr); +static void print_spaces(FILE *f, unsigned num) +{ + fprintf(f, "%*s", num, ""); +} + +static void print_value(FILE *file, uint32_t value, int bits) +{ + /* Guess if it's int or float */ + if (value <= (1 << 15)) { + if (value <= 9) + fprintf(file, "%u\n", value); + else + fprintf(file, "%u (0x%0*x)\n", value, bits / 4, value); + } else { + float f = uif(value); + + if (fabs(f) < 100000 && f*10 == floor(f*10)) + fprintf(file, "%.1ff (0x%0*x)\n", f, bits / 4, value); + else + /* Don't print more leading zeros than there are bits. */ + fprintf(file, "0x%0*x\n", bits / 4, value); + } +} + +static void print_named_value(FILE *file, const char *name, uint32_t value, + int bits) +{ + print_spaces(file, INDENT_PKT); + fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", name); + print_value(file, value, bits); +} + +static void eg_dump_reg(FILE *file, unsigned offset, uint32_t value, + uint32_t field_mask) +{ + int r, f; + + for (r = 0; r < ARRAY_SIZE(egd_reg_table); r++) { + const struct eg_reg *reg = &egd_reg_table[r]; + const char *reg_name = egd_strings + reg->name_offset; + + if (reg->offset == offset) { + bool first_field = true; + + print_spaces(file, INDENT_PKT); + fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", + reg_name); + + if (!reg->num_fields) { + print_value(file, value, 32); + return; + } + + for (f = 0; f < reg->num_fields; f++) { + const struct eg_field *field = egd_fields_table + reg->fields_offset + f; + const int *values_offsets = egd_strings_offsets + field->values_offset; + uint32_t val = (value & field->mask) >> + (ffs(field->mask) - 1); + + if (!(field->mask & field_mask)) + continue; + + /* Indent the field. */ + if (!first_field) + print_spaces(file, + INDENT_PKT + strlen(reg_name) + 4); + + /* Print the field. */ + fprintf(file, "%s = ", egd_strings + field->name_offset); + + if (val < field->num_values && values_offsets[val] >= 0) + fprintf(file, "%s\n", egd_strings + values_offsets[val]); + else + print_value(file, val, + util_bitcount(field->mask)); + + first_field = false; + } + return; + } + } + + print_spaces(file, INDENT_PKT); + fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " <- 0x%08x\n", offset, value); +} + + +static void ac_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count, + unsigned reg_offset) +{ + unsigned reg = (ib[1] << 2) + reg_offset; + int i; + + for (i = 0; i < count; i++) + eg_dump_reg(f, reg + i*4, ib[2+i], ~0); +} + +static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw, + int trace_id, enum chip_class chip_class, + ac_debug_addr_callback addr_callback, + void *addr_callback_data) +{ + unsigned count = PKT_COUNT_G(ib[0]); + unsigned op = PKT3_IT_OPCODE_G(ib[0]); + const char *predicate = PKT3_PREDICATE(ib[0]) ? "(predicate)" : ""; + int i; + + /* Print the name first. */ + for (i = 0; i < ARRAY_SIZE(packet3_table); i++) + if (packet3_table[i].op == op) + break; + + if (i < ARRAY_SIZE(packet3_table)) { + const char *name = egd_strings + packet3_table[i].name_offset; + + if (op == PKT3_SET_CONTEXT_REG || + op == PKT3_SET_CONFIG_REG || + op == PKT3_SET_UCONFIG_REG || + op == PKT3_SET_SH_REG) + fprintf(f, COLOR_CYAN "%s%s" COLOR_CYAN ":\n", + name, predicate); + else + fprintf(f, COLOR_GREEN "%s%s" COLOR_RESET ":\n", + name, predicate); + } else + fprintf(f, COLOR_RED "PKT3_UNKNOWN 0x%x%s" COLOR_RESET ":\n", + op, predicate); + + /* Print the contents. */ + switch (op) { + case PKT3_SET_CONTEXT_REG: + ac_parse_set_reg_packet(f, ib, count, EVERGREEN_CONTEXT_REG_OFFSET); + break; + case PKT3_SET_CONFIG_REG: + ac_parse_set_reg_packet(f, ib, count, EVERGREEN_CONFIG_REG_OFFSET); + break; + case PKT3_SURFACE_SYNC: + eg_dump_reg(f, R_0085F0_CP_COHER_CNTL, ib[1], ~0); + eg_dump_reg(f, R_0085F4_CP_COHER_SIZE, ib[2], ~0); + eg_dump_reg(f, R_0085F8_CP_COHER_BASE, ib[3], ~0); + print_named_value(f, "POLL_INTERVAL", ib[4], 16); + break; + case PKT3_EVENT_WRITE: + /* TODO dump VGT_EVENT_INITIATOR */ +#if 0 + eg_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1], + S_028A90_EVENT_TYPE(~0)); +#endif + print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4); + print_named_value(f, "INV_L2", (ib[1] >> 20) & 0x1, 1); + if (count > 0) { + print_named_value(f, "ADDRESS_LO", ib[2], 32); + print_named_value(f, "ADDRESS_HI", ib[3], 16); + } + break; + case PKT3_DRAW_INDEX_AUTO: + eg_dump_reg(f, R_008970_VGT_NUM_INDICES, ib[1], ~0); + eg_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[2], ~0); + break; + case PKT3_DRAW_INDEX_2: + eg_dump_reg(f, R_028A78_VGT_DMA_MAX_SIZE, ib[1], ~0); + eg_dump_reg(f, R_0287E8_VGT_DMA_BASE, ib[2], ~0); + eg_dump_reg(f, R_0287E4_VGT_DMA_BASE_HI, ib[3], ~0); + eg_dump_reg(f, R_008970_VGT_NUM_INDICES, ib[4], ~0); + eg_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[5], ~0); + break; + case PKT3_INDEX_TYPE: + eg_dump_reg(f, R_028A7C_VGT_DMA_INDEX_TYPE, ib[1], ~0); + break; + case PKT3_NUM_INSTANCES: + eg_dump_reg(f, R_028A88_VGT_NUM_INSTANCES, ib[1], ~0); + break; + case PKT3_INDIRECT_BUFFER: + break; + case PKT3_PFP_SYNC_ME: + break; + case PKT3_NOP: + if (ib[0] == 0xffff1000) { + count = -1; /* One dword NOP. */ + break; + } else if (count == 0 && AC_IS_TRACE_POINT(ib[1])) { + unsigned packet_id = AC_GET_TRACE_POINT_ID(ib[1]); + + print_spaces(f, INDENT_PKT); + fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id); + + if (trace_id == -1) + break; /* tracing was disabled */ + + print_spaces(f, INDENT_PKT); + if (packet_id < trace_id) + fprintf(f, COLOR_RED + "This trace point was reached by the CP." + COLOR_RESET "\n"); + else if (packet_id == trace_id) + fprintf(f, COLOR_RED + "!!!!! This is the last trace point that " + "was reached by the CP !!!!!" + COLOR_RESET "\n"); + else if (packet_id+1 == trace_id) + fprintf(f, COLOR_RED + "!!!!! This is the first trace point that " + "was NOT been reached by the CP !!!!!" + COLOR_RESET "\n"); + else + fprintf(f, COLOR_RED + "!!!!! This trace point was NOT reached " + "by the CP !!!!!" + COLOR_RESET "\n"); + break; + } + /* fall through, print all dwords */ + default: + for (i = 0; i < count+1; i++) { + print_spaces(f, INDENT_PKT); + fprintf(f, "0x%08x\n", ib[1+i]); + } + } + + ib += count + 2; + *num_dw -= count + 2; + return ib; +} + +/** + * Parse and print an IB into a file. + * + * \param f file + * \param ib IB + * \param num_dw size of the IB + * \param chip_class chip class + * \param trace_id the last trace ID that is known to have been reached + * and executed by the CP, typically read from a buffer + * \param addr_callback Get a mapped pointer of the IB at a given address. Can + * be NULL. + * \param addr_callback_data user data for addr_callback + */ +static void eg_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id, + const char *name, enum chip_class chip_class, + ac_debug_addr_callback addr_callback, void *addr_callback_data) +{ + fprintf(f, "------------------ %s begin ------------------\n", name); + + while (num_dw > 0) { + unsigned type = PKT_TYPE_G(ib[0]); + + switch (type) { + case 3: + ib = ac_parse_packet3(f, ib, &num_dw, trace_id, + chip_class, addr_callback, + addr_callback_data); + break; + case 2: + /* type-2 nop */ + if (ib[0] == 0x80000000) { + fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n"); + ib++; + num_dw--; + break; + } + /* fall through */ + default: + fprintf(f, "Unknown packet type %i\n", type); + return; + } + } + + fprintf(f, "------------------- %s end -------------------\n", name); + if (num_dw < 0) { + printf("Packet ends after the end of IB.\n"); + exit(0); + } + fprintf(f, "\n"); +} + +static void eg_dump_last_ib(struct r600_context *rctx, FILE *f) +{ + int last_trace_id = -1; + + if (!rctx->last_gfx.ib) + return; + + if (rctx->last_trace_buf) { + /* We are expecting that the ddebug pipe has already + * waited for the context, so this buffer should be idle. + * If the GPU is hung, there is no point in waiting for it. + */ + uint32_t *map = rctx->b.ws->buffer_map(rctx->last_trace_buf->buf, + NULL, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ); + if (map) + last_trace_id = *map; + } + + eg_parse_ib(f, rctx->last_gfx.ib, rctx->last_gfx.num_dw, + last_trace_id, "IB", rctx->b.chip_class, + NULL, NULL); +} + + +void eg_dump_debug_state(struct pipe_context *ctx, FILE *f, + unsigned flags) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + + eg_dump_last_ib(rctx, f); + + fprintf(f, "Done.\n"); + + /* dump only once */ + radeon_clear_saved_cs(&rctx->last_gfx); + r600_resource_reference(&rctx->last_trace_buf, NULL); +} diff --git a/src/gallium/drivers/r600/egd_tables.py b/src/gallium/drivers/r600/egd_tables.py new file mode 100644 index 00000000000..4c606025ba7 --- /dev/null +++ b/src/gallium/drivers/r600/egd_tables.py @@ -0,0 +1,310 @@ + +CopyRight = ''' +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ +''' + +import sys +import re + + +class StringTable: + """ + A class for collecting multiple strings in a single larger string that is + used by indexing (to avoid relocations in the resulting binary) + """ + def __init__(self): + self.table = [] + self.length = 0 + + def add(self, string): + # We might get lucky with string being a suffix of a previously added string + for te in self.table: + if te[0].endswith(string): + idx = te[1] + len(te[0]) - len(string) + te[2].add(idx) + return idx + + idx = self.length + self.table.append((string, idx, set((idx,)))) + self.length += len(string) + 1 + + return idx + + def emit(self, filp, name, static=True): + """ + Write + [static] const char name[] = "..."; + to filp. + """ + fragments = [ + '"%s\\0" /* %s */' % ( + te[0].encode('string_escape'), + ', '.join(str(idx) for idx in te[2]) + ) + for te in self.table + ] + filp.write('%sconst char %s[] =\n%s;\n' % ( + 'static ' if static else '', + name, + '\n'.join('\t' + fragment for fragment in fragments) + )) + +class IntTable: + """ + A class for collecting multiple arrays of integers in a single big array + that is used by indexing (to avoid relocations in the resulting binary) + """ + def __init__(self, typename): + self.typename = typename + self.table = [] + self.idxs = set() + + def add(self, array): + # We might get lucky and find the array somewhere in the existing data + try: + idx = 0 + while True: + idx = self.table.index(array[0], idx, len(self.table) - len(array) + 1) + + for i in range(1, len(array)): + if array[i] != self.table[idx + i]: + break + else: + self.idxs.add(idx) + return idx + + idx += 1 + except ValueError: + pass + + idx = len(self.table) + self.table += array + self.idxs.add(idx) + return idx + + def emit(self, filp, name, static=True): + """ + Write + [static] const typename name[] = { ... }; + to filp. + """ + idxs = sorted(self.idxs) + [-1] + + fragments = [ + ('\t/* %s */ %s' % ( + idxs[i], + ' '.join((str(elt) + ',') for elt in self.table[idxs[i]:idxs[i+1]]) + )) + for i in range(len(idxs) - 1) + ] + + filp.write('%sconst %s %s[] = {\n%s\n};\n' % ( + 'static ' if static else '', + self.typename, name, + '\n'.join(fragments) + )) + +class Field: + def __init__(self, reg, s_name): + self.s_name = s_name + self.name = strip_prefix(s_name) + self.values = [] + self.varname_values = '%s__%s__values' % (reg.r_name.lower(), self.name.lower()) + +class Reg: + def __init__(self, r_name): + self.r_name = r_name + self.name = strip_prefix(r_name) + self.fields = [] + self.own_fields = True + + +def strip_prefix(s): + '''Strip prefix in the form ._.*_, e.g. R_001234_''' + return s[s[2:].find('_')+3:] + +def parse(filename, regs, packets): + stream = open(filename) + + for line in stream: + if not line.startswith('#define '): + continue + + line = line[8:].strip() + + if line.startswith('R_'): + name = line.split()[0] + + for it in regs: + if it.r_name == name: + reg = it + break + else: + reg = Reg(name) + regs.append(reg) + + elif line.startswith('S_'): + name = line[:line.find('(')] + + for it in reg.fields: + if it.s_name == name: + field = it + break + else: + field = Field(reg, name) + reg.fields.append(field) + + elif line.startswith('V_'): + split = line.split() + name = split[0] + value = int(split[1], 0) + + for (n,v) in field.values: + if n == name: + if v != value: + sys.exit('Value mismatch: name = ' + name) + + field.values.append((name, value)) + + elif line.startswith('PKT3_') and line.find('0x') != -1 and line.find('(') == -1: + packets.append(line.split()[0]) + + # Copy fields to indexed registers which have their fields only defined + # at register index 0. + # For example, copy fields from CB_COLOR0_INFO to CB_COLORn_INFO, n > 0. + match_number = re.compile('[0-9]+') + reg_dict = dict() + + # Create a dict of registers with fields and '0' in their name + for reg in regs: + if len(reg.fields) and reg.name.find('0') != -1: + reg_dict[reg.name] = reg + + # Assign fields + for reg in regs: + if not len(reg.fields): + reg0 = reg_dict.get(match_number.sub('0', reg.name)) + if reg0 != None: + reg.fields = reg0.fields + reg.fields_owner = reg0 + reg.own_fields = False + + +def write_tables(regs, packets): + + strings = StringTable() + strings_offsets = IntTable("int") + + print '/* This file is autogenerated by egd_tables.py from evergreend.h. Do not edit directly. */' + print + print CopyRight.strip() + print ''' +#ifndef EG_TABLES_H +#define EG_TABLES_H + +struct eg_field { + unsigned name_offset; + unsigned mask; + unsigned num_values; + unsigned values_offset; /* offset into eg_strings_offsets */ +}; + +struct eg_reg { + unsigned name_offset; + unsigned offset; + unsigned num_fields; + unsigned fields_offset; +}; + +struct eg_packet3 { + unsigned name_offset; + unsigned op; +}; +''' + + print 'static const struct eg_packet3 packet3_table[] = {' + for pkt in packets: + print '\t{%s, %s},' % (strings.add(pkt[5:]), pkt) + print '};' + print + + print 'static const struct eg_field egd_fields_table[] = {' + + fields_idx = 0 + for reg in regs: + if len(reg.fields) and reg.own_fields: + print '\t/* %s */' % (fields_idx) + + reg.fields_idx = fields_idx + + for field in reg.fields: + if len(field.values): + values_offsets = [] + for value in field.values: + while value[1] >= len(values_offsets): + values_offsets.append(-1) + values_offsets[value[1]] = strings.add(strip_prefix(value[0])) + print '\t{%s, %s(~0u), %s, %s},' % ( + strings.add(field.name), field.s_name, + len(values_offsets), strings_offsets.add(values_offsets)) + else: + print '\t{%s, %s(~0u)},' % (strings.add(field.name), field.s_name) + fields_idx += 1 + + print '};' + print + + print 'static const struct eg_reg egd_reg_table[] = {' + for reg in regs: + if len(reg.fields): + print '\t{%s, %s, %s, %s},' % (strings.add(reg.name), reg.r_name, + len(reg.fields), reg.fields_idx if reg.own_fields else reg.fields_owner.fields_idx) + else: + print '\t{%s, %s},' % (strings.add(reg.name), reg.r_name) + print '};' + print + + strings.emit(sys.stdout, "egd_strings") + + print + + strings_offsets.emit(sys.stdout, "egd_strings_offsets") + + print + print '#endif' + + +def main(): + regs = [] + packets = [] + for arg in sys.argv[1:]: + parse(arg, regs, packets) + write_tables(regs, packets) + + +if __name__ == '__main__': + main() + +# kate: space-indent on; indent-width 4; replace-tabs on; diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 37ef1058d3f..d30024d5377 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -442,6 +442,9 @@ static void evergreen_emit_dispatch(struct r600_context *rctx, radeon_emit(cs, info->grid[2]); /* VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN */ radeon_emit(cs, 1); + + if (rctx->is_debug) + eg_trace_emit(rctx); } static void compute_emit_cs(struct r600_context *rctx, @@ -867,10 +870,9 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_028B54_VGT_SHADER_STAGES_EN, 2/*CS_ON*/); r600_store_context_reg(cb, R_0286E8_SPI_COMPUTE_INPUT_CNTL, - S_0286E8_TID_IN_GROUP_ENA - | S_0286E8_TGID_ENA - | S_0286E8_DISABLE_INDEX_PACK) - ; + S_0286E8_TID_IN_GROUP_ENA(1) | + S_0286E8_TGID_ENA(1) | + S_0286E8_DISABLE_INDEX_PACK(1)); /* The LOOP_CONST registers are an optimizations for loops that allows * you to store the initial counter, increment value, and maximum diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 64935013471..c3b939fc72e 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -4073,3 +4073,32 @@ bool evergreen_adjust_gprs(struct r600_context *rctx) } return true; } + +#define AC_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff)) + +void eg_trace_emit(struct r600_context *rctx) +{ + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; + unsigned reloc; + + if (rctx->b.chip_class < EVERGREEN) + return; + + /* This must be done after r600_need_cs_space. */ + reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, + (struct r600_resource*)rctx->trace_buf, RADEON_USAGE_WRITE, + RADEON_PRIO_CP_DMA); + + rctx->trace_id++; + radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rctx->trace_buf, + RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); + radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); + radeon_emit(cs, rctx->trace_buf->gpu_address); + radeon_emit(cs, rctx->trace_buf->gpu_address >> 32 | MEM_WRITE_32_BITS | MEM_WRITE_CONFIRM); + radeon_emit(cs, rctx->trace_id); + radeon_emit(cs, 0); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, reloc); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, AC_ENCODE_TRACE_POINT(rctx->trace_id)); +} diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index d664a2d0fb2..d51181855ca 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -87,6 +87,8 @@ #define PKT3_WAIT_REG_MEM 0x3C #define WAIT_REG_MEM_EQUAL 3 #define PKT3_MEM_WRITE 0x3D +#define MEM_WRITE_CONFIRM (1 << 17) +#define MEM_WRITE_32_BITS (1 << 18) #define PKT3_INDIRECT_BUFFER 0x32 #define PKT3_PFP_SYNC_ME 0x42 #define PKT3_SURFACE_SYNC 0x43 @@ -1078,6 +1080,11 @@ #define G_028208_BR_Y(x) (((x) >> 16) & 0x7FFF) #define C_028208_BR_Y 0x8000FFFF +#define R_028A78_VGT_DMA_MAX_SIZE 0x028A78 +#define R_028A7C_VGT_DMA_INDEX_TYPE 0x028A7C +#define R_028A88_VGT_NUM_INSTANCES 0x028A88 +#define R_0287E4_VGT_DMA_BASE_HI 0x0287E4 +#define R_0287E8_VGT_DMA_BASE 0x0287E8 #define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 #define S_0287F0_SOURCE_SELECT(x) (((unsigned)(x) & 0x3) << 0) #define G_0287F0_SOURCE_SELECT(x) (((x) >> 0) & 0x3) @@ -1951,9 +1958,9 @@ #define R_0286DC_SPI_FOG_CNTL 0x000286DC #define R_0286E4_SPI_PS_IN_CONTROL_2 0x000286E4 #define R_0286E8_SPI_COMPUTE_INPUT_CNTL 0x000286E8 -#define S_0286E8_TID_IN_GROUP_ENA 1 -#define S_0286E8_TGID_ENA 2 -#define S_0286E8_DISABLE_INDEX_PACK 4 +#define S_0286E8_TID_IN_GROUP_ENA(x) (((unsigned)(x) & 0x1) << 0) +#define S_0286E8_TGID_ENA(x) (((unsigned)(x) & 0x1) << 1) +#define S_0286E8_DISABLE_INDEX_PACK(x) (((unsigned)(x) & 0x1) << 2) #define R_028720_GDS_ADDR_BASE 0x00028720 #define R_028724_GDS_ADDR_SIZE 0x00028724 #define R_028728_GDS_ORDERED_WAVE_PER_SE 0x00028728 @@ -2501,6 +2508,8 @@ #define S_0085F0_CR2_ACTION_ENA(x) (((unsigned)(x) & 0x1) << 31) #define G_0085F0_CR2_ACTION_ENA(x) (((x) >> 31) & 0x1) #define C_0085F0_CR2_ACTION_ENA 0x7FFFFFFF +#define R_0085F4_CP_COHER_SIZE 0x0085F4 +#define R_0085F8_CP_COHER_BASE 0x0085F8 #define R_008970_VGT_NUM_INDICES 0x008970 #define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 4511ce0c01e..9b01e8e7d5c 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -274,17 +274,39 @@ void r600_context_gfx_flush(void *context, unsigned flags, r600_flush_emit(ctx); + if (ctx->trace_buf) + eg_trace_emit(ctx); /* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */ if (ctx->b.chip_class == R600) { radeon_set_context_reg(cs, R_028350_SX_MISC, 0); } + if (ctx->is_debug) { + /* Save the IB for debug contexts. */ + radeon_clear_saved_cs(&ctx->last_gfx); + radeon_save_cs(ws, cs, &ctx->last_gfx); + r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); + r600_resource_reference(&ctx->trace_buf, NULL); + } /* Flush the CS. */ ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence); if (fence) ws->fence_reference(fence, ctx->b.last_gfx_fence); ctx->b.num_gfx_cs_flushes++; + if (ctx->is_debug) { + bool ret = ws->fence_wait(ws, ctx->b.last_gfx_fence, 10000000); + if (ret == false) { + const char *fname = getenv("R600_TRACE"); + if (!fname) + exit(-1); + FILE *fl = fopen(fname, "w+"); + if (fl) + eg_dump_debug_state(&ctx->b.b, fl, 0); + fclose(fl); + exit(-1); + } + } r600_begin_new_cs(ctx); } @@ -292,6 +314,23 @@ void r600_begin_new_cs(struct r600_context *ctx) { unsigned shader; + if (ctx->is_debug) { + uint32_t zero = 0; + + /* Create a buffer used for writing trace IDs and initialize it to 0. */ + assert(!ctx->trace_buf); + ctx->trace_buf = (struct r600_resource*) + pipe_buffer_create(ctx->b.b.screen, 0, + PIPE_USAGE_STAGING, 4); + if (ctx->trace_buf) + pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b, + 0, sizeof(zero), &zero); + ctx->trace_id = 0; + } + + if (ctx->trace_buf) + eg_trace_emit(ctx); + ctx->b.flags = 0; ctx->b.gtt = 0; ctx->b.vram = 0; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 2ed6880d590..71dc16e5061 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -111,6 +111,11 @@ static void r600_destroy_context(struct pipe_context *context) FREE(rctx->start_compute_cs_cmd.buf); r600_common_context_cleanup(&rctx->b); + + r600_resource_reference(&rctx->trace_buf, NULL); + r600_resource_reference(&rctx->last_trace_buf, NULL); + radeon_clear_saved_cs(&rctx->last_gfx); + FREE(rctx); } @@ -145,6 +150,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, rctx->b.b.create_video_buffer = vl_video_buffer_create; } + if (getenv("R600_TRACE")) + rctx->is_debug = true; r600_init_common_state_functions(rctx); switch (rctx->b.chip_class) { diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index e5acd415423..3fa7d77d37e 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -524,6 +524,13 @@ struct r600_context { struct r600_pipe_shader_selector *last_tcs; unsigned last_num_tcs_input_cp; unsigned lds_alloc; + + /* Debug state. */ + bool is_debug; + struct radeon_saved_cs last_gfx; + struct r600_resource *last_trace_buf; + struct r600_resource *trace_buf; + unsigned trace_id; }; static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs, @@ -952,4 +959,8 @@ static inline unsigned r600_get_flush_flags(enum r600_coherency coher) #define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2 unsigned r600_conv_prim_to_gs_out(unsigned mode); + +void eg_trace_emit(struct r600_context *rctx); +void eg_dump_debug_state(struct pipe_context *ctx, FILE *f, + unsigned flags); #endif diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 99ec5e7f7dc..3b24f36119c 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -2015,6 +2015,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT)); } + if (rctx->trace_buf) + eg_trace_emit(rctx); + if (rctx->framebuffer.do_update_surf_dirtiness) { /* Set the depth buffer as dirty. */ if (rctx->framebuffer.state.zsbuf) { |