diff options
author | Samuel Pitoiset <[email protected]> | 2017-08-31 11:43:59 +0200 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2017-09-01 09:46:32 +0200 |
commit | 12cbd9a13f97f67b62657728bc244b96de7f7b83 (patch) | |
tree | f881b7c68eaf80c88c0d897714e93bc8f2a2a6a6 /src/amd/common | |
parent | 72d9ffc72cba5852ec09d2e8d1f55fdbbefcfe6f (diff) |
radeonsi: move si_vm_fault_occured() to AMD common code
This lets radv reuse it to report VM faults when they are detected.
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/common')
-rw-r--r-- | src/amd/common/ac_debug.c | 109 | ||||
-rw-r--r-- | src/amd/common/ac_debug.h | 4 |
2 files changed, 113 insertions, 0 deletions
diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c
index d46fc27a9e1..0de00e27e75 100644
--- a/src/amd/common/ac_debug.c
+++ b/src/amd/common/ac_debug.c
@@ -34,6 +34,8 @@
 #define VG(x)
 #endif
 
+#include <inttypes.h>
+
 #include "sid.h"
 #include "gfx9d.h"
 #include "sid_tables.h"
@@ -597,3 +599,110 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
 
 	fprintf(f, "------------------- %s end -------------------\n\n", name);
 }
+
+/**
+ * Parse dmesg and return true if a VM fault has been detected.
+ *
+ * \param chip_class			chip class (selects the message format to match)
+ * \param old_dmesg_timestamp	previous dmesg timestamp; raised to the newest one seen
+ * \param out_addr			detected VM fault addr; pass NULL to only
+ *					refresh old_dmesg_timestamp
+ * \return true if a VM fault newer than old_dmesg_timestamp was parsed
+ */
+bool ac_vm_fault_occured(enum chip_class chip_class,
+			 uint64_t *old_dmesg_timestamp, uint64_t *out_addr)
+{
+	char line[2000];
+	unsigned sec, usec;
+	int progress = 0;
+	uint64_t dmesg_timestamp = 0;
+	bool fault = false;
+
+	FILE *p = popen("dmesg", "r");
+	if (!p)
+		return false;
+
+	while (fgets(line, sizeof(line), p)) {
+		char *msg;
+
+		if (!line[0] || line[0] == '\n')
+			continue;
+
+		/* Get the timestamp; complain only once about unparsable lines. */
+		if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
+			static bool hit = false;
+			if (!hit) {
+				fprintf(stderr, "%s: failed to parse line '%s'\n",
+					__func__, line);
+				hit = true;
+			}
+			continue;
+		}
+		dmesg_timestamp = sec * 1000000ull + usec;
+
+		/* No output address: the caller only wants the timestamp updated. */
+		if (!out_addr)
+			continue;
+
+		/* Process messages only if the timestamp is newer. */
+		if (dmesg_timestamp <= *old_dmesg_timestamp)
+			continue;
+
+		/* Only process the first VM fault (keep reading for the timestamp). */
+		if (fault)
+			continue;
+
+		/* Remove trailing \n (size_t: a line may be longer than CHAR_MAX). */
+		size_t len = strlen(line);
+		if (len && line[len-1] == '\n')
+			line[len-1] = 0;
+
+		/* Get the message part. */
+		msg = strchr(line, ']');
+		if (!msg)
+			continue;
+		msg++;
+
+		const char *header_line, *addr_line_prefix;
+
+		if (chip_class >= GFX9) {
+			/* Match this:
+			 * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
+			 * ..: at page 0x0000000219f8f000 from 27
+			 * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
+			 */
+			header_line = "VMC page fault";
+			addr_line_prefix = " at page";
+		} else {
+			header_line = "GPU fault detected:";
+			addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
+		}
+
+		switch (progress) {
+		case 0: /* looking for the fault header line */
+			if (strstr(msg, header_line))
+				progress = 1;
+			break;
+		case 1: /* line right after the header: expect the address */
+			msg = strstr(msg, addr_line_prefix);
+			if (msg) {
+				msg = strstr(msg, "0x");
+				if (msg) {
+					msg += 2; /* skip "0x" */
+					if (sscanf(msg, "%"SCNx64, out_addr) == 1)
+						fault = true;
+				}
+			}
+			progress = 0;
+			break;
+		default:
+			progress = 0;
+		}
+	}
+	pclose(p);
+
+	if (dmesg_timestamp > *old_dmesg_timestamp)
+		*old_dmesg_timestamp = dmesg_timestamp;
+
+	return fault;
+}
diff --git a/src/amd/common/ac_debug.h b/src/amd/common/ac_debug.h
index a37acd20296..277025d8b66 100644
--- a/src/amd/common/ac_debug.h
+++ b/src/amd/common/ac_debug.h
@@ -28,6 +28,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <stdbool.h>
 
 #include "amd_family.h"
 
@@ -46,4 +47,7 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
 		 unsigned trace_id_count, const char *name, enum chip_class chip_class,
 		 ac_debug_addr_callback addr_callback, void *addr_callback_data);
 
+bool ac_vm_fault_occured(enum chip_class chip_class,
+			 uint64_t *old_dmesg_timestamp, uint64_t *out_addr);
+
 #endif