summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/amd/common/ac_debug.c109
-rw-r--r--src/amd/common/ac_debug.h4
-rw-r--r--src/gallium/drivers/radeonsi/si_debug.c106
3 files changed, 117 insertions, 102 deletions
diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c
index d46fc27a9e1..0de00e27e75 100644
--- a/src/amd/common/ac_debug.c
+++ b/src/amd/common/ac_debug.c
@@ -34,6 +34,8 @@
#define VG(x)
#endif
+#include <inttypes.h>
+
#include "sid.h"
#include "gfx9d.h"
#include "sid_tables.h"
@@ -597,3 +599,110 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
fprintf(f, "------------------- %s end -------------------\n\n", name);
}
+
+/**
+ * Parse dmesg and return TRUE if a VM fault has been detected.
+ *
+ * \param chip_class chip class
+ * \param old_dmesg_timestamp previous dmesg timestamp parsed at init time
+ * \param out_addr detected VM fault addr
+ */
+bool ac_vm_fault_occured(enum chip_class chip_class,
+ uint64_t *old_dmesg_timestamp, uint64_t *out_addr)
+{
+ char line[2000];
+ unsigned sec, usec;
+ int progress = 0;
+ uint64_t dmesg_timestamp = 0;
+ bool fault = false;
+
+ FILE *p = popen("dmesg", "r");
+ if (!p)
+ return false;
+
+ while (fgets(line, sizeof(line), p)) {
+ char *msg, len;
+
+ if (!line[0] || line[0] == '\n')
+ continue;
+
+ /* Get the timestamp. */
+ if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
+ static bool hit = false;
+ if (!hit) {
+ fprintf(stderr, "%s: failed to parse line '%s'\n",
+ __func__, line);
+ hit = true;
+ }
+ continue;
+ }
+ dmesg_timestamp = sec * 1000000ull + usec;
+
+ /* If just updating the timestamp. */
+ if (!out_addr)
+ continue;
+
+ /* Process messages only if the timestamp is newer. */
+ if (dmesg_timestamp <= *old_dmesg_timestamp)
+ continue;
+
+ /* Only process the first VM fault. */
+ if (fault)
+ continue;
+
+ /* Remove trailing \n */
+ len = strlen(line);
+ if (len && line[len-1] == '\n')
+ line[len-1] = 0;
+
+ /* Get the message part. */
+ msg = strchr(line, ']');
+ if (!msg)
+ continue;
+ msg++;
+
+ const char *header_line, *addr_line_prefix, *addr_line_format;
+
+ if (chip_class >= GFX9) {
+ /* Match this:
+ * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
+ * ..: at page 0x0000000219f8f000 from 27
+ * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
+ */
+ header_line = "VMC page fault";
+ addr_line_prefix = " at page";
+ addr_line_format = "%"PRIx64;
+ } else {
+ header_line = "GPU fault detected:";
+ addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
+ addr_line_format = "%"PRIX64;
+ }
+
+ switch (progress) {
+ case 0:
+ if (strstr(msg, header_line))
+ progress = 1;
+ break;
+ case 1:
+ msg = strstr(msg, addr_line_prefix);
+ if (msg) {
+ msg = strstr(msg, "0x");
+ if (msg) {
+ msg += 2;
+ if (sscanf(msg, addr_line_format, out_addr) == 1)
+ fault = true;
+ }
+ }
+ progress = 0;
+ break;
+ default:
+ progress = 0;
+ }
+ }
+ pclose(p);
+
+ if (dmesg_timestamp > *old_dmesg_timestamp)
+ *old_dmesg_timestamp = dmesg_timestamp;
+
+ return fault;
+}
diff --git a/src/amd/common/ac_debug.h b/src/amd/common/ac_debug.h
index a37acd20296..277025d8b66 100644
--- a/src/amd/common/ac_debug.h
+++ b/src/amd/common/ac_debug.h
@@ -28,6 +28,7 @@
#include <stdint.h>
#include <stdio.h>
+#include <stdbool.h>
#include "amd_family.h"
@@ -46,4 +47,7 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
unsigned trace_id_count, const char *name, enum chip_class chip_class,
ac_debug_addr_callback addr_callback, void *addr_callback_data);
+bool ac_vm_fault_occured(enum chip_class chip_class,
+ uint64_t *old_dmesg_timestamp, uint64_t *out_addr);
+
#endif
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 1b97ea40970..4e8d861019a 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -1092,106 +1092,6 @@ static void si_dump_dma(struct si_context *sctx,
fprintf(f, "SDMA Dump Done.\n");
}
-static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr)
-{
- char line[2000];
- unsigned sec, usec;
- int progress = 0;
- uint64_t timestamp = 0;
- bool fault = false;
-
- FILE *p = popen("dmesg", "r");
- if (!p)
- return false;
-
- while (fgets(line, sizeof(line), p)) {
- char *msg, len;
-
- if (!line[0] || line[0] == '\n')
- continue;
-
- /* Get the timestamp. */
- if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
- static bool hit = false;
- if (!hit) {
- fprintf(stderr, "%s: failed to parse line '%s'\n",
- __func__, line);
- hit = true;
- }
- continue;
- }
- timestamp = sec * 1000000ull + usec;
-
- /* If just updating the timestamp. */
- if (!out_addr)
- continue;
-
- /* Process messages only if the timestamp is newer. */
- if (timestamp <= sctx->dmesg_timestamp)
- continue;
-
- /* Only process the first VM fault. */
- if (fault)
- continue;
-
- /* Remove trailing \n */
- len = strlen(line);
- if (len && line[len-1] == '\n')
- line[len-1] = 0;
-
- /* Get the message part. */
- msg = strchr(line, ']');
- if (!msg) {
- assert(0);
- continue;
- }
- msg++;
-
- const char *header_line, *addr_line_prefix, *addr_line_format;
-
- if (sctx->b.chip_class >= GFX9) {
- /* Match this:
- * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
- * ..: at page 0x0000000219f8f000 from 27
- * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
- */
- header_line = "VMC page fault";
- addr_line_prefix = " at page";
- addr_line_format = "%"PRIx64;
- } else {
- header_line = "GPU fault detected:";
- addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
- addr_line_format = "%"PRIX64;
- }
-
- switch (progress) {
- case 0:
- if (strstr(msg, header_line))
- progress = 1;
- break;
- case 1:
- msg = strstr(msg, addr_line_prefix);
- if (msg) {
- msg = strstr(msg, "0x");
- if (msg) {
- msg += 2;
- if (sscanf(msg, addr_line_format, out_addr) == 1)
- fault = true;
- }
- }
- progress = 0;
- break;
- default:
- progress = 0;
- }
- }
- pclose(p);
-
- if (timestamp > sctx->dmesg_timestamp)
- sctx->dmesg_timestamp = timestamp;
- return fault;
-}
-
void si_check_vm_faults(struct r600_common_context *ctx,
struct radeon_saved_cs *saved, enum ring_type ring)
{
@@ -1201,7 +1101,8 @@ void si_check_vm_faults(struct r600_common_context *ctx,
uint64_t addr;
char cmd_line[4096];
- if (!si_vm_fault_occured(sctx, &addr))
+ if (!ac_vm_fault_occured(sctx->b.chip_class,
+ &sctx->dmesg_timestamp, &addr))
return;
f = dd_get_debug_file(false);
@@ -1255,5 +1156,6 @@ void si_init_debug_functions(struct si_context *sctx)
* only new messages will be checked for VM faults.
*/
if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
- si_vm_fault_occured(sctx, NULL);
+ ac_vm_fault_occured(sctx->b.chip_class,
+ &sctx->dmesg_timestamp, NULL);
}