diff options
Diffstat (limited to 'src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c')
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 296 |
1 files changed, 165 insertions, 131 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c index c1085511a85..a01ab3fba4c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -24,32 +24,51 @@ #include "nvc0/nvc0_query_hw_metric.h" #include "nvc0/nvc0_query_hw_sm.h" -/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */ -static const char *nvc0_hw_metric_names[] = -{ - "metric-achieved_occupancy", - "metric-branch_efficiency", - "metric-inst_issued", - "metric-inst_per_wrap", - "metric-inst_replay_overhead", - "metric-issued_ipc", - "metric-issue_slots", - "metric-issue_slot_utilization", - "metric-ipc", +#define _Q(t,n) { NVC0_HW_METRIC_QUERY_##t, n } +struct { + unsigned type; + const char *name; +} nvc0_hw_metric_queries[] = { + _Q(ACHIEVED_OCCUPANCY, "metric-achieved_occupancy" ), + _Q(BRANCH_EFFICIENCY, "metric-branch_efficiency" ), + _Q(INST_ISSUED, "metric-inst_issued" ), + _Q(INST_PER_WRAP, "metric-inst_per_wrap" ), + _Q(INST_REPLAY_OVERHEAD, "metric-inst_replay_overhead" ), + _Q(ISSUED_IPC, "metric-issued_ipc" ), + _Q(ISSUE_SLOTS, "metric-issue_slots" ), + _Q(ISSUE_SLOT_UTILIZATION, "metric-issue_slot_utilization" ), + _Q(IPC, "metric-ipc" ), + _Q(SHARED_REPLAY_OVERHEAD, "metric-shared_replay_overhead" ), }; +#undef _Q + +static inline const char * +nvc0_hw_metric_query_get_name(unsigned query_type) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) { + if (nvc0_hw_metric_queries[i].type == query_type) + return nvc0_hw_metric_queries[i].name; + } + assert(0); + return NULL; +} + struct nvc0_hw_metric_query_cfg { + unsigned type; uint32_t queries[8]; uint32_t num_queries; }; #define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n) -#define _M(n, c) [NVC0_HW_METRIC_QUERY_##n] = c /* ==== Compute capability 2.0 (GF100/GF110) ==== */ static const struct nvc0_hw_metric_query_cfg sm20_achieved_occupancy = { + .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY, .queries[0] = _SM(ACTIVE_WARPS), .queries[1] = _SM(ACTIVE_CYCLES), .num_queries = 2, @@ -58,6 +77,7 @@ sm20_achieved_occupancy = static const struct nvc0_hw_metric_query_cfg sm20_branch_efficiency = { + .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY, .queries[0] = _SM(BRANCH), .queries[1] = _SM(DIVERGENT_BRANCH), .num_queries = 2, @@ -66,6 +86,7 @@ sm20_branch_efficiency = static const struct nvc0_hw_metric_query_cfg sm20_inst_per_wrap = { + .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP, .queries[0] = _SM(INST_EXECUTED), .queries[1] = _SM(WARPS_LAUNCHED), .num_queries = 2, @@ -74,6 +95,7 @@ sm20_inst_per_wrap = static const struct nvc0_hw_metric_query_cfg sm20_inst_replay_overhead = { + .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD, .queries[0] = _SM(INST_ISSUED), .queries[1] = _SM(INST_EXECUTED), .num_queries = 2, @@ -82,6 +104,16 @@ sm20_inst_replay_overhead = static const struct nvc0_hw_metric_query_cfg sm20_issued_ipc = { + .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC, + .queries[0] = _SM(INST_ISSUED), + .queries[1] = _SM(ACTIVE_CYCLES), + .num_queries = 2, +}; + +static const struct nvc0_hw_metric_query_cfg +sm20_issue_slot_utilization = +{ + .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION, .queries[0] = _SM(INST_ISSUED), .queries[1] = _SM(ACTIVE_CYCLES), .num_queries = 2, @@ -90,6 +122,7 @@ sm20_issued_ipc = static const struct nvc0_hw_metric_query_cfg sm20_ipc = { + .type = NVC0_HW_METRIC_QUERY_IPC, .queries[0] = _SM(INST_EXECUTED), .queries[1] = _SM(ACTIVE_CYCLES), .num_queries = 2, @@ -97,21 +130,20 @@ sm20_ipc = static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] = { - _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy), - _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency), - _M(INST_ISSUED, NULL), - _M(INST_PER_WRAP, &sm20_inst_per_wrap), - _M(INST_REPLAY_OVERHEAD, &sm20_inst_replay_overhead), - _M(ISSUED_IPC, &sm20_issued_ipc), - _M(ISSUE_SLOTS, NULL), - _M(ISSUE_SLOT_UTILIZATION, &sm20_issued_ipc), - _M(IPC, &sm20_ipc), + &sm20_achieved_occupancy, + &sm20_branch_efficiency, + &sm20_inst_per_wrap, + &sm20_inst_replay_overhead, + &sm20_issued_ipc, + &sm20_issue_slot_utilization, + &sm20_ipc, }; /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */ static const struct nvc0_hw_metric_query_cfg sm21_inst_issued = { + .type = NVC0_HW_METRIC_QUERY_INST_ISSUED, .queries[0] = _SM(INST_ISSUED1_0), .queries[1] = _SM(INST_ISSUED1_1), .queries[2] = _SM(INST_ISSUED2_0), @@ -122,6 +154,7 @@ sm21_inst_issued = static const struct nvc0_hw_metric_query_cfg sm21_inst_replay_overhead = { + .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD, .queries[0] = _SM(INST_ISSUED1_0), .queries[1] = _SM(INST_ISSUED1_1), .queries[2] = _SM(INST_ISSUED2_0), @@ -133,6 +166,7 @@ sm21_inst_replay_overhead = static const struct nvc0_hw_metric_query_cfg sm21_issued_ipc = { + .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC, .queries[0] = _SM(INST_ISSUED1_0), .queries[1] = _SM(INST_ISSUED1_1), .queries[2] = _SM(INST_ISSUED2_0), @@ -141,44 +175,36 @@ sm21_issued_ipc = .num_queries = 5, }; -static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] = +static const struct nvc0_hw_metric_query_cfg +sm21_issue_slot_utilization = { - _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy), - _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency), - _M(INST_ISSUED, &sm21_inst_issued), - _M(INST_PER_WRAP, &sm20_inst_per_wrap), - _M(INST_REPLAY_OVERHEAD, &sm21_inst_replay_overhead), - _M(ISSUED_IPC, &sm21_issued_ipc), - _M(ISSUE_SLOTS, &sm21_inst_issued), - _M(ISSUE_SLOT_UTILIZATION, &sm21_issued_ipc), - _M(IPC, &sm20_ipc), + .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION, + .queries[0] = _SM(INST_ISSUED1_0), + .queries[1] = _SM(INST_ISSUED1_1), + .queries[2] = _SM(INST_ISSUED2_0), + .queries[3] = _SM(INST_ISSUED2_1), + .queries[4] = _SM(ACTIVE_CYCLES), + .num_queries = 5, }; -#undef _SM -#undef _M - -/* === PERFORMANCE MONITORING METRICS for NVE4+ === */ -static const char *nve4_hw_metric_names[] = -{ - "metric-achieved_occupancy", - "metric-branch_efficiency", - "metric-inst_issued", - "metric-inst_per_wrap", - "metric-inst_replay_overhead", - "metric-issued_ipc", - "metric-issue_slots", - "metric-issue_slot_utilization", - "metric-ipc", - "metric-shared_replay_overhead", +static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] = +{ + &sm20_achieved_occupancy, + &sm20_branch_efficiency, + &sm21_inst_issued, + &sm20_inst_per_wrap, + &sm21_inst_replay_overhead, + &sm21_issued_ipc, + &sm21_inst_issued, + &sm21_issue_slot_utilization, + &sm20_ipc, }; -#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n) -#define _M(n, c) [NVE4_HW_METRIC_QUERY_##n] = c - /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */ static const struct nvc0_hw_metric_query_cfg sm30_achieved_occupancy = { + .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY, .queries[0] = _SM(ACTIVE_WARPS), .queries[1] = _SM(ACTIVE_CYCLES), .num_queries = 2, @@ -187,6 +213,7 @@ sm30_achieved_occupancy = static const struct nvc0_hw_metric_query_cfg sm30_branch_efficiency = { + .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY, .queries[0] = _SM(BRANCH), .queries[1] = _SM(DIVERGENT_BRANCH), .num_queries = 2, @@ -195,6 +222,7 @@ sm30_branch_efficiency = static const struct nvc0_hw_metric_query_cfg sm30_inst_issued = { + .type = NVC0_HW_METRIC_QUERY_INST_ISSUED, .queries[0] = _SM(INST_ISSUED1), .queries[1] = _SM(INST_ISSUED2), .num_queries = 2, @@ -203,6 +231,7 @@ sm30_inst_issued = static const struct nvc0_hw_metric_query_cfg sm30_inst_per_wrap = { + .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP, .queries[0] = _SM(INST_EXECUTED), .queries[1] = _SM(WARPS_LAUNCHED), .num_queries = 2, @@ -211,6 +240,7 @@ sm30_inst_per_wrap = static const struct nvc0_hw_metric_query_cfg sm30_inst_replay_overhead = { + .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD, .queries[0] = _SM(INST_ISSUED1), .queries[1] = _SM(INST_ISSUED2), .queries[2] = _SM(INST_EXECUTED), @@ -220,6 +250,17 @@ sm30_inst_replay_overhead = static const struct nvc0_hw_metric_query_cfg sm30_issued_ipc = { + .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC, + .queries[0] = _SM(INST_ISSUED1), + .queries[1] = _SM(INST_ISSUED2), + .queries[2] = _SM(ACTIVE_CYCLES), + .num_queries = 3, +}; + +static const struct nvc0_hw_metric_query_cfg +sm30_issue_slot_utilization = +{ + .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION, .queries[0] = _SM(INST_ISSUED1), .queries[1] = _SM(INST_ISSUED2), .queries[2] = _SM(ACTIVE_CYCLES), @@ -229,6 +270,7 @@ sm30_issued_ipc = static const struct nvc0_hw_metric_query_cfg sm30_ipc = { + .type = NVC0_HW_METRIC_QUERY_IPC, .queries[0] = _SM(INST_EXECUTED), .queries[1] = _SM(ACTIVE_CYCLES), .num_queries = 2, @@ -237,6 +279,7 @@ sm30_ipc = static const struct nvc0_hw_metric_query_cfg sm30_shared_replay_overhead = { + .type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD, .queries[0] = _SM(SHARED_LD_REPLAY), .queries[1] = _SM(SHARED_ST_REPLAY), .queries[2] = _SM(INST_EXECUTED), @@ -245,44 +288,71 @@ sm30_shared_replay_overhead = static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] = { - _M(ACHIEVED_OCCUPANCY, &sm30_achieved_occupancy), - _M(BRANCH_EFFICIENCY, &sm30_branch_efficiency), - _M(INST_ISSUED, &sm30_inst_issued), - _M(INST_PER_WRAP, &sm30_inst_per_wrap), - _M(INST_REPLAY_OVERHEAD, &sm30_inst_replay_overhead), - _M(ISSUED_IPC, &sm30_issued_ipc), - _M(ISSUE_SLOTS, &sm30_inst_issued), - _M(ISSUE_SLOT_UTILIZATION, &sm30_issued_ipc), - _M(IPC, &sm30_ipc), - _M(SHARED_REPLAY_OVERHEAD, &sm30_shared_replay_overhead), + &sm30_achieved_occupancy, + &sm30_branch_efficiency, + &sm30_inst_issued, + &sm30_inst_per_wrap, + &sm30_inst_replay_overhead, + &sm30_issued_ipc, + &sm30_inst_issued, + &sm30_issue_slot_utilization, + &sm30_ipc, + &sm30_shared_replay_overhead, }; #undef _SM -#undef _M static inline const struct nvc0_hw_metric_query_cfg ** nvc0_hw_metric_get_queries(struct nvc0_screen *screen) { struct nouveau_device *dev = screen->base.device; - if (dev->chipset == 0xc0 || dev->chipset == 0xc8) - return sm20_hw_metric_queries; - return sm21_hw_metric_queries; + switch (screen->base.class_3d) { + case NVE4_3D_CLASS: + return sm30_hw_metric_queries; + default: + if (dev->chipset == 0xc0 || dev->chipset == 0xc8) + return sm20_hw_metric_queries; + return sm21_hw_metric_queries; + } + assert(0); + return NULL; +} + +unsigned +nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen) +{ + struct nouveau_device *dev = screen->base.device; + + switch (screen->base.class_3d) { + case NVE4_3D_CLASS: + return ARRAY_SIZE(sm30_hw_metric_queries); + default: + if (dev->chipset == 0xc0 || dev->chipset == 0xc8) + return ARRAY_SIZE(sm20_hw_metric_queries); + return ARRAY_SIZE(sm21_hw_metric_queries); + } + return 0; } static const struct nvc0_hw_metric_query_cfg * -nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, - struct nvc0_hw_query *hq) +nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) { const struct nvc0_hw_metric_query_cfg **queries; struct nvc0_screen *screen = nvc0->screen; struct nvc0_query *q = &hq->base; + unsigned num_queries; + unsigned i; - if (screen->base.class_3d >= NVE4_3D_CLASS) - return sm30_hw_metric_queries[q->type - NVE4_HW_METRIC_QUERY(0)]; - + num_queries = nvc0_hw_metric_get_num_queries(screen); queries = nvc0_hw_metric_get_queries(screen); - return queries[q->type - NVC0_HW_METRIC_QUERY(0)]; + + for (i = 0; i < num_queries; i++) { + if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type) + return queries[i]; + } + assert(0); + return NULL; } static void @@ -419,47 +489,47 @@ sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) static uint64_t sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) { - switch (hq->base.type - NVE4_HW_METRIC_QUERY(0)) { - case NVE4_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: + switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { + case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: /* (active_warps / active_cycles) / max. number of warps on a MP */ if (res64[1]) return (res64[0] / (double)res64[1]) / 64; break; - case NVE4_HW_METRIC_QUERY_BRANCH_EFFICIENCY: + case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY: return sm20_hw_metric_calc_result(hq, res64); - case NVE4_HW_METRIC_QUERY_INST_ISSUED: + case NVC0_HW_METRIC_QUERY_INST_ISSUED: /* inst_issued1 + inst_issued2 * 2 */ return res64[0] + res64[1] * 2; - case NVE4_HW_METRIC_QUERY_INST_PER_WRAP: + case NVC0_HW_METRIC_QUERY_INST_PER_WRAP: return sm20_hw_metric_calc_result(hq, res64); - case NVE4_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD: + case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD: /* (metric-inst_issued - inst_executed) / inst_executed */ if (res64[2]) return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]); break; - case NVE4_HW_METRIC_QUERY_ISSUED_IPC: + case NVC0_HW_METRIC_QUERY_ISSUED_IPC: /* metric-inst_issued / active_cycles */ if (res64[2]) return (res64[0] + res64[1] * 2) / (double)res64[2]; break; - case NVE4_HW_METRIC_QUERY_ISSUE_SLOTS: + case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS: /* inst_issued1 + inst_issued2 */ return res64[0] + res64[1]; - case NVE4_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION: + case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION: /* ((metric-issue_slots / 2) / active_cycles) * 100 */ if (res64[2]) return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100; break; - case NVE4_HW_METRIC_QUERY_IPC: + case NVC0_HW_METRIC_QUERY_IPC: return sm20_hw_metric_calc_result(hq, res64); - case NVE4_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD: + case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD: /* (shared_load_replay + shared_store_replay) / inst_executed */ if (res64[2]) return (res64[0] + res64[1]) / (double)res64[2]; break; default: debug_printf("invalid metric type: %d\n", - hq->base.type - NVE4_HW_METRIC_QUERY(0)); + hq->base.type - NVC0_HW_METRIC_QUERY(0)); break; } return 0; @@ -487,13 +557,16 @@ nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0, res64[i] = *(uint64_t *)&results[i]; } - if (screen->base.class_3d >= NVE4_3D_CLASS) { + switch (screen->base.class_3d) { + case NVE4_3D_CLASS: value = sm30_hw_metric_calc_result(hq, res64); - } else { + break; + default: if (dev->chipset == 0xc0 || dev->chipset == 0xc8) value = sm20_hw_metric_calc_result(hq, res64); else value = sm21_hw_metric_calc_result(hq, res64); + break; } *(uint64_t *)result = value; @@ -515,8 +588,7 @@ nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type) struct nvc0_hw_query *hq; unsigned i; - if ((type < NVE4_HW_METRIC_QUERY(0) || type > NVE4_HW_METRIC_QUERY_LAST) && - (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)) + if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST) return NULL; hmq = CALLOC_STRUCT(nvc0_hw_metric_query); @@ -541,46 +613,15 @@ nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type) return hq; } -static int -nvc0_hw_metric_get_next_query_id(const struct nvc0_hw_metric_query_cfg **queries, - unsigned id) -{ - unsigned i, next = 0; - - for (i = 0; i < NVC0_HW_METRIC_QUERY_COUNT; i++) { - if (!queries[i]) { - next++; - } else - if (i >= id && queries[id + next]) { - break; - } - } - return id + next; -} - int nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id, struct pipe_driver_query_info *info) { - uint16_t class_3d = screen->base.class_3d; int count = 0; if (screen->base.drm->version >= 0x01000101) { - if (screen->compute) { - if (screen->base.class_3d == NVE4_3D_CLASS) { - count += NVE4_HW_METRIC_QUERY_COUNT; - } else - if (class_3d < NVE4_3D_CLASS) { - const struct nvc0_hw_metric_query_cfg **queries = - nvc0_hw_metric_get_queries(screen); - unsigned i; - - for (i = 0; i < NVC0_HW_METRIC_QUERY_COUNT; i++) { - if (queries[i]) - count++; - } - } - } + if (screen->compute) + count = nvc0_hw_metric_get_num_queries(screen); } if (!info) @@ -588,19 +629,12 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id, if (id < count) { if (screen->compute) { - if (screen->base.class_3d == NVE4_3D_CLASS) { - info->name = nve4_hw_metric_names[id]; - info->query_type = NVE4_HW_METRIC_QUERY(id); - info->group_id = NVC0_HW_METRIC_QUERY_GROUP; - return 1; - } else - if (class_3d < NVE4_3D_CLASS) { - const struct nvc0_hw_metric_query_cfg **queries = + if (screen->base.class_3d <= NVE4_3D_CLASS) { + const struct nvc0_hw_metric_query_cfg **queries = nvc0_hw_metric_get_queries(screen); - id = nvc0_hw_metric_get_next_query_id(queries, id); - info->name = nvc0_hw_metric_names[id]; - info->query_type = NVC0_HW_METRIC_QUERY(id); + info->name = nvc0_hw_metric_query_get_name(queries[id]->type); + info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type); info->group_id = NVC0_HW_METRIC_QUERY_GROUP; return 1; } |