summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Samuel Pitoiset <[email protected]> 2016-11-02 23:57:25 +0100
committer Samuel Pitoiset <[email protected]> 2016-11-02 23:35:42 +0100
commit 524703da5899443ca83cb7aaa587c50d5d74a996 (patch)
tree f8984d15de4f313932a2ed47f12b381560462cf1 /src
parent 51fe48660a2eda1f0b3574c4b4e5386f90245139 (diff)
nvc0: add new warp_execution_efficiency metric on SM30+
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c 23
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h 1
2 files changed, 24 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
index f390ebdb2da..e5034f79b5a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -83,6 +83,12 @@ static const struct nvc0_hw_metric_cfg {
UINT64,
"Average number of replays due to shared memory conflicts for each "
"instruction executed"),
+
+ _Q(WARP_EXECUTION_EFFICIENCY,
+ "metric-warp_execution_efficiency",
+ PERCENTAGE,
+ "Ratio of the average active threads per warp to the maximum number of "
+ "threads per warp supported on a multiprocessor"),
};
#undef _Q
@@ -314,6 +320,15 @@ sm30_shared_replay_overhead =
.num_queries = 3,
};
+static const struct nvc0_hw_metric_query_cfg
+sm30_warp_execution_efficiency =
+{
+ .type = NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY,
+ .queries[0] = _SM(INST_EXECUTED),
+ .queries[1] = _SM(TH_INST_EXECUTED),
+ .num_queries = 2,
+};
+
static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
{
&sm20_achieved_occupancy,
@@ -326,6 +341,7 @@ static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
&sm30_issue_slots,
&sm30_issue_slot_utilization,
&sm30_shared_replay_overhead,
+ &sm30_warp_execution_efficiency,
};
/* ==== Compute capability 3.5 (GK110) ==== */
@@ -340,6 +356,7 @@ static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
&sm30_inst_issued,
&sm30_issue_slot_utilization,
&sm30_shared_replay_overhead,
+ &sm30_warp_execution_efficiency,
};
#undef _SM
@@ -573,6 +590,12 @@ sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
if (res64[2])
return (res64[0] + res64[1]) / (double)res64[2];
break;
+ case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY:
+ /* thread_inst_executed / (inst_executed * max. number of threads per
+ * warp) * 100 */
+ if (res64[0])
+ return (res64[1] / ((double)res64[0] * 32)) * 100;
+ break;
default:
debug_printf("invalid metric type: %d\n",
hq->base.type - NVC0_HW_METRIC_QUERY(0));
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
index 3203a8ca2b9..c9a54c9493d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
@@ -32,6 +32,7 @@ enum nvc0_hw_metric_queries
NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
NVC0_HW_METRIC_QUERY_IPC,
NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
+ NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY,
NVC0_HW_METRIC_QUERY_COUNT
};