summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2016-11-02 23:50:03 +0100
committerSamuel Pitoiset <[email protected]>2016-11-02 23:35:49 +0100
commitb6137f226c593a31f85a7551594ce161799992af (patch)
treefb09fe322b8d322b1e3f90ab4ceee4ca60119531 /src/gallium/drivers/nouveau
parent98a382d013c8963e0854f4859b81065171992e04 (diff)
nvc0: add new warp_nonpred_execution_efficiency metric on SM35
Event not_predicated_off_thread_inst_executed is SM35+. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau')
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c37
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h1
2 files changed, 37 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
index 03a3ff07b20..0e2d89f5a0b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -89,6 +89,13 @@ static const struct nvc0_hw_metric_cfg {
PERCENTAGE,
"Ratio of the average active threads per warp to the maximum number of "
"threads per warp supported on a multiprocessor"),
+
+ _Q(WARP_NONPRED_EXECUTION_EFFICIENCY,
+ "metric-warp_nonpred_execution_efficiency",
+ PERCENTAGE,
+ "Ratio of the average active threads per warp executing non-predicated "
+ "instructions to the maximum number of threads per warp supported on a "
+ "multiprocessor"),
};
#undef _Q
@@ -344,7 +351,16 @@ static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
&sm30_warp_execution_efficiency,
};
-/* ==== Compute capability 3.5 (GK110) ==== */
+/* ==== Compute capability 3.5 (GK110/GK208) ==== */
+static const struct nvc0_hw_metric_query_cfg
+sm35_warp_nonpred_execution_efficiency =
+{
+ .type = NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY,
+ .queries[0] = _SM(INST_EXECUTED),
+ .queries[1] = _SM(NOT_PRED_OFF_INST_EXECUTED),
+ .num_queries = 2,
+};
+
static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
{
&sm20_achieved_occupancy,
@@ -357,6 +373,7 @@ static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
&sm30_issue_slot_utilization,
&sm30_shared_replay_overhead,
&sm30_warp_execution_efficiency,
+ &sm35_warp_nonpred_execution_efficiency,
};
#undef _SM
@@ -604,6 +621,22 @@ sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
return 0;
}
+static uint64_t
+sm35_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
+{
+ switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
+ case NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY:
+ /* not_predicated_off_thread_inst_executed / (inst_executed * max. number
+ * of threads per warp) * 100 */
+ if (res64[0]) /* skip the division when the divisor counter is zero */
+ return (res64[1] / ((double)res64[0] * 32)) * 100;
+ break;
+ default: /* every other metric type is delegated to the SM30 calculation */
+ return sm30_hw_metric_calc_result(hq, res64);
+ }
+ return 0; /* reached only when res64[0] is zero for the metric above */
+}
+
static boolean
nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
struct nvc0_hw_query *hq, boolean wait,
@@ -628,6 +661,8 @@ nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
switch (screen->base.class_3d) {
case NVF0_3D_CLASS:
+ value = sm35_hw_metric_calc_result(hq, res64);
+ break;
case NVE4_3D_CLASS:
value = sm30_hw_metric_calc_result(hq, res64);
break;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
index c9a54c9493d..878df3e209c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
@@ -33,6 +33,7 @@ enum nvc0_hw_metric_queries
NVC0_HW_METRIC_QUERY_IPC,
NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY,
+ NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY,
NVC0_HW_METRIC_QUERY_COUNT
};