diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/Android.mk | 1 | ||||
-rw-r--r-- | src/intel/Android.perf.mk | 52 | ||||
-rw-r--r-- | src/intel/Makefile.perf.am | 41 | ||||
-rw-r--r-- | src/intel/Makefile.sources | 24 | ||||
-rw-r--r-- | src/intel/meson.build | 1 | ||||
-rw-r--r-- | src/intel/perf/gen_perf.c | 435 | ||||
-rw-r--r-- | src/intel/perf/gen_perf.h | 258 | ||||
-rw-r--r-- | src/intel/perf/gen_perf.py (renamed from src/mesa/drivers/dri/i965/brw_oa.py) | 79 | ||||
-rw-r--r-- | src/intel/perf/meson.build | 40 | ||||
-rw-r--r-- | src/intel/perf/oa-bdw.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_bdw.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-bxt.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_bxt.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-cflgt2.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_cflgt2.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-cflgt3.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_cflgt3.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-chv.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_chv.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-cnl.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_cnl.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-glk.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_glk.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-hsw.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_hsw.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-icl.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_icl.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-kblgt2.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-kblgt3.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-sklgt2.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-sklgt3.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml) | 0 | ||||
-rw-r--r-- | src/intel/perf/oa-sklgt4.xml (renamed from src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml) | 0 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Android.mk | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 73 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query.c | 653 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query.h | 80 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c | 114 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query_metrics.h | 57 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/meson.build | 33 |
31 files changed, 1098 insertions, 866 deletions
diff --git a/src/intel/Android.mk b/src/intel/Android.mk index 380473d0acb..96498141dd2 100644 --- a/src/intel/Android.mk +++ b/src/intel/Android.mk @@ -31,4 +31,5 @@ include $(LOCAL_PATH)/Android.compiler.mk include $(LOCAL_PATH)/Android.dev.mk include $(LOCAL_PATH)/Android.genxml.mk include $(LOCAL_PATH)/Android.isl.mk +include $(LOCAL_PATH)/Android.perf.mk include $(LOCAL_PATH)/Android.vulkan.mk diff --git a/src/intel/Android.perf.mk b/src/intel/Android.perf.mk new file mode 100644 index 00000000000..0d7d746a632 --- /dev/null +++ b/src/intel/Android.perf.mk @@ -0,0 +1,52 @@ +# Copyright © 2018 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+# + +# --------------------------------------- +# Build libmesa_intel_perf +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_intel_perf + +LOCAL_MODULE_CLASS := STATIC_LIBRARIES + +intermediates := $(call local-generated-sources-dir) + +LOCAL_C_INCLUDES := $(MESA_TOP)/include/drm-uapi + +LOCAL_SRC_FILES := $(GEN_PERF_FILES) + +LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ + $(GEN_PERF_GENERATED_FILES)) + +$(intermediates)/perf/gen_perf_metrics.c: $(LOCAL_PATH)/perf/gen_perf.py $(addprefix $(MESA_TOP)/src/intel/,$(GEN_PERF_XML_FILES)) + @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" + @mkdir -p $(dir $@) + $(hide) $(MESA_PYTHON2) $< \ + --code=$@ \ + --header=$(@:%.c=%.h) \ + $(addprefix $(MESA_TOP)/src/intel/,$(GEN_PERF_XML_FILES)) + +$(intermediates)/perf/gen_perf_metrics.h: $(intermediates)/perf/gen_perf_metrics.c + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Makefile.perf.am b/src/intel/Makefile.perf.am new file mode 100644 index 00000000000..a9d896c458d --- /dev/null +++ b/src/intel/Makefile.perf.am @@ -0,0 +1,41 @@ +# Copyright © 2018 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +BUILT_SOURCES += \ + $(GEN_PERF_GENERATED_FILES) + +noinst_LTLIBRARIES += perf/libintel_perf.la + +perf_libintel_perf_la_SOURCES = $(GEN_PERF_FILES) $(GEN_PERF_GENERATED_FILES) +perf_libintel_perf_la_CFLAGS = $(AM_CFLAGS) + +perf/gen_perf_metrics.c: perf/gen_perf.py $(GEN_PERF_XML_FILES) + $(MKDIR_GEN) + $(PYTHON_GEN) $(PYTHON_FLAGS) $(srcdir)/perf/gen_perf.py \ + --code=$(builddir)/perf/gen_perf_metrics.c \ + --header=$(builddir)/perf/gen_perf_metrics.h \ + $(GEN_PERF_XML_FILES:%=$(srcdir)/%) + +perf/gen_perf_metrics.h: perf/gen_perf_metrics.c + +EXTRA_DIST += \ + $(GEN_PERF_XML_FILES) \ + perf/gen_perf.py diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index ffe51f87698..b5915a0d9a4 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -314,3 +314,27 @@ VULKAN_GEN10_FILES := \ VULKAN_GEN11_FILES := \ vulkan/gen8_cmd_buffer.c \ $(VULKAN_GENX_FILES) + +GEN_PERF_XML_FILES = \ + perf/oa-hsw.xml \ + perf/oa-bdw.xml \ + perf/oa-chv.xml \ + perf/oa-sklgt2.xml \ + perf/oa-sklgt3.xml \ + perf/oa-sklgt4.xml \ + perf/oa-bxt.xml \ + perf/oa-kblgt2.xml \ + perf/oa-kblgt3.xml \ + perf/oa-glk.xml \ + perf/oa-cflgt2.xml \ + perf/oa-cflgt3.xml \ + perf/oa-cnl.xml \ + perf/oa-icl.xml + +GEN_PERF_FILES = \ + perf/gen_perf.c \ + perf/gen_perf.h + +GEN_PERF_GENERATED_FILES = \ + perf/gen_perf_metrics.c \ + perf/gen_perf_metrics.h diff --git a/src/intel/meson.build b/src/intel/meson.build index a5bb03e314a..7b6aa40ba8d 100644 --- a/src/intel/meson.build +++ 
b/src/intel/meson.build @@ -27,6 +27,7 @@ subdir('dev') subdir('isl') subdir('common') subdir('compiler') +subdir('perf') if with_tools.contains('intel') or with_tools.contains('intel-ui') subdir('tools') endif diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c new file mode 100644 index 00000000000..bc26cff9c99 --- /dev/null +++ b/src/intel/perf/gen_perf.c @@ -0,0 +1,435 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <dirent.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> + +#include <drm-uapi/i915_drm.h> + +#include "gen_perf.h" +#include "perf/gen_perf_metrics.h" + +#include "dev/gen_debug.h" +#include "dev/gen_device_info.h" +#include "util/bitscan.h" + +#define FILE_DEBUG_FLAG DEBUG_PERFMON + +static bool +get_sysfs_dev_dir(struct gen_perf *perf, int fd) +{ + struct stat sb; + int min, maj; + DIR *drmdir; + struct dirent *drm_entry; + int len; + + perf->sysfs_dev_dir[0] = '\0'; + + if (fstat(fd, &sb)) { + DBG("Failed to stat DRM fd\n"); + return false; + } + + maj = major(sb.st_rdev); + min = minor(sb.st_rdev); + + if (!S_ISCHR(sb.st_mode)) { + DBG("DRM fd is not a character device as expected\n"); + return false; + } + + len = snprintf(perf->sysfs_dev_dir, + sizeof(perf->sysfs_dev_dir), + "/sys/dev/char/%d:%d/device/drm", maj, min); + if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) { + DBG("Failed to concatenate sysfs path to drm device\n"); + return false; + } + + drmdir = opendir(perf->sysfs_dev_dir); + if (!drmdir) { + DBG("Failed to open %s: %m\n", perf->sysfs_dev_dir); + return false; + } + + while ((drm_entry = readdir(drmdir))) { + if ((drm_entry->d_type == DT_DIR || + drm_entry->d_type == DT_LNK) && + strncmp(drm_entry->d_name, "card", 4) == 0) + { + len = snprintf(perf->sysfs_dev_dir, + sizeof(perf->sysfs_dev_dir), + "/sys/dev/char/%d:%d/device/drm/%s", + maj, min, drm_entry->d_name); + closedir(drmdir); + if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) + return false; + else + return true; + } + } + + closedir(drmdir); + + DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n", + maj, min); + + return false; +} + +static bool +read_file_uint64(const char *file, uint64_t *val) +{ + char buf[32]; + int fd, n; + + fd = open(file, 0); + if (fd < 0) + return false; + while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 && + errno == EINTR); + close(fd); + if (n < 
0) + return false; + + buf[n] = '\0'; + *val = strtoull(buf, NULL, 0); + + return true; +} + +static bool +read_sysfs_drm_device_file_uint64(struct gen_perf *perf, + const char *file, + uint64_t *value) +{ + char buf[512]; + int len; + + len = snprintf(buf, sizeof(buf), "%s/%s", perf->sysfs_dev_dir, file); + if (len < 0 || len >= sizeof(buf)) { + DBG("Failed to concatenate sys filename to read u64 from\n"); + return false; + } + + return read_file_uint64(buf, value); +} + +static void +register_oa_config(struct gen_perf *perf, + const struct gen_perf_query_info *query, + uint64_t config_id) +{ + struct gen_perf_query_info *registred_query = + gen_perf_query_append_query_info(perf, 0); + + *registred_query = *query; + registred_query->oa_metrics_set_id = config_id; + DBG("metric set registred: id = %" PRIu64", guid = %s\n", + registred_query->oa_metrics_set_id, query->guid); +} + +static void +enumerate_sysfs_metrics(struct gen_perf *perf) +{ + DIR *metricsdir = NULL; + struct dirent *metric_entry; + char buf[256]; + int len; + + len = snprintf(buf, sizeof(buf), "%s/metrics", perf->sysfs_dev_dir); + if (len < 0 || len >= sizeof(buf)) { + DBG("Failed to concatenate path to sysfs metrics/ directory\n"); + return; + } + + metricsdir = opendir(buf); + if (!metricsdir) { + DBG("Failed to open %s: %m\n", buf); + return; + } + + while ((metric_entry = readdir(metricsdir))) { + struct hash_entry *entry; + + if ((metric_entry->d_type != DT_DIR && + metric_entry->d_type != DT_LNK) || + metric_entry->d_name[0] == '.') + continue; + + DBG("metric set: %s\n", metric_entry->d_name); + entry = _mesa_hash_table_search(perf->oa_metrics_table, + metric_entry->d_name); + if (entry) { + uint64_t id; + + len = snprintf(buf, sizeof(buf), "%s/metrics/%s/id", + perf->sysfs_dev_dir, metric_entry->d_name); + if (len < 0 || len >= sizeof(buf)) { + DBG("Failed to concatenate path to sysfs metric id file\n"); + continue; + } + + if (!read_file_uint64(buf, &id)) { + DBG("Failed to read metric 
set id from %s: %m", buf); + continue; + } + + register_oa_config(perf, (const struct gen_perf_query_info *)entry->data, id); + } else + DBG("metric set not known by mesa (skipping)\n"); + } + + closedir(metricsdir); +} + +static bool +kernel_has_dynamic_config_support(struct gen_perf *perf, int fd) +{ + hash_table_foreach(perf->oa_metrics_table, entry) { + struct gen_perf_query_info *query = entry->data; + char config_path[280]; + uint64_t config_id; + + snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", + perf->sysfs_dev_dir, query->guid); + + /* Look for the test config, which we know we can't replace. */ + if (read_file_uint64(config_path, &config_id) && config_id == 1) { + return perf->ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, + &config_id) < 0 && errno == ENOENT; + } + } + + return false; +} + +bool +gen_perf_load_metric_id(struct gen_perf *perf, const char *guid, + uint64_t *metric_id) +{ + char config_path[280]; + + snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", + perf->sysfs_dev_dir, guid); + + /* Don't recreate already loaded configs. 
*/ + return read_file_uint64(config_path, metric_id); +} + +static void +init_oa_configs(struct gen_perf *perf, int fd) +{ + hash_table_foreach(perf->oa_metrics_table, entry) { + const struct gen_perf_query_info *query = entry->data; + struct drm_i915_perf_oa_config config; + uint64_t config_id; + int ret; + + if (gen_perf_load_metric_id(perf, query->guid, &config_id)) { + DBG("metric set: %s (already loaded)\n", query->guid); + register_oa_config(perf, query, config_id); + continue; + } + + memset(&config, 0, sizeof(config)); + + memcpy(config.uuid, query->guid, sizeof(config.uuid)); + + config.n_mux_regs = query->n_mux_regs; + config.mux_regs_ptr = (uintptr_t) query->mux_regs; + + config.n_boolean_regs = query->n_b_counter_regs; + config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs; + + config.n_flex_regs = query->n_flex_regs; + config.flex_regs_ptr = (uintptr_t) query->flex_regs; + + ret = perf->ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); + if (ret < 0) { + DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n", + query->name, query->guid, strerror(errno)); + continue; + } + + register_oa_config(perf, query, ret); + DBG("metric set: %s (added)\n", query->guid); + } +} + +static void +compute_topology_builtins(struct gen_perf *perf, + const struct gen_device_info *devinfo) +{ + perf->sys_vars.slice_mask = devinfo->slice_masks; + perf->sys_vars.n_eu_slices = devinfo->num_slices; + + for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) { + perf->sys_vars.n_eu_sub_slices += + __builtin_popcount(devinfo->subslice_masks[i]); + } + + for (int i = 0; i < sizeof(devinfo->eu_masks); i++) + perf->sys_vars.n_eus += __builtin_popcount(devinfo->eu_masks[i]); + + perf->sys_vars.eu_threads_count = + perf->sys_vars.n_eus * devinfo->num_thread_per_eu; + + /* The subslice mask builtin contains bits for all slices. Prior to Gen11 + * it had groups of 3bits for each slice, on Gen11 it's 8bits for each + * slice. 
+ * + * Ideally equations would be updated to have a slice/subslice query + * function/operator. + */ + perf->sys_vars.subslice_mask = 0; + + int bits_per_subslice = devinfo->gen == 11 ? 8 : 3; + + for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) { + for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) { + if (gen_device_info_subslice_available(devinfo, s, ss)) + perf->sys_vars.subslice_mask |= 1ULL << (s * bits_per_subslice + ss); + } + } +} + +static bool +init_oa_sys_vars(struct gen_perf *perf, const struct gen_device_info *devinfo) +{ + uint64_t min_freq_mhz = 0, max_freq_mhz = 0; + + if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz)) + return false; + + if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz)) + return false; + + memset(&perf->sys_vars, 0, sizeof(perf->sys_vars)); + perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000; + perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000; + perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency; + perf->sys_vars.revision = devinfo->revision; + compute_topology_builtins(perf, devinfo); + + return true; +} + +typedef void (*perf_register_oa_queries_t)(struct gen_perf *); + +static perf_register_oa_queries_t +get_register_queries_function(const struct gen_device_info *devinfo) +{ + if (devinfo->is_haswell) + return gen_oa_register_queries_hsw; + if (devinfo->is_cherryview) + return gen_oa_register_queries_chv; + if (devinfo->is_broadwell) + return gen_oa_register_queries_bdw; + if (devinfo->is_broxton) + return gen_oa_register_queries_bxt; + if (devinfo->is_skylake) { + if (devinfo->gt == 2) + return gen_oa_register_queries_sklgt2; + if (devinfo->gt == 3) + return gen_oa_register_queries_sklgt3; + if (devinfo->gt == 4) + return gen_oa_register_queries_sklgt4; + } + if (devinfo->is_kabylake) { + if (devinfo->gt == 2) + return gen_oa_register_queries_kblgt2; + if (devinfo->gt == 3) + return gen_oa_register_queries_kblgt3; + } + 
if (devinfo->is_geminilake) + return gen_oa_register_queries_glk; + if (devinfo->is_coffeelake) { + if (devinfo->gt == 2) + return gen_oa_register_queries_cflgt2; + if (devinfo->gt == 3) + return gen_oa_register_queries_cflgt3; + } + if (devinfo->is_cannonlake) + return gen_oa_register_queries_cnl; + + return NULL; +} + +bool +gen_perf_load_oa_metrics(struct gen_perf *perf, int fd, + const struct gen_device_info *devinfo) +{ + perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo); + bool i915_perf_oa_available = false; + struct stat sb; + + /* The existence of this sysctl parameter implies the kernel supports + * the i915 perf interface. + */ + if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) { + + /* If _paranoid == 1 then on Gen8+ we won't be able to access OA + * metrics unless running as root. + */ + if (devinfo->is_haswell) + i915_perf_oa_available = true; + else { + uint64_t paranoid = 1; + + read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", &paranoid); + + if (paranoid == 0 || geteuid() == 0) + i915_perf_oa_available = true; + } + } + + if (!i915_perf_oa_available || + !oa_register || + !get_sysfs_dev_dir(perf, fd) || + !init_oa_sys_vars(perf, devinfo)) + return false; + + perf->oa_metrics_table = + _mesa_hash_table_create(perf, _mesa_key_hash_string, + _mesa_key_string_equal); + + /* Index all the metric sets mesa knows about before looking to see what + * the kernel is advertising. 
+ */ + oa_register(perf); + + if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) && + kernel_has_dynamic_config_support(perf, fd)) + init_oa_configs(perf, fd); + else + enumerate_sysfs_metrics(perf); + + return true; +} diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h new file mode 100644 index 00000000000..5d47ebd2925 --- /dev/null +++ b/src/intel/perf/gen_perf.h @@ -0,0 +1,258 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef GEN_PERF_H +#define GEN_PERF_H + +#include <stdio.h> +#include <stdint.h> +#include <string.h> + +#include <sys/sysmacros.h> + +#include "util/hash_table.h" +#include "util/ralloc.h" + +struct gen_device_info; + +struct gen_perf; +struct gen_perf_query_info; + +enum gen_perf_counter_type { + GEN_PERF_COUNTER_TYPE_EVENT, + GEN_PERF_COUNTER_TYPE_DURATION_NORM, + GEN_PERF_COUNTER_TYPE_DURATION_RAW, + GEN_PERF_COUNTER_TYPE_THROUGHPUT, + GEN_PERF_COUNTER_TYPE_RAW, + GEN_PERF_COUNTER_TYPE_TIMESTAMP, +}; + +enum gen_perf_counter_data_type { + GEN_PERF_COUNTER_DATA_TYPE_BOOL32, + GEN_PERF_COUNTER_DATA_TYPE_UINT32, + GEN_PERF_COUNTER_DATA_TYPE_UINT64, + GEN_PERF_COUNTER_DATA_TYPE_FLOAT, + GEN_PERF_COUNTER_DATA_TYPE_DOUBLE, +}; + +struct gen_pipeline_stat { + uint32_t reg; + uint32_t numerator; + uint32_t denominator; +}; + +struct gen_perf_query_counter { + const char *name; + const char *desc; + enum gen_perf_counter_type type; + enum gen_perf_counter_data_type data_type; + uint64_t raw_max; + size_t offset; + size_t size; + + union { + uint64_t (*oa_counter_read_uint64)(struct gen_perf *perf, + const struct gen_perf_query_info *query, + uint64_t *accumulator); + float (*oa_counter_read_float)(struct gen_perf *perf, + const struct gen_perf_query_info *query, + uint64_t *accumulator); + struct gen_pipeline_stat pipeline_stat; + }; +}; + +struct gen_perf_query_register_prog { + uint32_t reg; + uint32_t val; +}; + +struct gen_perf_query_info { + enum gen_perf_query_type { + GEN_PERF_QUERY_TYPE_OA, + GEN_PERF_QUERY_TYPE_RAW, + GEN_PERF_QUERY_TYPE_PIPELINE, + } kind; + const char *name; + const char *guid; + struct gen_perf_query_counter *counters; + int n_counters; + int max_counters; + size_t data_size; + + /* OA specific */ + uint64_t oa_metrics_set_id; + int oa_format; + + /* For indexing into the accumulator[] ... 
*/ + int gpu_time_offset; + int gpu_clock_offset; + int a_offset; + int b_offset; + int c_offset; + + /* Register programming for a given query */ + struct gen_perf_query_register_prog *flex_regs; + uint32_t n_flex_regs; + + struct gen_perf_query_register_prog *mux_regs; + uint32_t n_mux_regs; + + struct gen_perf_query_register_prog *b_counter_regs; + uint32_t n_b_counter_regs; +}; + +struct gen_perf { + struct gen_perf_query_info *queries; + int n_queries; + + /* Variables referenced in the XML meta data for OA performance + * counters, e.g in the normalization equations. + * + * All uint64_t for consistent operand types in generated code + */ + struct { + uint64_t timestamp_frequency; /** $GpuTimestampFrequency */ + uint64_t n_eus; /** $EuCoresTotalCount */ + uint64_t n_eu_slices; /** $EuSlicesTotalCount */ + uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */ + uint64_t eu_threads_count; /** $EuThreadsCount */ + uint64_t slice_mask; /** $SliceMask */ + uint64_t subslice_mask; /** $SubsliceMask */ + uint64_t gt_min_freq; /** $GpuMinFrequency */ + uint64_t gt_max_freq; /** $GpuMaxFrequency */ + uint64_t revision; /** $SkuRevisionId */ + } sys_vars; + + /* OA metric sets, indexed by GUID, as know by Mesa at build time, to + * cross-reference with the GUIDs of configs advertised by the kernel at + * runtime + */ + struct hash_table *oa_metrics_table; + + /* Location of the device's sysfs entry. 
*/ + char sysfs_dev_dir[256]; + + int (*ioctl)(int, unsigned long, void *); +}; + +static inline struct gen_perf_query_info * +gen_perf_query_append_query_info(struct gen_perf *perf, int max_counters) +{ + struct gen_perf_query_info *query; + + perf->queries = reralloc(perf, perf->queries, + struct gen_perf_query_info, + ++perf->n_queries); + query = &perf->queries[perf->n_queries - 1]; + memset(query, 0, sizeof(*query)); + + if (max_counters > 0) { + query->max_counters = max_counters; + query->counters = + rzalloc_array(perf, struct gen_perf_query_counter, max_counters); + } + + return query; +} + +static inline void +gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query, + uint32_t reg, + uint32_t numerator, + uint32_t denominator, + const char *name, + const char *description) +{ + struct gen_perf_query_counter *counter; + + assert(query->n_counters < query->max_counters); + + counter = &query->counters[query->n_counters]; + counter->name = name; + counter->desc = description; + counter->type = GEN_PERF_COUNTER_TYPE_RAW; + counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64; + counter->size = sizeof(uint64_t); + counter->offset = sizeof(uint64_t) * query->n_counters; + counter->pipeline_stat.reg = reg; + counter->pipeline_stat.numerator = numerator; + counter->pipeline_stat.denominator = denominator; + + query->n_counters++; +} + +static inline void +gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query, + uint32_t reg, const char *name) +{ + gen_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name); +} + +/* Accumulate 32bits OA counters */ +static inline void +gen_perf_query_accumulate_uint32(const uint32_t *report0, + const uint32_t *report1, + uint64_t *accumulator) +{ + *accumulator += (uint32_t)(*report1 - *report0); +} + +/* Accumulate 40bits OA counters */ +static inline void +gen_perf_query_accumulate_uint40(int a_index, + const uint32_t *report0, + const uint32_t *report1, + uint64_t *accumulator) +{ + const 
uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); + const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); + uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; + uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; + uint64_t value0 = report0[a_index + 4] | high0; + uint64_t value1 = report1[a_index + 4] | high1; + uint64_t delta; + + if (value0 > value1) + delta = (1ULL << 40) + value1 - value0; + else + delta = value1 - value0; + + *accumulator += delta; +} + +static inline struct gen_perf * +gen_perf_new(void *ctx, int (*ioctl_cb)(int, unsigned long, void *)) +{ + struct gen_perf *perf = rzalloc(ctx, struct gen_perf); + + perf->ioctl = ioctl_cb; + + return perf; +} + +bool gen_perf_load_oa_metrics(struct gen_perf *perf, int fd, + const struct gen_device_info *devinfo); +bool gen_perf_load_metric_id(struct gen_perf *perf, const char *guid, + uint64_t *metric_id); + +#endif /* GEN_PERF_H */ diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/intel/perf/gen_perf.py index 75382558e6d..c41e8f3eb11 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/intel/perf/gen_perf.py @@ -176,16 +176,16 @@ exp_ops["&&"] = (2, splice_logical_and) hw_vars = {} -hw_vars["$EuCoresTotalCount"] = "brw->perfquery.sys_vars.n_eus" -hw_vars["$EuSlicesTotalCount"] = "brw->perfquery.sys_vars.n_eu_slices" -hw_vars["$EuSubslicesTotalCount"] = "brw->perfquery.sys_vars.n_eu_sub_slices" -hw_vars["$EuThreadsCount"] = "brw->perfquery.sys_vars.eu_threads_count" -hw_vars["$SliceMask"] = "brw->perfquery.sys_vars.slice_mask" -hw_vars["$SubsliceMask"] = "brw->perfquery.sys_vars.subslice_mask" -hw_vars["$GpuTimestampFrequency"] = "brw->perfquery.sys_vars.timestamp_frequency" -hw_vars["$GpuMinFrequency"] = "brw->perfquery.sys_vars.gt_min_freq" -hw_vars["$GpuMaxFrequency"] = "brw->perfquery.sys_vars.gt_max_freq" -hw_vars["$SkuRevisionId"] = "brw->perfquery.sys_vars.revision" +hw_vars["$EuCoresTotalCount"] = "perf->sys_vars.n_eus" +hw_vars["$EuSlicesTotalCount"] = 
"perf->sys_vars.n_eu_slices" +hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices" +hw_vars["$EuThreadsCount"] = "perf->sys_vars.eu_threads_count" +hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask" +hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask" +hw_vars["$GpuTimestampFrequency"] = "perf->sys_vars.timestamp_frequency" +hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq" +hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq" +hw_vars["$SkuRevisionId"] = "perf->sys_vars.revision" def output_rpn_equation_code(set, counter, equation): c("/* RPN equation: " + equation + " */") @@ -207,7 +207,7 @@ def output_rpn_equation_code(set, counter, equation): operand = hw_vars[operand] elif operand in set.counter_vars: reference = set.counter_vars[operand] - operand = set.read_funcs[operand[1:]] + "(brw, query, accumulator)" + operand = set.read_funcs[operand[1:]] + "(perf, query, accumulator)" else: raise Exception("Failed to resolve variable " + operand + " in equation " + equation + " for " + set.name + " :: " + counter.get('name')); args.append(operand) @@ -227,7 +227,7 @@ def output_rpn_equation_code(set, counter, equation): if value in hw_vars: value = hw_vars[value] if value in set.counter_vars: - value = set.read_funcs[value[1:]] + "(brw, query, accumulator)" + value = set.read_funcs[value[1:]] + "(perf, query, accumulator)" c("\nreturn " + value + ";") @@ -278,9 +278,9 @@ def output_counter_read(gen, set, counter): read_eq = counter.get('equation') c("static " + ret_type) - c(counter.read_sym + "(MAYBE_UNUSED struct brw_context *brw,\n") + c(counter.read_sym + "(MAYBE_UNUSED struct gen_perf *perf,\n") c_indent(len(counter.read_sym) + 1) - c("const struct brw_perf_query_info *query,\n") + c("const struct gen_perf_query_info *query,\n") c("uint64_t *accumulator)\n") c_outdent(len(counter.read_sym) + 1) @@ -313,7 +313,7 @@ def output_counter_max(gen, set, counter): ret_type = "uint64_t" c("static " + ret_type) - c(counter.max_sym() + 
"(struct brw_context *brw)\n") + c(counter.max_sym() + "(struct gen_perf *perf)\n") c("{") c_indent(3) output_rpn_equation_code(set, counter, max_eq) @@ -375,8 +375,8 @@ def output_counter_report(set, counter, current_offset): c("counter->oa_counter_read_" + data_type + " = " + set.read_funcs[counter.get('symbol_name')] + ";\n") c("counter->name = \"" + counter.get('name') + "\";\n") c("counter->desc = \"" + counter.get('description') + "\";\n") - c("counter->type = GL_PERFQUERY_COUNTER_" + semantic_type_uc + "_INTEL;\n") - c("counter->data_type = GL_PERFQUERY_COUNTER_DATA_" + data_type_uc + "_INTEL;\n") + c("counter->type = GEN_PERF_COUNTER_TYPE_" + semantic_type_uc + ";\n") + c("counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ";\n") c("counter->raw_max = " + set.max_values[counter.get('symbol_name')] + ";\n") current_offset = pot_align(current_offset, sizeof(c_type)) @@ -420,7 +420,7 @@ def generate_register_configs(set): c_indent(3) for register in register_config.findall('register'): - c("query->%s[query->n_%s++] = (struct brw_perf_query_register_prog) { .reg = %s, .val = %s };" % + c("query->%s[query->n_%s++] = (struct gen_perf_query_register_prog) { .reg = %s, .val = %s };" % (t, t, register.get('address'), register.get('value'))) if availability: @@ -429,7 +429,7 @@ def generate_register_configs(set): c("\n") -# Wraps a <counter> element from the brw_oa_*.xml files. +# Wraps a <counter> element from the oa-*.xml files. class Counter: def __init__(self, set, xml): self.xml = xml @@ -501,11 +501,11 @@ class Counter: if token[0] == '$' and token not in hw_vars: return "0 /* unsupported (varies over time) */" - return "{0}__{1}__{2}__max(brw)".format(self.set.gen.chipset, - self.set.underscore_name, - self.xml.get('underscore_name')) + return "{0}__{1}__{2}__max(perf)".format(self.set.gen.chipset, + self.set.underscore_name, + self.xml.get('underscore_name')) -# Wraps a <set> element from the brw_oa_*.xml files. 
+# Wraps a <set> element from the oa-*.xml files. class Set: def __init__(self, gen, xml): self.gen = gen @@ -550,7 +550,7 @@ class Set: return self.xml.find(path) -# Wraps an entire brw_oa_*.xml file. +# Wraps an entire oa-*.xml file. class Gen: def __init__(self, filename): self.filename = filename @@ -573,8 +573,8 @@ def main(): args = parser.parse_args() - header_file = open(args.header, 'w') c_file = open(args.code, 'w') + header_file = open(args.header, 'w') gens = [] for xml_file in args.xml_files: @@ -612,7 +612,7 @@ def main(): h(textwrap.dedent("""\ #pragma once - struct brw_context; + struct gen_perf; """)) @@ -621,6 +621,8 @@ def main(): #include <stdint.h> #include <stdbool.h> + #include <drm-uapi/i915_drm.h> + #include "util/hash_table.h" """)) @@ -628,8 +630,7 @@ def main(): c("#include \"" + os.path.basename(args.header) + "\"") c(textwrap.dedent("""\ - #include "brw_context.h" - #include "brw_performance_query_metrics.h" + #include "perf/gen_perf.h" #define MIN(a, b) ((a < b) ? 
(a) : (b)) @@ -654,15 +655,15 @@ def main(): c("\n") register_lengths = compute_register_lengths(set); for reg_type, reg_length in register_lengths.items(): - c("static struct brw_perf_query_register_prog {0}_{1}_{2}[{3}];".format(gen.chipset, + c("static struct gen_perf_query_register_prog {0}_{1}_{2}[{3}];".format(gen.chipset, set.underscore_name, reg_type, reg_length)) - c("\nstatic struct brw_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(gen.chipset, set.underscore_name, len(counters))) - c("static struct brw_perf_query_info " + gen.chipset + "_" + set.underscore_name + "_query = {\n") + c("\nstatic struct gen_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(gen.chipset, set.underscore_name, len(counters))) + c("static struct gen_perf_query_info " + gen.chipset + "_" + set.underscore_name + "_query = {\n") c_indent(3) - c(".kind = OA_COUNTERS,\n") + c(".kind = GEN_PERF_QUERY_TYPE_OA,\n") c(".name = \"" + set.name + "\",\n") c(".guid = \"" + set.hw_config_guid + "\",\n") @@ -700,12 +701,12 @@ def main(): c("};\n") c("\nstatic void\n") - c("{0}_register_{1}_counter_query(struct brw_context *brw)\n".format(gen.chipset, set.underscore_name)) + c("{0}_register_{1}_counter_query(struct gen_perf *perf)\n".format(gen.chipset, set.underscore_name)) c("{\n") c_indent(3) - c("static struct brw_perf_query_info *query = &" + gen.chipset + "_" + set.underscore_name + "_query;\n") - c("struct brw_perf_query_counter *counter;\n") + c("static struct gen_perf_query_info *query = &" + gen.chipset + "_" + set.underscore_name + "_query;\n") + c("struct gen_perf_query_counter *counter;\n") c("\n") c("/* Note: we're assuming there can't be any variation in the definition ") @@ -726,20 +727,20 @@ def main(): c_outdent(3) c("}"); - c("\n_mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);") + c("\n_mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);") c_outdent(3) c("}\n") - h("void brw_oa_register_queries_" + gen.chipset + 
"(struct brw_context *brw);\n") + h("void gen_oa_register_queries_" + gen.chipset + "(struct gen_perf *perf);\n") c("\nvoid") - c("brw_oa_register_queries_" + gen.chipset + "(struct brw_context *brw)") + c("gen_oa_register_queries_" + gen.chipset + "(struct gen_perf *perf)") c("{") c_indent(3) for set in gen.sets: - c("{0}_register_{1}_counter_query(brw);".format(gen.chipset, set.underscore_name)) + c("{0}_register_{1}_counter_query(perf);".format(gen.chipset, set.underscore_name)) c_outdent(3) c("}") diff --git a/src/intel/perf/meson.build b/src/intel/perf/meson.build new file mode 100644 index 00000000000..3620f6885a4 --- /dev/null +++ b/src/intel/perf/meson.build @@ -0,0 +1,40 @@ +gen_hw_metrics = [ + 'hsw', + 'bdw', 'chv', + 'sklgt2', 'sklgt3', 'sklgt4', + 'kblgt2', 'kblgt3', + 'cflgt2', 'cflgt3', + 'bxt', 'glk', + 'cnl', + 'icl', +] + +gen_hw_metrics_xml_files = [] +foreach hw : gen_hw_metrics + gen_hw_metrics_xml_files += 'oa-@[email protected]'.format(hw) +endforeach + +gen_perf_sources = [ + 'gen_perf.c' +] + +gen_perf_sources += custom_target( + 'intel-perf-sources', + input : gen_hw_metrics_xml_files, + output : [ 'gen_perf_metrics.c', 'gen_perf_metrics.h' ], + command : [ + prog_python, files('gen_perf.py'), + '--code', '@OUTPUT0@', '--header', '@OUTPUT1@', + '@INPUT@', + ], +) + +libintel_perf = static_library( + 'intel_perf', + gen_perf_sources, + include_directories : [ + inc_common, inc_intel, inc_util, + ], + c_args : [c_vis_args, no_override_init_args, '-msse2'], + cpp_args : [cpp_vis_args, '-msse2'], +) diff --git a/src/mesa/drivers/dri/i965/brw_oa_bdw.xml b/src/intel/perf/oa-bdw.xml index 714a1f08ea7..714a1f08ea7 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_bdw.xml +++ b/src/intel/perf/oa-bdw.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_bxt.xml b/src/intel/perf/oa-bxt.xml index db018a3d62b..db018a3d62b 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_bxt.xml +++ b/src/intel/perf/oa-bxt.xml diff --git 
a/src/mesa/drivers/dri/i965/brw_oa_cflgt2.xml b/src/intel/perf/oa-cflgt2.xml index b167c11fc75..b167c11fc75 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_cflgt2.xml +++ b/src/intel/perf/oa-cflgt2.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_cflgt3.xml b/src/intel/perf/oa-cflgt3.xml index f8b878977f9..f8b878977f9 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_cflgt3.xml +++ b/src/intel/perf/oa-cflgt3.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_chv.xml b/src/intel/perf/oa-chv.xml index 85c50bb43bb..85c50bb43bb 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_chv.xml +++ b/src/intel/perf/oa-chv.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_cnl.xml b/src/intel/perf/oa-cnl.xml index 182d6c4aa65..182d6c4aa65 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_cnl.xml +++ b/src/intel/perf/oa-cnl.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_glk.xml b/src/intel/perf/oa-glk.xml index e3da757f46f..e3da757f46f 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_glk.xml +++ b/src/intel/perf/oa-glk.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_hsw.xml b/src/intel/perf/oa-hsw.xml index a3bed73086b..a3bed73086b 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_hsw.xml +++ b/src/intel/perf/oa-hsw.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_icl.xml b/src/intel/perf/oa-icl.xml index cc1945bc83f..cc1945bc83f 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_icl.xml +++ b/src/intel/perf/oa-icl.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml b/src/intel/perf/oa-kblgt2.xml index c6f66c504cc..c6f66c504cc 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml +++ b/src/intel/perf/oa-kblgt2.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml b/src/intel/perf/oa-kblgt3.xml index 0d90451a2e7..0d90451a2e7 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml +++ b/src/intel/perf/oa-kblgt3.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml b/src/intel/perf/oa-sklgt2.xml index 4fe6f877e4c..4fe6f877e4c 100644 --- 
a/src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml +++ b/src/intel/perf/oa-sklgt2.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml b/src/intel/perf/oa-sklgt3.xml index 7fc5e8da02b..7fc5e8da02b 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml +++ b/src/intel/perf/oa-sklgt3.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml b/src/intel/perf/oa-sklgt4.xml index 30a1d172996..30a1d172996 100644 --- a/src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml +++ b/src/intel/perf/oa-sklgt4.xml diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index b9ce93f7a97..29b46147f39 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -289,7 +289,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ libmesa_intel_common \ libmesa_isl \ libmesa_blorp \ - libmesa_intel_compiler + libmesa_intel_compiler \ + libmesa_intel_perf ifeq ($(ARCH_X86_HAVE_SSE4_1),true) LOCAL_CFLAGS += \ diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index ae609361989..01aeae89980 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -172,23 +172,3 @@ i965_gen11_FILES = \ genX_boilerplate.h \ genX_pipe_control.c \ genX_state_upload.c - -i965_oa_GENERATED_FILES = \ - brw_oa_metrics.c \ - brw_oa_metrics.h - -i965_oa_xml_FILES = \ - brw_oa_hsw.xml \ - brw_oa_bdw.xml \ - brw_oa_chv.xml \ - brw_oa_sklgt2.xml \ - brw_oa_sklgt3.xml \ - brw_oa_sklgt4.xml \ - brw_oa_bxt.xml \ - brw_oa_kblgt2.xml \ - brw_oa_kblgt3.xml \ - brw_oa_glk.xml \ - brw_oa_cflgt2.xml \ - brw_oa_cflgt3.xml \ - brw_oa_cnl.xml \ - brw_oa_icl.xml diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d6187541a8c..23048428f3e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -681,48 +681,7 @@ enum brw_predicate_state { struct shader_times; struct gen_l3_config; - -enum 
brw_query_kind { - OA_COUNTERS, - OA_COUNTERS_RAW, - PIPELINE_STATS, -}; - -struct brw_perf_query_register_prog { - uint32_t reg; - uint32_t val; -}; - -struct brw_perf_query_info -{ - enum brw_query_kind kind; - const char *name; - const char *guid; - struct brw_perf_query_counter *counters; - int n_counters; - size_t data_size; - - /* OA specific */ - uint64_t oa_metrics_set_id; - int oa_format; - - /* For indexing into the accumulator[] ... */ - int gpu_time_offset; - int gpu_clock_offset; - int a_offset; - int b_offset; - int c_offset; - - /* Register programming for a given query */ - struct brw_perf_query_register_prog *flex_regs; - uint32_t n_flex_regs; - - struct brw_perf_query_register_prog *mux_regs; - uint32_t n_mux_regs; - - struct brw_perf_query_register_prog *b_counter_regs; - uint32_t n_b_counter_regs; -}; +struct gen_perf; struct brw_uploader { struct brw_bufmgr *bufmgr; @@ -1203,35 +1162,7 @@ struct brw_context } predicate; struct { - /* Variables referenced in the XML meta data for OA performance - * counters, e.g in the normalization equations. - * - * All uint64_t for consistent operand types in generated code - */ - struct { - uint64_t timestamp_frequency; /** $GpuTimestampFrequency */ - uint64_t n_eus; /** $EuCoresTotalCount */ - uint64_t n_eu_slices; /** $EuSlicesTotalCount */ - uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */ - uint64_t eu_threads_count; /** $EuThreadsCount */ - uint64_t slice_mask; /** $SliceMask */ - uint64_t subslice_mask; /** $SubsliceMask */ - uint64_t gt_min_freq; /** $GpuMinFrequency */ - uint64_t gt_max_freq; /** $GpuMaxFrequency */ - uint64_t revision; /** $SkuRevisionId */ - } sys_vars; - - /* OA metric sets, indexed by GUID, as know by Mesa at build time, - * to cross-reference with the GUIDs of configs advertised by the - * kernel at runtime - */ - struct hash_table *oa_metrics_table; - - /* Location of the device's sysfs entry. 
*/ - char sysfs_dev_dir[256]; - - struct brw_perf_query_info *queries; - int n_queries; + struct gen_perf *perf; /* The i915 perf stream we open to setup + enable the OA counters */ int oa_stream_fd; diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 7676b2bdd6b..3207be11569 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -40,7 +40,6 @@ */ #include <limits.h> -#include <dirent.h> /* put before sys/types.h to silence glibc warnings */ #ifdef MAJOR_IN_MKDEV @@ -72,9 +71,10 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_performance_query.h" -#include "brw_oa_metrics.h" #include "intel_batchbuffer.h" +#include "perf/gen_perf.h" + #define FILE_DEBUG_FLAG DEBUG_PERFMON #define OAREPORT_REASON_MASK 0x3f @@ -223,61 +223,20 @@ brw_perf_query(struct gl_perf_query_object *o) /******************************************************************************/ static bool -read_file_uint64(const char *file, uint64_t *val) -{ - char buf[32]; - int fd, n; - - fd = open(file, 0); - if (fd < 0) - return false; - while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 && - errno == EINTR); - close(fd); - if (n < 0) - return false; - - buf[n] = '\0'; - *val = strtoull(buf, NULL, 0); - - return true; -} - -static bool -read_sysfs_drm_device_file_uint64(struct brw_context *brw, - const char *file, - uint64_t *value) -{ - char buf[512]; - int len; - - len = snprintf(buf, sizeof(buf), "%s/%s", - brw->perfquery.sysfs_dev_dir, file); - if (len < 0 || len >= sizeof(buf)) { - DBG("Failed to concatenate sys filename to read u64 from\n"); - return false; - } - - return read_file_uint64(buf, value); -} - -/******************************************************************************/ - -static bool brw_is_perf_query_ready(struct gl_context *ctx, struct gl_perf_query_object *o); static uint64_t brw_perf_query_get_metric_id(struct brw_context 
*brw, - const struct brw_perf_query_info *query) + const struct gen_perf_query_info *query) { /* These queries are know not to ever change, their config ID has been * loaded upon the first query creation. No need to look them up again. */ - if (query->kind == OA_COUNTERS) + if (query->kind == GEN_PERF_QUERY_TYPE_OA) return query->oa_metrics_set_id; - assert(query->kind == OA_COUNTERS_RAW); + assert(query->kind == GEN_PERF_QUERY_TYPE_RAW); /* Raw queries can be reprogrammed up by an external application/library. * When a raw query is used for the first time it's id is set to a value != @@ -290,12 +249,9 @@ brw_perf_query_get_metric_id(struct brw_context *brw, return query->oa_metrics_set_id; } - char metric_id_file[280]; - snprintf(metric_id_file, sizeof(metric_id_file), - "%s/metrics/%s/id", brw->perfquery.sysfs_dev_dir, query->guid); - - struct brw_perf_query_info *raw_query = (struct brw_perf_query_info *)query; - if (!read_file_uint64(metric_id_file, &raw_query->oa_metrics_set_id)) { + struct gen_perf_query_info *raw_query = (struct gen_perf_query_info *)query; + if (!gen_perf_load_metric_id(brw->perfquery.perf, query->guid, + &raw_query->oa_metrics_set_id)) { DBG("Unable to read query guid=%s ID, falling back to test config\n", query->guid); raw_query->oa_metrics_set_id = 1ULL; } else { @@ -313,8 +269,8 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) struct brw_perf_query_object *obj = query_void; switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: DBG("%4d: %-6s %-8s BO: %-4s OA data: %-10s %-15s\n", id, o->Used ? "Dirty," : "New,", @@ -323,7 +279,7 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) brw_is_perf_query_ready(ctx, o) ? "ready," : "not ready,", obj->oa.results_accumulated ? 
"accumulated" : "not accumulated"); break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: DBG("%4d: %-6s %-8s BO: %-4s\n", id, o->Used ? "Dirty," : "New,", @@ -414,20 +370,20 @@ brw_get_perf_query_info(struct gl_context *ctx, GLuint *n_active) { struct brw_context *brw = brw_context(ctx); - const struct brw_perf_query_info *query = - &brw->perfquery.queries[query_index]; + const struct gen_perf_query_info *query = + &brw->perfquery.perf->queries[query_index]; *name = query->name; *data_size = query->data_size; *n_counters = query->n_counters; switch (query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: *n_active = brw->perfquery.n_active_oa_queries; break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: *n_active = brw->perfquery.n_active_pipeline_stats_queries; break; @@ -437,6 +393,35 @@ brw_get_perf_query_info(struct gl_context *ctx, } } +static GLuint +gen_counter_type_enum_to_gl_type(enum gen_perf_counter_type type) +{ + switch (type) { + case GEN_PERF_COUNTER_TYPE_EVENT: return GL_PERFQUERY_COUNTER_EVENT_INTEL; + case GEN_PERF_COUNTER_TYPE_DURATION_NORM: return GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL; + case GEN_PERF_COUNTER_TYPE_DURATION_RAW: return GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL; + case GEN_PERF_COUNTER_TYPE_THROUGHPUT: return GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL; + case GEN_PERF_COUNTER_TYPE_RAW: return GL_PERFQUERY_COUNTER_RAW_INTEL; + case GEN_PERF_COUNTER_TYPE_TIMESTAMP: return GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL; + default: + unreachable("Unknown counter type"); + } +} + +static GLuint +gen_counter_data_type_to_gl_type(enum gen_perf_counter_data_type type) +{ + switch (type) { + case GEN_PERF_COUNTER_DATA_TYPE_BOOL32: return GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL; + case GEN_PERF_COUNTER_DATA_TYPE_UINT32: return GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL; + case GEN_PERF_COUNTER_DATA_TYPE_UINT64: return GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; + case 
GEN_PERF_COUNTER_DATA_TYPE_FLOAT: return GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL; + case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE: return GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL; + default: + unreachable("Unknown counter data type"); + } +} + /** * Driver hook for glGetPerfCounterInfoINTEL(). */ @@ -453,17 +438,17 @@ brw_get_perf_counter_info(struct gl_context *ctx, GLuint64 *raw_max) { struct brw_context *brw = brw_context(ctx); - const struct brw_perf_query_info *query = - &brw->perfquery.queries[query_index]; - const struct brw_perf_query_counter *counter = + const struct gen_perf_query_info *query = + &brw->perfquery.perf->queries[query_index]; + const struct gen_perf_query_counter *counter = &query->counters[counter_index]; *name = counter->name; *desc = counter->desc; *offset = counter->offset; *data_size = counter->size; - *type_enum = counter->type; - *data_type_enum = counter->data_type; + *type_enum = gen_counter_type_enum_to_gl_type(counter->type); + *data_type_enum = gen_counter_data_type_to_gl_type(counter->data_type); *raw_max = counter->raw_max; } @@ -478,13 +463,13 @@ snapshot_statistics_registers(struct brw_context *brw, struct brw_perf_query_object *obj, uint32_t offset_in_bytes) { - const struct brw_perf_query_info *query = obj->query; + const struct gen_perf_query_info *query = obj->query; const int n_counters = query->n_counters; for (int i = 0; i < n_counters; i++) { - const struct brw_perf_query_counter *counter = &query->counters[i]; + const struct gen_perf_query_counter *counter = &query->counters[i]; - assert(counter->data_type == GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL); + assert(counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64); brw_store_register_mem64(brw, obj->pipeline_stats.bo, counter->pipeline_stat.reg, @@ -567,7 +552,7 @@ add_deltas(struct brw_context *brw, const uint32_t *start, const uint32_t *end) { - const struct brw_perf_query_info *query = obj->query; + const struct gen_perf_query_info *query = obj->query; uint64_t *accumulator 
= obj->oa.accumulator; int idx = 0; int i; @@ -576,29 +561,29 @@ add_deltas(struct brw_context *brw, switch (query->oa_format) { case I915_OA_FORMAT_A32u40_A4u32_B8_C8: - brw_perf_query_accumulate_uint32(start + 1, end + 1, accumulator + idx++); /* timestamp */ - brw_perf_query_accumulate_uint32(start + 3, end + 3, accumulator + idx++); /* clock */ + gen_perf_query_accumulate_uint32(start + 1, end + 1, accumulator + idx++); /* timestamp */ + gen_perf_query_accumulate_uint32(start + 3, end + 3, accumulator + idx++); /* clock */ /* 32x 40bit A counters... */ for (i = 0; i < 32; i++) - brw_perf_query_accumulate_uint40(i, start, end, accumulator + idx++); + gen_perf_query_accumulate_uint40(i, start, end, accumulator + idx++); /* 4x 32bit A counters... */ for (i = 0; i < 4; i++) - brw_perf_query_accumulate_uint32(start + 36 + i, end + 36 + i, + gen_perf_query_accumulate_uint32(start + 36 + i, end + 36 + i, accumulator + idx++); /* 8x 32bit B counters + 8x 32bit C counters... */ for (i = 0; i < 16; i++) - brw_perf_query_accumulate_uint32(start + 48 + i, end + 48 + i, + gen_perf_query_accumulate_uint32(start + 48 + i, end + 48 + i, accumulator + idx++); break; case I915_OA_FORMAT_A45_B8_C8: - brw_perf_query_accumulate_uint32(start + 1, end + 1, accumulator); /* timestamp */ + gen_perf_query_accumulate_uint32(start + 1, end + 1, accumulator); /* timestamp */ for (i = 0; i < 61; i++) - brw_perf_query_accumulate_uint32(start + 3 + i, end + 3 + i, accumulator + 1 + i); + gen_perf_query_accumulate_uint32(start + 3 + i, end + 3 + i, accumulator + 1 + i); break; default: @@ -982,15 +967,15 @@ open_i915_perf_oa_stream(struct brw_context *brw, static void close_perf(struct brw_context *brw, - const struct brw_perf_query_info *query) + const struct gen_perf_query_info *query) { if (brw->perfquery.oa_stream_fd != -1) { close(brw->perfquery.oa_stream_fd); brw->perfquery.oa_stream_fd = -1; } - if (query->kind == OA_COUNTERS_RAW) { - struct brw_perf_query_info *raw_query = - (struct 
brw_perf_query_info *) query; + if (query->kind == GEN_PERF_QUERY_TYPE_RAW) { + struct gen_perf_query_info *raw_query = + (struct gen_perf_query_info *) query; raw_query->oa_metrics_set_id = 0; } } @@ -1019,7 +1004,7 @@ brw_begin_perf_query(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); - const struct brw_perf_query_info *query = obj->query; + const struct gen_perf_query_info *query = obj->query; /* We can assume the frontend hides mistaken attempts to Begin a * query object multiple times before its End. Similarly if an @@ -1079,8 +1064,8 @@ brw_begin_perf_query(struct gl_context *ctx, brw_emit_mi_flush(brw); switch (query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: { + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: { /* Opening an i915 perf stream implies exclusive access to the OA unit * which will generate counter reports for a specific counter set with a @@ -1130,12 +1115,12 @@ brw_begin_perf_query(struct gl_context *ctx, a_counter_in_bits = 40; uint64_t overflow_period = pow(2, a_counter_in_bits) / - (brw->perfquery.sys_vars.n_eus * + (brw->perfquery.perf->sys_vars.n_eus * /* drop 1GHz freq to have units in nanoseconds */ 2); DBG("A counter overflow period: %"PRIu64"ns, %"PRIu64"ms (n_eus=%"PRIu64")\n", - overflow_period, overflow_period / 1000000ul, brw->perfquery.sys_vars.n_eus); + overflow_period, overflow_period / 1000000ul, brw->perfquery.perf->sys_vars.n_eus); int period_exponent = 0; uint64_t prev_sample_period, next_sample_period; @@ -1234,7 +1219,7 @@ brw_begin_perf_query(struct gl_context *ctx, break; } - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: if (obj->pipeline_stats.bo) { brw_bo_unreference(obj->pipeline_stats.bo); obj->pipeline_stats.bo = NULL; @@ -1282,8 +1267,8 @@ brw_end_perf_query(struct gl_context *ctx, brw_emit_mi_flush(brw); switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case 
GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: /* NB: It's possible that the query will have already been marked * as 'accumulated' if an error was seen while reading samples @@ -1306,7 +1291,7 @@ brw_end_perf_query(struct gl_context *ctx, */ break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: snapshot_statistics_registers(brw, obj, STATS_BO_END_OFFSET_BYTES); --brw->perfquery.n_active_pipeline_stats_queries; @@ -1328,12 +1313,12 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) assert(!o->Ready); switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: bo = obj->oa.bo; break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: bo = obj->pipeline_stats.bo; break; @@ -1358,8 +1343,8 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) * we need to wait for all the reports to come in before we can * read them. */ - if (obj->query->kind == OA_COUNTERS || - obj->query->kind == OA_COUNTERS_RAW) { + if (obj->query->kind == GEN_PERF_QUERY_TYPE_OA || + obj->query->kind == GEN_PERF_QUERY_TYPE_RAW) { while (!read_oa_samples_for_query(brw, obj)) ; } @@ -1376,14 +1361,14 @@ brw_is_perf_query_ready(struct gl_context *ctx, return true; switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: return (obj->oa.results_accumulated || (obj->oa.bo && !brw_batch_references(&brw->batch, obj->oa.bo) && !brw_bo_busy(obj->oa.bo) && read_oa_samples_for_query(brw, obj))); - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: return (obj->pipeline_stats.bo && !brw_batch_references(&brw->batch, obj->pipeline_stats.bo) && !brw_bo_busy(obj->pipeline_stats.bo)); @@ -1489,25 +1474,26 @@ get_oa_counter_data(struct brw_context *brw, size_t data_size, uint8_t *data) { - const struct brw_perf_query_info *query = obj->query; + struct gen_perf *perf = 
brw->perfquery.perf; + const struct gen_perf_query_info *query = obj->query; int n_counters = query->n_counters; int written = 0; for (int i = 0; i < n_counters; i++) { - const struct brw_perf_query_counter *counter = &query->counters[i]; + const struct gen_perf_query_counter *counter = &query->counters[i]; uint64_t *out_uint64; float *out_float; if (counter->size) { switch (counter->data_type) { - case GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL: + case GEN_PERF_COUNTER_DATA_TYPE_UINT64: out_uint64 = (uint64_t *)(data + counter->offset); - *out_uint64 = counter->oa_counter_read_uint64(brw, query, + *out_uint64 = counter->oa_counter_read_uint64(perf, query, obj->oa.accumulator); break; - case GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL: + case GEN_PERF_COUNTER_DATA_TYPE_FLOAT: out_float = (float *)(data + counter->offset); - *out_float = counter->oa_counter_read_float(brw, query, + *out_float = counter->oa_counter_read_float(perf, query, obj->oa.accumulator); break; default: @@ -1528,7 +1514,7 @@ get_pipeline_stats_data(struct brw_context *brw, uint8_t *data) { - const struct brw_perf_query_info *query = obj->query; + const struct gen_perf_query_info *query = obj->query; int n_counters = obj->query->n_counters; uint8_t *p = data; @@ -1536,7 +1522,7 @@ get_pipeline_stats_data(struct brw_context *brw, uint64_t *end = start + (STATS_BO_END_OFFSET_BYTES / sizeof(uint64_t)); for (int i = 0; i < n_counters; i++) { - const struct brw_perf_query_counter *counter = &query->counters[i]; + const struct gen_perf_query_counter *counter = &query->counters[i]; uint64_t value = end[i] - start[i]; if (counter->pipeline_stat.numerator != @@ -1581,8 +1567,8 @@ brw_get_perf_query_data(struct gl_context *ctx, assert(o->Ready); switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: if (!obj->oa.results_accumulated) { read_gt_frequency(brw, obj); read_slice_unslice_frequencies(brw, obj); @@ -1592,13 +1578,13 @@ 
brw_get_perf_query_data(struct gl_context *ctx, brw_bo_unmap(obj->oa.bo); obj->oa.map = NULL; } - if (obj->query->kind == OA_COUNTERS) + if (obj->query->kind == GEN_PERF_QUERY_TYPE_OA) written = get_oa_counter_data(brw, obj, data_size, (uint8_t *)data); else written = brw_perf_query_get_mdapi_oa_data(brw, obj, data_size, (uint8_t *)data); break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data); break; @@ -1615,8 +1601,8 @@ static struct gl_perf_query_object * brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index) { struct brw_context *brw = brw_context(ctx); - const struct brw_perf_query_info *query = - &brw->perfquery.queries[query_index]; + const struct gen_perf_query_info *query = + &brw->perfquery.perf->queries[query_index]; struct brw_perf_query_object *obj = calloc(1, sizeof(struct brw_perf_query_object)); @@ -1650,8 +1636,8 @@ brw_delete_perf_query(struct gl_context *ctx, DBG("Delete(%d)\n", o->Id); switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: if (obj->oa.bo) { if (!obj->oa.results_accumulated) { drop_from_unaccumulated_query_list(brw, obj); @@ -1665,7 +1651,7 @@ brw_delete_perf_query(struct gl_context *ctx, obj->oa.results_accumulated = false; break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: if (obj->pipeline_stats.bo) { brw_bo_unreference(obj->pipeline_stats.bo); obj->pipeline_stats.bo = NULL; @@ -1695,223 +1681,87 @@ static void init_pipeline_statistic_query_registers(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; - struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw); + struct gen_perf *perf = brw->perfquery.perf; + struct gen_perf_query_info *query = + gen_perf_query_append_query_info(perf, MAX_STAT_COUNTERS); - query->kind = PIPELINE_STATS; + query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; 
query->name = "Pipeline Statistics Registers"; - query->n_counters = 0; - query->counters = - rzalloc_array(brw, struct brw_perf_query_counter, MAX_STAT_COUNTERS); - brw_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, - "N vertices submitted"); - brw_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, - "N primitives submitted"); - brw_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, - "N vertex shader invocations"); + gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, + "N vertices submitted"); + gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + "N primitives submitted"); + gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, + "N vertex shader invocations"); if (devinfo->gen == 6) { - brw_perf_query_info_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, "SO_PRIM_STORAGE_NEEDED", "N geometry shader stream-out primitives (total)"); - brw_perf_query_info_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1, "SO_NUM_PRIMS_WRITTEN", "N geometry shader stream-out primitives (written)"); } else { - brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, "SO_PRIM_STORAGE_NEEDED (Stream 0)", "N stream-out (stream 0) primitives (total)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, "SO_PRIM_STORAGE_NEEDED (Stream 1)", "N stream-out (stream 1) primitives (total)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, "SO_PRIM_STORAGE_NEEDED (Stream 2)", "N stream-out (stream 2) primitives (total)"); - 
brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, "SO_PRIM_STORAGE_NEEDED (Stream 3)", "N stream-out (stream 3) primitives (total)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, "SO_NUM_PRIMS_WRITTEN (Stream 0)", "N stream-out (stream 0) primitives (written)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, "SO_NUM_PRIMS_WRITTEN (Stream 1)", "N stream-out (stream 1) primitives (written)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, "SO_NUM_PRIMS_WRITTEN (Stream 2)", "N stream-out (stream 2) primitives (written)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, "SO_NUM_PRIMS_WRITTEN (Stream 3)", "N stream-out (stream 3) primitives (written)"); } - brw_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, "N TCS shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, "N TES shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, "N geometry shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, "N geometry shader primitives emitted"); - brw_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, + 
gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, "N primitives entering clipping"); - brw_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, "N primitives leaving clipping"); - if (devinfo->is_haswell || devinfo->gen == 8) - brw_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + if (devinfo->is_haswell || devinfo->gen == 8) { + gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, "N fragment shader invocations", "N fragment shader invocations"); - else - brw_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, + } else { + gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, "N fragment shader invocations"); - - brw_perf_query_info_add_basic_stat_reg(query, PS_DEPTH_COUNT, "N z-pass fragments"); - - if (devinfo->gen >= 7) - brw_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, - "N compute shader invocations"); - - query->data_size = sizeof(uint64_t) * query->n_counters; -} - -static void -register_oa_config(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t config_id) -{ - struct brw_perf_query_info *registred_query = - brw_perf_query_append_query_info(brw); - - *registred_query = *query; - registred_query->oa_metrics_set_id = config_id; - DBG("metric set registred: id = %" PRIu64", guid = %s\n", - registred_query->oa_metrics_set_id, query->guid); -} - -static void -enumerate_sysfs_metrics(struct brw_context *brw) -{ - char buf[256]; - DIR *metricsdir = NULL; - struct dirent *metric_entry; - int len; - - len = snprintf(buf, sizeof(buf), "%s/metrics", brw->perfquery.sysfs_dev_dir); - if (len < 0 || len >= sizeof(buf)) { - DBG("Failed to concatenate path to sysfs metrics/ directory\n"); - return; - } - - metricsdir = opendir(buf); - if (!metricsdir) { - DBG("Failed to open %s: %m\n", buf); - return; - } - - while ((metric_entry = readdir(metricsdir))) { - struct hash_entry 
*entry; - - if ((metric_entry->d_type != DT_DIR && - metric_entry->d_type != DT_LNK) || - metric_entry->d_name[0] == '.') - continue; - - DBG("metric set: %s\n", metric_entry->d_name); - entry = _mesa_hash_table_search(brw->perfquery.oa_metrics_table, - metric_entry->d_name); - if (entry) { - uint64_t id; - - len = snprintf(buf, sizeof(buf), "%s/metrics/%s/id", - brw->perfquery.sysfs_dev_dir, metric_entry->d_name); - if (len < 0 || len >= sizeof(buf)) { - DBG("Failed to concatenate path to sysfs metric id file\n"); - continue; - } - - if (!read_file_uint64(buf, &id)) { - DBG("Failed to read metric set id from %s: %m", buf); - continue; - } - - register_oa_config(brw, (const struct brw_perf_query_info *)entry->data, id); - } else - DBG("metric set not known by mesa (skipping)\n"); } - closedir(metricsdir); -} - -static bool -kernel_has_dynamic_config_support(struct brw_context *brw) -{ - __DRIscreen *screen = brw->screen->driScrnPriv; - - hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { - struct brw_perf_query_info *query = entry->data; - char config_path[280]; - uint64_t config_id; + gen_perf_query_info_add_basic_stat_reg(query, PS_DEPTH_COUNT, + "N z-pass fragments"); - snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", - brw->perfquery.sysfs_dev_dir, query->guid); - - /* Look for the test config, which we know we can't replace. 
*/ - if (read_file_uint64(config_path, &config_id) && config_id == 1) { - return drmIoctl(screen->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, - &config_id) < 0 && errno == ENOENT; - } + if (devinfo->gen >= 7) { + gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "N compute shader invocations"); } - return false; -} - -static void -init_oa_configs(struct brw_context *brw) -{ - __DRIscreen *screen = brw->screen->driScrnPriv; - - hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { - const struct brw_perf_query_info *query = entry->data; - struct drm_i915_perf_oa_config config; - char config_path[280]; - uint64_t config_id; - int ret; - - snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", - brw->perfquery.sysfs_dev_dir, query->guid); - - /* Don't recreate already loaded configs. */ - if (read_file_uint64(config_path, &config_id)) { - DBG("metric set: %s (already loaded)\n", query->guid); - register_oa_config(brw, query, config_id); - continue; - } - - memset(&config, 0, sizeof(config)); - - memcpy(config.uuid, query->guid, sizeof(config.uuid)); - - config.n_mux_regs = query->n_mux_regs; - config.mux_regs_ptr = (uintptr_t) query->mux_regs; - - config.n_boolean_regs = query->n_b_counter_regs; - config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs; - - config.n_flex_regs = query->n_flex_regs; - config.flex_regs_ptr = (uintptr_t) query->flex_regs; - - ret = drmIoctl(screen->fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); - if (ret < 0) { - DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n", - query->name, query->guid, strerror(errno)); - continue; - } - - register_oa_config(brw, query, ret); - DBG("metric set: %s (added)\n", query->guid); - } + query->data_size = sizeof(uint64_t) * query->n_counters; } static bool @@ -1974,55 +1824,20 @@ getparam_topology(struct brw_context *brw) return true; } -static void -compute_topology_builtins(struct brw_context *brw) +static unsigned +brw_init_perf_query_info(struct gl_context *ctx) 
{ + struct brw_context *brw = brw_context(ctx); const struct gen_device_info *devinfo = &brw->screen->devinfo; + __DRIscreen *screen = brw->screen->driScrnPriv; - brw->perfquery.sys_vars.slice_mask = devinfo->slice_masks; - brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices; - - for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) { - brw->perfquery.sys_vars.n_eu_sub_slices += - util_bitcount(devinfo->subslice_masks[i]); - } - - for (int i = 0; i < sizeof(devinfo->eu_masks); i++) - brw->perfquery.sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]); - - brw->perfquery.sys_vars.eu_threads_count = - brw->perfquery.sys_vars.n_eus * devinfo->num_thread_per_eu; - - /* The subslice mask builtin contains bits for all slices. Prior to Gen11 - * it had groups of 3bits for each slice, on Gen11 it's 8bits for each - * slice. - * - * Ideally equations would be updated to have a slice/subslice query - * function/operator. - */ - brw->perfquery.sys_vars.subslice_mask = 0; - - int bits_per_subslice = devinfo->gen == 11 ? 
8 : 3; - - for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) { - for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) { - if (gen_device_info_subslice_available(devinfo, s, ss)) - brw->perfquery.sys_vars.subslice_mask |= 1UL << (s * bits_per_subslice + ss); - } - } -} - -static bool -init_oa_sys_vars(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - uint64_t min_freq_mhz = 0, max_freq_mhz = 0; + if (brw->perfquery.perf) + return brw->perfquery.perf->n_queries; - if (!read_sysfs_drm_device_file_uint64(brw, "gt_min_freq_mhz", &min_freq_mhz)) - return false; + brw->perfquery.perf = gen_perf_new(brw, drmIoctl); - if (!read_sysfs_drm_device_file_uint64(brw, "gt_max_freq_mhz", &max_freq_mhz)) - return false; + init_pipeline_statistic_query_registers(brw); + brw_perf_query_register_mdapi_statistic_query(brw); if (!query_topology(brw)) { /* We need the i915 query uAPI on CNL+ (kernel 4.17+). */ @@ -2040,182 +1855,8 @@ init_oa_sys_vars(struct brw_context *brw) } } - memset(&brw->perfquery.sys_vars, 0, sizeof(brw->perfquery.sys_vars)); - brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 1000000; - brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 1000000; - brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency; - brw->perfquery.sys_vars.revision = devinfo->revision; - compute_topology_builtins(brw); - - return true; -} - -static bool -get_sysfs_dev_dir(struct brw_context *brw) -{ - __DRIscreen *screen = brw->screen->driScrnPriv; - struct stat sb; - int min, maj; - DIR *drmdir; - struct dirent *drm_entry; - int len; - - brw->perfquery.sysfs_dev_dir[0] = '\0'; - - if (fstat(screen->fd, &sb)) { - DBG("Failed to stat DRM fd\n"); - return false; - } - - maj = major(sb.st_rdev); - min = minor(sb.st_rdev); - - if (!S_ISCHR(sb.st_mode)) { - DBG("DRM fd is not a character device as expected\n"); - return false; - } - - len = snprintf(brw->perfquery.sysfs_dev_dir, - 
sizeof(brw->perfquery.sysfs_dev_dir), - "/sys/dev/char/%d:%d/device/drm", maj, min); - if (len < 0 || len >= sizeof(brw->perfquery.sysfs_dev_dir)) { - DBG("Failed to concatenate sysfs path to drm device\n"); - return false; - } - - drmdir = opendir(brw->perfquery.sysfs_dev_dir); - if (!drmdir) { - DBG("Failed to open %s: %m\n", brw->perfquery.sysfs_dev_dir); - return false; - } - - while ((drm_entry = readdir(drmdir))) { - if ((drm_entry->d_type == DT_DIR || - drm_entry->d_type == DT_LNK) && - strncmp(drm_entry->d_name, "card", 4) == 0) - { - len = snprintf(brw->perfquery.sysfs_dev_dir, - sizeof(brw->perfquery.sysfs_dev_dir), - "/sys/dev/char/%d:%d/device/drm/%s", - maj, min, drm_entry->d_name); - closedir(drmdir); - if (len < 0 || len >= sizeof(brw->perfquery.sysfs_dev_dir)) - return false; - else - return true; - } - } - - closedir(drmdir); - - DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n", - maj, min); - - return false; -} - -typedef void (*perf_register_oa_queries_t)(struct brw_context *); - -static perf_register_oa_queries_t -get_register_queries_function(const struct gen_device_info *devinfo) -{ - if (devinfo->is_haswell) - return brw_oa_register_queries_hsw; - if (devinfo->is_cherryview) - return brw_oa_register_queries_chv; - if (devinfo->is_broadwell) - return brw_oa_register_queries_bdw; - if (devinfo->is_broxton) - return brw_oa_register_queries_bxt; - if (devinfo->is_skylake) { - if (devinfo->gt == 2) - return brw_oa_register_queries_sklgt2; - if (devinfo->gt == 3) - return brw_oa_register_queries_sklgt3; - if (devinfo->gt == 4) - return brw_oa_register_queries_sklgt4; - } - if (devinfo->is_kabylake) { - if (devinfo->gt == 2) - return brw_oa_register_queries_kblgt2; - if (devinfo->gt == 3) - return brw_oa_register_queries_kblgt3; - } - if (devinfo->is_geminilake) - return brw_oa_register_queries_glk; - if (devinfo->is_coffeelake) { - if (devinfo->gt == 2) - return brw_oa_register_queries_cflgt2; - if (devinfo->gt == 3) - 
return brw_oa_register_queries_cflgt3; - } - if (devinfo->is_cannonlake) - return brw_oa_register_queries_cnl; - if (devinfo->gen == 11) - return brw_oa_register_queries_icl; - - return NULL; -} - -static unsigned -brw_init_perf_query_info(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct gen_device_info *devinfo = &brw->screen->devinfo; - bool i915_perf_oa_available = false; - struct stat sb; - perf_register_oa_queries_t oa_register; - - if (brw->perfquery.n_queries) - return brw->perfquery.n_queries; - - init_pipeline_statistic_query_registers(brw); - brw_perf_query_register_mdapi_statistic_query(brw); - - oa_register = get_register_queries_function(devinfo); - - /* The existence of this sysctl parameter implies the kernel supports - * the i915 perf interface. - */ - if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) { - - /* If _paranoid == 1 then on Gen8+ we won't be able to access OA - * metrics unless running as root. - */ - if (devinfo->is_haswell) - i915_perf_oa_available = true; - else { - uint64_t paranoid = 1; - - read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", &paranoid); - - if (paranoid == 0 || geteuid() == 0) - i915_perf_oa_available = true; - } - } - - if (i915_perf_oa_available && - oa_register && - get_sysfs_dev_dir(brw) && - init_oa_sys_vars(brw)) - { - brw->perfquery.oa_metrics_table = - _mesa_hash_table_create(NULL, _mesa_key_hash_string, - _mesa_key_string_equal); - - /* Index all the metric sets mesa knows about before looking to see what - * the kernel is advertising. 
- */ - oa_register(brw); - - if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) && - kernel_has_dynamic_config_support(brw)) - init_oa_configs(brw); - else - enumerate_sysfs_metrics(brw); - + if (gen_perf_load_oa_metrics(brw->perfquery.perf, screen->fd, devinfo)) brw_perf_query_register_mdapi_oa_query(brw); - } brw->perfquery.unaccumulated = ralloc_array(brw, struct brw_perf_query_object *, 2); @@ -2237,7 +1878,7 @@ brw_init_perf_query_info(struct gl_context *ctx) brw->perfquery.next_query_start_report_id = 1000; - return brw->perfquery.n_queries; + return brw->perfquery.perf->n_queries; } void diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h index 66b32c0490b..ca0503422ca 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.h +++ b/src/mesa/drivers/dri/i965/brw_performance_query.h @@ -27,7 +27,8 @@ #include <stdint.h> #include "brw_context.h" -#include "brw_performance_query_metrics.h" + +struct gen_perf_query_info; /* * When currently allocate only one page for pipeline statistics queries. Here @@ -57,7 +58,7 @@ struct brw_perf_query_object { struct gl_perf_query_object base; - const struct brw_perf_query_info *query; + const struct gen_perf_query_info *query; /* See query->kind to know which state below is in use... 
*/ union { @@ -142,81 +143,6 @@ struct brw_perf_query_object }; }; -static inline struct brw_perf_query_info * -brw_perf_query_append_query_info(struct brw_context *brw) -{ - brw->perfquery.queries = - reralloc(brw, brw->perfquery.queries, - struct brw_perf_query_info, ++brw->perfquery.n_queries); - - return &brw->perfquery.queries[brw->perfquery.n_queries - 1]; -} - -static inline void -brw_perf_query_info_add_stat_reg(struct brw_perf_query_info *query, - uint32_t reg, - uint32_t numerator, - uint32_t denominator, - const char *name, - const char *description) -{ - struct brw_perf_query_counter *counter; - - assert(query->n_counters < MAX_STAT_COUNTERS); - - counter = &query->counters[query->n_counters]; - counter->name = name; - counter->desc = description; - counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL; - counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; - counter->size = sizeof(uint64_t); - counter->offset = sizeof(uint64_t) * query->n_counters; - counter->pipeline_stat.reg = reg; - counter->pipeline_stat.numerator = numerator; - counter->pipeline_stat.denominator = denominator; - - query->n_counters++; -} - -static inline void -brw_perf_query_info_add_basic_stat_reg(struct brw_perf_query_info *query, - uint32_t reg, const char *name) -{ - brw_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name); -} - -/* Accumulate 32bits OA counters */ -static inline void -brw_perf_query_accumulate_uint32(const uint32_t *report0, - const uint32_t *report1, - uint64_t *accumulator) -{ - *accumulator += (uint32_t)(*report1 - *report0); -} - -/* Accumulate 40bits OA counters */ -static inline void -brw_perf_query_accumulate_uint40(int a_index, - const uint32_t *report0, - const uint32_t *report1, - uint64_t *accumulator) -{ - const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); - const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); - uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; - uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; - 
uint64_t value0 = report0[a_index + 4] | high0; - uint64_t value1 = report1[a_index + 4] | high1; - uint64_t delta; - - if (value0 > value1) - delta = (1ULL << 40) + value1 - value0; - else - delta = value1 - value0; - - *accumulator += delta; -} - int brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, struct brw_perf_query_object *obj, size_t data_size, diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c index 70f69debe98..0676e868b81 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c @@ -24,6 +24,8 @@ #include "brw_defines.h" #include "brw_performance_query.h" +#include "perf/gen_perf.h" + /** * Data format expected by MDAPI. */ @@ -203,16 +205,19 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, } static void -fill_mdapi_perf_query_counter(struct brw_perf_query_info *query, +fill_mdapi_perf_query_counter(struct gen_perf_query_info *query, const char *name, uint32_t data_offset, uint32_t data_size, - GLenum data_type) + enum gen_perf_counter_data_type data_type) { - struct brw_perf_query_counter *counter = &query->counters[query->n_counters]; + struct gen_perf_query_counter *counter = &query->counters[query->n_counters]; + + assert(query->n_counters <= query->max_counters); counter->name = name; counter->desc = "Raw counter value"; + counter->type = GEN_PERF_COUNTER_TYPE_RAW; counter->data_type = data_type; counter->offset = data_offset; counter->size = data_size; @@ -226,19 +231,21 @@ fill_mdapi_perf_query_counter(struct brw_perf_query_info *query, (uint8_t *) &struct_name.field_name - \ (uint8_t *) &struct_name, \ sizeof(struct_name.field_name), \ - GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL) + GEN_PERF_COUNTER_DATA_TYPE_##type_name) #define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \ fill_mdapi_perf_query_counter(query, \ ralloc_asprintf(ctx, 
"%s%i", #field_name, idx), \ (uint8_t *) &struct_name.field_name[idx] - \ (uint8_t *) &struct_name, \ sizeof(struct_name.field_name[0]), \ - GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL) + GEN_PERF_COUNTER_DATA_TYPE_##type_name) void brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; + struct gen_perf *perf = brw->perfquery.perf; + struct gen_perf_query_info *query = NULL; /* MDAPI requires different structures for pretty much every generation * (right now we have definitions for gen 7 to 11). @@ -246,36 +253,22 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) return; - struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw); - - query->kind = OA_COUNTERS_RAW; - query->name = "Intel_Raw_Hardware_Counters_Set_0_Query"; - /* Guid has to matches with MDAPI's. */ - query->guid = "2f01b241-7014-42a7-9eb6-a925cad3daba"; - query->n_counters = 0; - query->oa_metrics_set_id = 0; /* Set by MDAPI */ - - int n_counters; switch (devinfo->gen) { case 7: { + query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7); query->oa_format = I915_OA_FORMAT_A45_B8_C8; struct mdapi_gen7_metrics metric_data; query->data_size = sizeof(metric_data); - n_counters = 1 + 45 + 16 + 7; - query->counters = - rzalloc_array_size(brw->perfquery.queries, - sizeof(*query->counters), n_counters); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, ACounters, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, ACounters, i, UINT64); } for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, NOACounters, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + 
metric_data, NOACounters, i, UINT64); } MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); @@ -287,25 +280,21 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) break; } case 8: { + query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16); query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; struct mdapi_gen8_metrics metric_data; query->data_size = sizeof(metric_data); - n_counters = 2 + 36 + 16 + 16; - query->counters = - rzalloc_array_size(brw->perfquery.queries, - sizeof(*query->counters), n_counters); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, OaCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, OaCntr, i, UINT64); } for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, NoaCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NoaCntr, i, UINT64); } MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); @@ -328,25 +317,21 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) case 9: case 10: case 11: { + query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2); query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; struct mdapi_gen9_metrics metric_data; query->data_size = sizeof(metric_data); - n_counters = 2 + 36 + 16 + 16 + 16 + 2; - query->counters = - rzalloc_array_size(brw->perfquery.queries, - sizeof(*query->counters), n_counters); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); for (int i = 0; i 
< ARRAY_SIZE(metric_data.OaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, OaCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, OaCntr, i, UINT64); } for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, NoaCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NoaCntr, i, UINT64); } MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); @@ -365,8 +350,8 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, UserCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, UserCntr, i, UINT64); } MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32); MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32); @@ -377,12 +362,15 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) break; } - assert(query->n_counters <= n_counters); + query->kind = GEN_PERF_QUERY_TYPE_RAW; + query->name = "Intel_Raw_Hardware_Counters_Set_0_Query"; + /* Guid has to matches with MDAPI's. */ + query->guid = "2f01b241-7014-42a7-9eb6-a925cad3daba"; { /* Accumulation buffer offsets copied from an actual query... 
*/ - const struct brw_perf_query_info *copy_query = - &brw->perfquery.queries[0]; + const struct gen_perf_query_info *copy_query = + &brw->perfquery.perf->queries[0]; query->gpu_time_offset = copy_query->gpu_time_offset; query->gpu_clock_offset = copy_query->gpu_clock_offset; @@ -400,43 +388,41 @@ brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw) if (!(devinfo->gen >= 7 && devinfo->gen <= 9)) return; - struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw); + struct gen_perf_query_info *query = + gen_perf_query_append_query_info(brw->perfquery.perf, MAX_STAT_COUNTERS); - query->kind = PIPELINE_STATS; + query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; query->name = "Intel_Raw_Pipeline_Statistics_Query"; - query->n_counters = 0; - query->counters = - rzalloc_array(brw, struct brw_perf_query_counter, MAX_STAT_COUNTERS); /* The order has to match mdapi_pipeline_metrics. */ - brw_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, "N vertices submitted"); - brw_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, "N primitives submitted"); - brw_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, "N vertex shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, "N geometry shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, "N geometry shader primitives emitted"); - brw_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, "N primitives entering clipping"); - brw_perf_query_info_add_basic_stat_reg(query, 
CL_PRIMITIVES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, "N primitives leaving clipping"); if (devinfo->is_haswell || devinfo->gen == 8) { - brw_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, "N fragment shader invocations", "N fragment shader invocations"); } else { - brw_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, "N fragment shader invocations"); } - brw_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, "N TCS shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, "N TES shader invocations"); if (devinfo->gen >= 7) { - brw_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, "N compute shader invocations"); } diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_metrics.h b/src/mesa/drivers/dri/i965/brw_performance_query_metrics.h deleted file mode 100644 index 80d7ddc07cf..00000000000 --- a/src/mesa/drivers/dri/i965/brw_performance_query_metrics.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all 
copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_PERFORMANCE_QUERY_METRICS_H -#define BRW_PERFORMANCE_QUERY_METRICS_H - -#include <stdint.h> - -struct brw_pipeline_stat -{ - uint32_t reg; - uint32_t numerator; - uint32_t denominator; -}; - -struct brw_perf_query_counter -{ - const char *name; - const char *desc; - GLenum type; - GLenum data_type; - uint64_t raw_max; - size_t offset; - size_t size; - - union { - uint64_t (*oa_counter_read_uint64)(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t *accumulator); - float (*oa_counter_read_float)(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t *accumulator); - struct brw_pipeline_stat pipeline_stat; - }; -}; - -#endif /* BRW_PERFORMANCE_QUERY_METRICS_H */ diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build index ca93af0ab10..3c3637dbebc 100644 --- a/src/mesa/drivers/dri/i965/meson.build +++ b/src/mesa/drivers/dri/i965/meson.build @@ -57,7 +57,6 @@ files_i965 = files( 'brw_performance_query.h', 'brw_performance_query.c', 'brw_performance_query_mdapi.c', - 'brw_performance_query_metrics.h', 'brw_program.c', 'brw_program.h', 'brw_program_binary.c', @@ -149,37 +148,9 @@ foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110'] endforeach -i965_hw_metrics = [ - 'hsw', - 'bdw', 'chv', - 'sklgt2', 'sklgt3', 'sklgt4', - 'kblgt2', 'kblgt3', - 'cflgt2', 'cflgt3', - 'bxt', 'glk', - 'cnl', - 'icl', -] - 
-i965_hw_metrics_xml_files = [] -foreach hw : i965_hw_metrics - i965_hw_metrics_xml_files += 'brw_oa_@0@.xml'.format(hw) -endforeach - -i965_oa_sources = custom_target( - 'i965-oa-sources', - input : i965_hw_metrics_xml_files, - output : [ 'brw_oa_metrics.c', 'brw_oa_metrics.h' ], - command : [ - prog_python, files('brw_oa.py'), - '--code', '@OUTPUT0@', '--header', '@OUTPUT1@', - '@INPUT@', - ], -) - libi965 = static_library( 'i965', - [files_i965, i965_oa_sources, ir_expression_operation_h, - xmlpool_options_h], + [files_i965, ir_expression_operation_h, xmlpool_options_h], include_directories : [ inc_common, inc_intel, inc_dri_common, inc_util, inc_include, ], @@ -187,7 +158,7 @@ libi965 = static_library( cpp_args : [cpp_vis_args, c_sse2_args], link_with : [ i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler, - libblorp + libblorp, libintel_perf ], dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml], ) |