diff options
-rw-r--r-- | src/intel/dev/gen_device_info.h | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query.c | 216 |
2 files changed, 159 insertions, 71 deletions
diff --git a/src/intel/dev/gen_device_info.h b/src/intel/dev/gen_device_info.h
index 4d08f0dfedd..40b72383420 100644
--- a/src/intel/dev/gen_device_info.h
+++ b/src/intel/dev/gen_device_info.h
@@ -247,6 +247,20 @@ struct gen_device_info
 #define gen_device_info_is_9lp(devinfo) \
    ((devinfo)->is_broxton || (devinfo)->is_geminilake)
 
+/**
+ * Test whether a subslice is flagged in devinfo->subslice_masks.
+ *
+ * Each slice owns subslice_slice_stride consecutive entries of the mask
+ * array, with one bit per subslice (bit subslice % 8 of entry subslice / 8).
+ */
+static inline bool
+gen_device_info_subslice_available(const struct gen_device_info *devinfo,
+                                   int slice, int subslice)
+{
+   return (devinfo->subslice_masks[slice * devinfo->subslice_slice_stride +
+                                   subslice / 8] & (1U << (subslice % 8))) != 0;
+}
+
 int gen_get_pci_device_id_override(void);
 int gen_device_name_to_pci_device_id(const char *name);
 bool gen_get_device_info(int devid, struct gen_device_info *devinfo);
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 12f797c1297..cca74001f19 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1911,6 +1911,135 @@ init_oa_configs(struct brw_context *brw)
 }
 
+/**
+ * Refresh brw->screen->devinfo with the topology reported by the i915 query
+ * uAPI (DRM_I915_QUERY_TOPOLOGY_INFO).
+ *
+ * \return false when either ioctl fails, the kernel reports an error or an
+ *         empty payload, or the buffer cannot be allocated. devinfo is only
+ *         modified on success.
+ */
 static bool
+query_topology(struct brw_context *brw)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   struct drm_i915_query_item item = {
+      .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
+   };
+   struct drm_i915_query query = {
+      .num_items = 1,
+      .items_ptr = (uintptr_t) &item,
+   };
+
+   /* First call has no buffer attached: the kernel only writes the payload
+    * size (or a negative errno) into item.length.
+    */
+   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query) ||
+       item.length <= 0)
+      return false;
+
+   struct drm_i915_query_topology_info *topo_info = calloc(1, item.length);
+   if (topo_info == NULL)
+      return false;
+   item.data_ptr = (uintptr_t) topo_info;
+
+   /* Second call fills the buffer; release it on every path. */
+   const bool success =
+      drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query) == 0 &&
+      item.length > 0;
+   if (success) {
+      gen_device_info_update_from_topology(&brw->screen->devinfo,
+                                           topo_info);
+   }
+
+   free(topo_info);
+
+   return success;
+}
+
+/**
+ * Fallback used when query_topology() fails: derive the topology from the
+ * I915_PARAM_SLICE_MASK and I915_PARAM_SUBSLICE_MASK getparams plus the EU
+ * total already stored in brw->screen.
+ *
+ * \return false if either getparam ioctl fails.
+ */
+static bool
+getparam_topology(struct brw_context *brw)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   drm_i915_getparam_t gp;
+   int ret;
+
+   int slice_mask = 0;
+   gp.param = I915_PARAM_SLICE_MASK;
+   gp.value = &slice_mask;
+   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+   if (ret)
+      return false;
+
+   int subslice_mask = 0;
+   gp.param = I915_PARAM_SUBSLICE_MASK;
+   gp.value = &subslice_mask;
+   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+   if (ret)
+      return false;
+
+   gen_device_info_update_from_masks(&brw->screen->devinfo,
+                                     slice_mask,
+                                     subslice_mask,
+                                     brw->screen->eu_total);
+
+   return true;
+}
+
+/**
+ * Derive the topology related OA builtins (slice/subslice masks, EU and
+ * thread counts) from brw->screen->devinfo.
+ *
+ * The counters are accumulated with +=, so brw->perfquery.sys_vars must be
+ * zeroed by the caller beforehand.
+ */
+static void
+compute_topology_builtins(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   brw->perfquery.sys_vars.slice_mask = devinfo->slice_masks;
+   brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices;
+
+   /* Walk every entry of the mask array; sizeof(subslice_masks[i]) is the
+    * size of a single element, not the number of elements.
+    */
+   const int n_subslice_masks =
+      sizeof(devinfo->subslice_masks) / sizeof(devinfo->subslice_masks[0]);
+   for (int i = 0; i < n_subslice_masks; i++) {
+      brw->perfquery.sys_vars.n_eu_sub_slices +=
+         _mesa_bitcount(devinfo->subslice_masks[i]);
+   }
+
+   for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
+      brw->perfquery.sys_vars.n_eus += _mesa_bitcount(devinfo->eu_masks[i]);
+
+   brw->perfquery.sys_vars.eu_threads_count =
+      brw->perfquery.sys_vars.n_eus * devinfo->num_thread_per_eu;
+
+   /* At the moment the subslice mask builtin has groups of 3bits for each
+    * slice.
+    *
+    * Ideally equations would be updated to have a slice/subslice query
+    * function/operator.
+    */
+   brw->perfquery.sys_vars.subslice_mask = 0;
+   for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) {
+      for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) {
+         /* 1ULL keeps the shift 64-bit wide on ILP32 builds as well. */
+         if (gen_device_info_subslice_available(devinfo, s, ss))
+            brw->perfquery.sys_vars.subslice_mask |= 1ULL << (s * 3 + ss);
+      }
+   }
+}
+
+static bool
 init_oa_sys_vars(struct brw_context *brw)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
@@ -1923,83 +2052,28 @@ init_oa_sys_vars(struct brw_context *brw)
    if (!read_sysfs_drm_device_file_uint64(brw, "gt_max_freq_mhz", &max_freq_mhz))
       return false;
 
-   brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 1000000;
-   brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 1000000;
-   brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
-
-   brw->perfquery.sys_vars.revision = intel_device_get_revision(screen->fd);
-   brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices;
-   /* Assuming uniform distribution of subslices per slices. */
-   brw->perfquery.sys_vars.n_eu_sub_slices = devinfo->num_subslices[0];
-
-   if (devinfo->is_haswell) {
-      brw->perfquery.sys_vars.slice_mask = 0;
-      brw->perfquery.sys_vars.subslice_mask = 0;
-
-      for (int s = 0; s < devinfo->num_slices; s++)
-         brw->perfquery.sys_vars.slice_mask |= 1U << s;
-      for (int ss = 0; ss < devinfo->num_subslices[0]; ss++)
-         brw->perfquery.sys_vars.subslice_mask |= 1U << ss;
-
-      if (devinfo->gt == 1) {
-         brw->perfquery.sys_vars.n_eus = 10;
-      } else if (devinfo->gt == 2) {
-         brw->perfquery.sys_vars.n_eus = 20;
-      } else if (devinfo->gt == 3) {
-         brw->perfquery.sys_vars.n_eus = 40;
-      } else
-         unreachable("not reached");
-   } else {
-      drm_i915_getparam_t gp;
-      int ret;
-      int slice_mask = 0;
-      int ss_mask = 0;
-      /* maximum number of slices */
-      int s_max = devinfo->num_slices;
-      /* maximum number of subslices per slice (assuming uniform subslices per
-       * slices)
-       */
-      int ss_max = devinfo->num_subslices[0];
-      uint64_t subslice_mask = 0;
-      int s;
-
-      gp.param = I915_PARAM_SLICE_MASK;
-      gp.value = &slice_mask;
-      ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp);
-      if (ret)
+   if (!query_topology(brw)) {
+      /* We need the i915 query uAPI on CNL+ (kernel 4.17+). */
+      if (devinfo->gen >= 10)
          return false;
 
-      gp.param = I915_PARAM_SUBSLICE_MASK;
-      gp.value = &ss_mask;
-      ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp);
-      if (ret)
-         return false;
+      if (!getparam_topology(brw)) {
+         /* We need the SLICE_MASK/SUBSLICE_MASK on gen8+ (kernel 4.13+). */
+         if (devinfo->gen >= 8)
+            return false;
 
-      brw->perfquery.sys_vars.n_eus = brw->screen->eu_total;
-      brw->perfquery.sys_vars.n_eu_slices = __builtin_popcount(slice_mask);
-      brw->perfquery.sys_vars.slice_mask = slice_mask;
-
-      /* Note: the _SUBSLICE_MASK param only reports a global subslice mask
-       * which applies to all slices.
-       *
-       * Note: some of the metrics we have (as described in XML) are
-       * conditional on a $SubsliceMask variable which is expected to also
-       * reflect the slice mask by packing together subslice masks for each
-       * slice in one value..
-       */
-      for (s = 0; s < s_max; s++) {
-         if (slice_mask & (1<<s)) {
-            subslice_mask |= ss_mask << (ss_max * s);
-         }
+         /* On Haswell, the values are already computed for us in
+          * gen_device_info.
+          */
       }
-
-      brw->perfquery.sys_vars.subslice_mask = subslice_mask;
-      brw->perfquery.sys_vars.n_eu_sub_slices =
-         __builtin_popcount(subslice_mask);
    }
 
-   brw->perfquery.sys_vars.eu_threads_count =
-      brw->perfquery.sys_vars.n_eus * devinfo->num_thread_per_eu;
+   memset(&brw->perfquery.sys_vars, 0, sizeof(brw->perfquery.sys_vars));
+   brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 1000000;
+   brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 1000000;
+   brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
+   brw->perfquery.sys_vars.revision = intel_device_get_revision(screen->fd);
+   compute_topology_builtins(brw);
 
    return true;
 }