diff options
Diffstat (limited to 'include/linux/simd_x86.h')
-rw-r--r-- | include/linux/simd_x86.h | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h new file mode 100644 index 000000000..6aa51144c --- /dev/null +++ b/include/linux/simd_x86.h @@ -0,0 +1,388 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (C) 2016 Gvozden Neskovic <[email protected]>. + */ + +/* + * USER API: + * + * Kernel fpu methods: + * kfpu_begin() + * kfpu_end() + * + * SIMD support: + * + * Following functions should be called to determine whether CPU feature + * is supported. All functions are usable in kernel and user space. + * If a SIMD algorithm is using more than one instruction set + * all relevant feature test functions should be called. + * + * Supported features: + * zfs_sse_available() + * zfs_sse2_available() + * zfs_sse3_available() + * zfs_ssse3_available() + * zfs_sse4_1_available() + * zfs_sse4_2_available() + * zfs_avx_available() + * zfs_avx2_available() + * zfs_bmi1_available() + * zfs_bmi2_available() + */ + +#ifndef _SIMD_X86_H +#define _SIMD_X86_H + +#include <sys/isa_defs.h> + +/* only for __x86 */ +#if defined(__x86) + +#include <sys/types.h> + +#if defined(_KERNEL) +#include <asm/cpufeature.h> +#else +#include <cpuid.h> +#endif + +#if defined(_KERNEL) +#if defined(HAVE_FPU_API_H) +#include <asm/fpu/api.h> +#include <asm/fpu/internal.h> +#define kfpu_begin() \ +{ \ + preempt_disable(); \ + __kernel_fpu_begin(); \ +} +#define kfpu_end() \ +{ \ + __kernel_fpu_end(); \ + preempt_enable(); \ +} +#else +#include <asm/i387.h> +#include <asm/xcr.h> +#define kfpu_begin() kernel_fpu_begin() +#define kfpu_end() kernel_fpu_end() +#endif /* defined(HAVE_FPU_API_H) */ +#else +/* + * fpu dummy methods for userspace + */ +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) +#endif /* defined(_KERNEL) */ + +/* + * CPUID feature tests for user-space. Linux kernel provides an interface for + * CPU feature testing. + */ +#if !defined(_KERNEL) + +/* + * x86 registers used implicitly by CPUID + */ +typedef enum cpuid_regs { + EAX = 0, + EBX, + ECX, + EDX, + CPUID_REG_CNT = 4 +} cpuid_regs_t; + +/* + * List of instruction sets identified by CPUID + */ +typedef enum cpuid_inst_sets { + SSE = 0, + SSE2, + SSE3, + SSSE3, + SSE4_1, + SSE4_2, + OSXSAVE, + AVX, + AVX2, + BMI1, + BMI2 +} cpuid_inst_sets_t; + +/* + * Instruction set descriptor. + */ +typedef struct cpuid_feature_desc { + uint32_t leaf; /* CPUID leaf */ + uint32_t subleaf; /* CPUID subleaf */ + uint32_t flag; /* bit mask of the feature */ + cpuid_regs_t reg; /* which CPUID return register to test */ +} cpuid_feature_desc_t; + +/* + * Descriptions of supported instruction sets + */ +static const cpuid_feature_desc_t cpuid_features[] = { + [SSE] = {1U, 0U, 1U << 25, EDX }, + [SSE2] = {1U, 0U, 1U << 26, EDX }, + [SSE3] = {1U, 0U, 1U << 0, ECX }, + [SSSE3] = {1U, 0U, 1U << 9, ECX }, + [SSE4_1] = {1U, 0U, 1U << 19, ECX }, + [SSE4_2] = {1U, 0U, 1U << 20, ECX }, + [OSXSAVE] = {1U, 0U, 1U << 27, ECX }, + [AVX] = {1U, 0U, 1U << 28, ECX }, + [AVX2] = {7U, 0U, 1U << 5, EBX }, + [BMI1] = {7U, 0U, 1U << 3, EBX }, + [BMI2] = {7U, 0U, 1U << 8, EBX } +}; + +/* + * Check if OS supports AVX and AVX2 by checking XCR0 + * Only call this function if CPUID indicates that AVX feature is + * supported by the CPU, otherwise it might be an illegal instruction. + */ +static inline uint64_t +xgetbv(uint32_t index) +{ + uint32_t eax, edx; + /* xgetbv - instruction byte code */ + __asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (index)); + + return ((((uint64_t)edx)<<32) | (uint64_t)eax); +} + +/* + * Check if CPU supports a feature + */ +static inline boolean_t +__cpuid_check_feature(const cpuid_feature_desc_t *desc) +{ + uint32_t r[CPUID_REG_CNT]; + + if (__get_cpuid_max(0, NULL) >= desc->leaf) { + /* + * __cpuid_count is needed to properly check + * for AVX2. It is a macro, so return parameters + * are passed by value. + */ + __cpuid_count(desc->leaf, desc->subleaf, + r[EAX], r[EBX], r[ECX], r[EDX]); + return (!!(r[desc->reg] & desc->flag)); + } + return (B_FALSE); +} + +#define CPUID_FEATURE_CHECK(name, id) \ +static inline boolean_t \ +__cpuid_has_ ## name(void)\ +{ \ + return (__cpuid_check_feature(&cpuid_features[id])); \ +} + +/* + * Define functions for user-space CPUID features testing + */ +CPUID_FEATURE_CHECK(sse, SSE); +CPUID_FEATURE_CHECK(sse2, SSE2); +CPUID_FEATURE_CHECK(sse3, SSE3); +CPUID_FEATURE_CHECK(ssse3, SSSE3); +CPUID_FEATURE_CHECK(sse4_1, SSE4_1); +CPUID_FEATURE_CHECK(sse4_2, SSE4_2); +CPUID_FEATURE_CHECK(avx, AVX); +CPUID_FEATURE_CHECK(avx2, AVX2); +CPUID_FEATURE_CHECK(osxsave, OSXSAVE); +CPUID_FEATURE_CHECK(bmi1, BMI1); +CPUID_FEATURE_CHECK(bmi2, BMI2); + +#endif /* !defined(_KERNEL) */ + +/* + * Detect ymm register set support + */ +static inline boolean_t +__ymm_enabled(void) +{ + static const uint64_t XSTATE_SSE_AVX = 0x2 | 0x4; + boolean_t has_osxsave; + uint64_t xcr0; + +#if defined(_KERNEL) && defined(X86_FEATURE_OSXSAVE) + has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE); +#elif defined(_KERNEL) && !defined(X86_FEATURE_OSXSAVE) + has_osxsave = B_FALSE; +#else + has_osxsave = __cpuid_has_osxsave(); +#endif + + if (!has_osxsave) + return (B_FALSE); + + xcr0 = xgetbv(0); + return ((xcr0 & XSTATE_SSE_AVX) == XSTATE_SSE_AVX); +} + +/* + * Check if SSE instruction set is available + */ +static inline boolean_t +zfs_sse_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM)); +#else + return (__cpuid_has_sse()); +#endif +} + +/* + * Check if SSE2 instruction set is available + */ +static inline boolean_t +zfs_sse2_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM2)); +#else + return (__cpuid_has_sse2()); +#endif +} + +/* + * Check if SSE3 instruction set is available + */ +static inline boolean_t +zfs_sse3_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM3)); +#else + return (__cpuid_has_sse3()); +#endif +} + +/* + * Check if SSSE3 instruction set is available + */ +static inline boolean_t +zfs_ssse3_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_SSSE3)); +#else + return (__cpuid_has_ssse3()); +#endif +} + +/* + * Check if SSE4.1 instruction set is available + */ +static inline boolean_t +zfs_sse4_1_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM4_1)); +#else + return (__cpuid_has_sse4_1()); +#endif +} + +/* + * Check if SSE4.2 instruction set is available + */ +static inline boolean_t +zfs_sse4_2_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM4_2)); +#else + return (__cpuid_has_sse4_2()); +#endif +} + +/* + * Check if AVX instruction set is available + */ +static inline boolean_t +zfs_avx_available(void) +{ + boolean_t has_avx; +#if defined(_KERNEL) + has_avx = !!boot_cpu_has(X86_FEATURE_AVX); +#else + has_avx = __cpuid_has_avx(); +#endif + + return (has_avx && __ymm_enabled()); +} + +/* + * Check if AVX2 instruction set is available + */ +static inline boolean_t +zfs_avx2_available(void) +{ + boolean_t has_avx2; +#if defined(_KERNEL) && defined(X86_FEATURE_AVX2) + has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2); +#elif defined(_KERNEL) && !defined(X86_FEATURE_AVX2) + has_avx2 = B_FALSE; +#else + has_avx2 = __cpuid_has_avx2(); +#endif + + return (has_avx2 && __ymm_enabled()); +} + +/* + * Check if BMI1 instruction set is available + */ +static inline boolean_t +zfs_bmi1_available(void) +{ +#if defined(_KERNEL) && defined(X86_FEATURE_BMI1) + return (!!boot_cpu_has(X86_FEATURE_BMI1)); +#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI1) + return (B_FALSE); +#else + return (__cpuid_has_bmi1()); +#endif +} + +/* + * Check if BMI2 instruction set is available + */ +static inline boolean_t +zfs_bmi2_available(void) +{ +#if defined(_KERNEL) && defined(X86_FEATURE_BMI2) + return (!!boot_cpu_has(X86_FEATURE_BMI2)); +#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI2) + return (B_FALSE); +#else + return (__cpuid_has_bmi2()); +#endif +} + +#endif /* defined(__x86) */ + +#endif /* _SIMD_X86_H */ |