diff options
author | Gvozden Neskovic <neskovic@gmail.com> | 2016-07-01 18:33:04 +0200 |
---|---|---|
committer | Brian Behlendorf <behlendorf1@llnl.gov> | 2016-08-16 14:10:33 -0700 |
commit | 32ffaa3de58981814342fe6d3556c03d41d121f8 (patch) | |
tree | a4ffe5e103b059ba5a2ffe294c9e76a5b1aa3f8d /include | |
parent | 6d836e6f8b358270d55a57ad8e8868c957f15bf3 (diff) |
Add support for AVX-512 family of instruction sets
This patch adds compiler and runtime tests (user and kernel) for the following
instruction sets: avx512f, avx512cd, avx512er, avx512pf, avx512bw, avx512dq,
avx512vl, avx512ifma, avx512vbmi.
Note: Linux support for the AVX-512F (Foundation) instruction set started with
Linux v3.15.
Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #4952
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/simd_x86.h | 245 |
1 files changed, 233 insertions, 12 deletions
```diff
diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
index 6aa51144c..216dbed62 100644
--- a/include/linux/simd_x86.h
+++ b/include/linux/simd_x86.h
@@ -43,10 +43,25 @@
  *	zfs_ssse3_available()
  *	zfs_sse4_1_available()
  *	zfs_sse4_2_available()
+ *
  *	zfs_avx_available()
  *	zfs_avx2_available()
+ *
  *	zfs_bmi1_available()
  *	zfs_bmi2_available()
+ *
+ *	zfs_avx512f_available()
+ *	zfs_avx512cd_available()
+ *	zfs_avx512er_available()
+ *	zfs_avx512pf_available()
+ *	zfs_avx512bw_available()
+ *	zfs_avx512dq_available()
+ *	zfs_avx512vl_available()
+ *	zfs_avx512ifma_available()
+ *	zfs_avx512vbmi_available()
+ *
+ * NOTE(AVX-512VL): If using AVX-512 instructions with 128/256-bit registers
+ * also add zfs_avx512vl_available() to feature check.
  */
 
 #ifndef _SIMD_X86_H
@@ -124,7 +139,16 @@ typedef enum cpuid_inst_sets {
 	AVX,
 	AVX2,
 	BMI1,
-	BMI2
+	BMI2,
+	AVX512F,
+	AVX512CD,
+	AVX512DQ,
+	AVX512BW,
+	AVX512IFMA,
+	AVX512VBMI,
+	AVX512PF,
+	AVX512ER,
+	AVX512VL
 } cpuid_inst_sets_t;
 
 /*
@@ -132,11 +156,21 @@ typedef enum cpuid_inst_sets {
  */
 typedef struct cpuid_feature_desc {
 	uint32_t leaf;		/* CPUID leaf */
-	uint32_t subleaf;	/* CPUID subleaf */
+	uint32_t subleaf;	/* CPUID sub-leaf */
 	uint32_t flag;		/* bit mask of the feature */
 	cpuid_regs_t reg;	/* which CPUID return register to test */
 } cpuid_feature_desc_t;
 
+#define	_AVX512F_BIT		(1U << 16)
+#define	_AVX512CD_BIT		(_AVX512F_BIT | (1U << 28))
+#define	_AVX512DQ_BIT		(_AVX512F_BIT | (1U << 17))
+#define	_AVX512BW_BIT		(_AVX512F_BIT | (1U << 30))
+#define	_AVX512IFMA_BIT		(_AVX512F_BIT | (1U << 21))
+#define	_AVX512VBMI_BIT		(1U << 1) /* ECX; AVX512F_BIT is in EBX */
+#define	_AVX512PF_BIT		(_AVX512F_BIT | (1U << 26))
+#define	_AVX512ER_BIT		(_AVX512F_BIT | (1U << 27))
+#define	_AVX512VL_BIT		(_AVX512F_BIT | (1U << 31)) /* if used also check other levels */
+
 /*
  * Descriptions of supported instruction sets
  */
@@ -151,7 +185,16 @@ static const cpuid_feature_desc_t cpuid_features[] = {
 	[AVX]		= {1U, 0U,	1U << 28,	ECX	},
 	[AVX2]		= {7U, 0U,	1U << 5,	EBX	},
 	[BMI1]		= {7U, 0U,	1U << 3,	EBX	},
-	[BMI2]		= {7U, 0U,	1U << 8,	EBX	}
+	[BMI2]		= {7U, 0U,	1U << 8,	EBX	},
+	[AVX512F]	= {7U, 0U, _AVX512F_BIT,	EBX	},
+	[AVX512CD]	= {7U, 0U, _AVX512CD_BIT,	EBX	},
+	[AVX512DQ]	= {7U, 0U, _AVX512DQ_BIT,	EBX	},
+	[AVX512BW]	= {7U, 0U, _AVX512BW_BIT,	EBX	},
+	[AVX512IFMA]	= {7U, 0U, _AVX512IFMA_BIT,	EBX	},
+	[AVX512VBMI]	= {7U, 0U, _AVX512VBMI_BIT,	ECX	},
+	[AVX512PF]	= {7U, 0U, _AVX512PF_BIT,	EBX	},
+	[AVX512ER]	= {7U, 0U, _AVX512ER_BIT,	EBX	},
+	[AVX512VL]	= {7U, 0U, _AVX512VL_BIT,	EBX	}
 };
 
 /*
@@ -187,15 +230,15 @@ __cpuid_check_feature(const cpuid_feature_desc_t *desc)
 		 */
 		__cpuid_count(desc->leaf, desc->subleaf,
 		    r[EAX], r[EBX], r[ECX], r[EDX]);
-		return (!!(r[desc->reg] & desc->flag));
+		return ((r[desc->reg] & desc->flag) == desc->flag);
 	}
 	return (B_FALSE);
 }
 
-#define CPUID_FEATURE_CHECK(name, id) \
-static inline boolean_t \
-__cpuid_has_ ## name(void)\
-{ \
+#define	CPUID_FEATURE_CHECK(name, id)				\
+static inline boolean_t						\
+__cpuid_has_ ## name(void)					\
+{								\
 	return (__cpuid_check_feature(&cpuid_features[id]));	\
 }
 
@@ -213,16 +256,25 @@ CPUID_FEATURE_CHECK(avx2, AVX2);
 CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
 CPUID_FEATURE_CHECK(bmi1, BMI1);
 CPUID_FEATURE_CHECK(bmi2, BMI2);
+CPUID_FEATURE_CHECK(avx512f, AVX512F);
+CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
+CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
+CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
+CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
+CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
+CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
+CPUID_FEATURE_CHECK(avx512er, AVX512ER);
+CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
 
 #endif /* !defined(_KERNEL) */
 
+
 /*
- * Detect ymm register set support
+ * Detect register set support
  */
 static inline boolean_t
-__ymm_enabled(void)
+__simd_state_enabled(const uint64_t state)
 {
-	static const uint64_t XSTATE_SSE_AVX = 0x2 | 0x4;
 	boolean_t has_osxsave;
 	uint64_t xcr0;
 
@@ -238,9 +290,16 @@ __ymm_enabled(void)
 		return (B_FALSE);
 
 	xcr0 = xgetbv(0);
-	return ((xcr0 & XSTATE_SSE_AVX) == XSTATE_SSE_AVX);
+	return ((xcr0 & state) == state);
 }
 
+#define	_XSTATE_SSE_AVX	(0x2 | 0x4)
+#define	_XSTATE_AVX512	(0xE0 | _XSTATE_SSE_AVX)
+
+#define	__ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
+#define	__zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
+
+
 /*
  * Check if SSE instruction set is available
  */
@@ -383,6 +442,168 @@ zfs_bmi2_available(void)
 #endif
 }
 
+
+/*
+ * AVX-512 family of instruction sets:
+ *
+ * AVX512F	Foundation
+ * AVX512CD	Conflict Detection Instructions
+ * AVX512ER	Exponential and Reciprocal Instructions
+ * AVX512PF	Prefetch Instructions
+ *
+ * AVX512BW	Byte and Word Instructions
+ * AVX512DQ	Double-word and Quadword Instructions
+ * AVX512VL	Vector Length Extensions
+ *
+ * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
+ * AVX512VBMI	Vector Byte Manipulation Instructions
+ */
+
+
+/* Check if AVX512F instruction set is available */
+static inline boolean_t
+zfs_avx512f_available(void)
+{
+	boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512F)
+	has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
+#elif !defined(_KERNEL)
+	has_avx512 = __cpuid_has_avx512f();
+#endif
+
+	return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512CD instruction set is available */
+static inline boolean_t
+zfs_avx512cd_available(void)
+{
+	boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512CD)
+	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512CD);
+#elif !defined(_KERNEL)
+	has_avx512 = __cpuid_has_avx512cd();
+#endif
+
+	return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512ER instruction set is available */
+static inline boolean_t
+zfs_avx512er_available(void)
+{
+	boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512ER)
+	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512ER);
+#elif !defined(_KERNEL)
+	has_avx512 = __cpuid_has_avx512er();
+#endif
+
+	return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512PF instruction set is available */
+static inline boolean_t
+zfs_avx512pf_available(void)
+{
+	boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512PF)
+	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512PF);
+#elif !defined(_KERNEL)
+	has_avx512 = __cpuid_has_avx512pf();
+#endif
+
+	return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512BW instruction set is available */
+static inline boolean_t
+zfs_avx512bw_available(void)
+{
+	boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512BW)
+	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512BW);
+#elif !defined(_KERNEL)
+	has_avx512 = __cpuid_has_avx512bw();
+#endif
+
+	return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512DQ instruction set is available */
+static inline boolean_t
+zfs_avx512dq_available(void)
+{
+	boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512DQ)
+	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512DQ);
+#elif !defined(_KERNEL)
+	has_avx512 = __cpuid_has_avx512dq();
+#endif
+
+	return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512VL instruction set is available */
+static inline boolean_t
+zfs_avx512vl_available(void)
+{
+	boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VL)
+	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512VL);
+#elif !defined(_KERNEL)
+	has_avx512 = __cpuid_has_avx512vl();
+#endif
+
+	return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512IFMA instruction set is available */
+static inline boolean_t
+zfs_avx512ifma_available(void)
+{
+	boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512IFMA)
+	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512IFMA);
+#elif !defined(_KERNEL)
+	has_avx512 = __cpuid_has_avx512ifma();
+#endif
+
+	return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512VBMI instruction set is available */
+static inline boolean_t
+zfs_avx512vbmi_available(void)
+{
+	boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VBMI)
+	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512VBMI);
+#elif !defined(_KERNEL)
+	has_avx512 = __cpuid_has_avx512f() &&
+	    __cpuid_has_avx512vbmi();
+#endif
+
+	return (has_avx512 && __zmm_enabled());
+}
+
 #endif /* defined(__x86) */
 
 #endif /* _SIMD_X86_H */
```