summary refs log tree commit diff stats
path: root/module
diff options
context:
space:
mode:
author Romain Dolbeau <[email protected]> 2016-11-22 08:38:34 +0100
committer Brian Behlendorf <[email protected]> 2016-11-29 14:34:33 -0800
commit 88cc2352eaf6bdd87be8349097b4a3784aeafc51 (patch)
tree e3f244ad83d5a91a1d565caad51241cb9f1ca8a3 /module
parent 65d71d4212a813937f2eba36981b236cdba292f7 (diff)
ABD raidz NEON support
Port NEON implementation of RAID-Z functions to ABD. Signed-off-by: Romain Dolbeau <[email protected]>
Diffstat (limited to 'module')
-rw-r--r-- module/zfs/vdev_raidz_math.c 4
-rw-r--r-- module/zfs/vdev_raidz_math_aarch64_neon.c 134
-rw-r--r-- module/zfs/vdev_raidz_math_aarch64_neon_common.h 29
-rw-r--r-- module/zfs/vdev_raidz_math_aarch64_neonx2.c 156
4 files changed, 229 insertions, 94 deletions
diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c
index 25d25bd27..c050c9099 100644
--- a/module/zfs/vdev_raidz_math.c
+++ b/module/zfs/vdev_raidz_math.c
@@ -64,8 +64,8 @@ const raidz_impl_ops_t *raidz_all_maths[] = {
// &vdev_raidz_avx512bw_impl,
#endif
#if defined(__aarch64__)
- // &vdev_raidz_aarch64_neon_impl,
- // &vdev_raidz_aarch64_neonx2_impl,
+ &vdev_raidz_aarch64_neon_impl,
+ &vdev_raidz_aarch64_neonx2_impl,
#endif
};
diff --git a/module/zfs/vdev_raidz_math_aarch64_neon.c b/module/zfs/vdev_raidz_math_aarch64_neon.c
index 7ba30ba5e..c7b8afd38 100644
--- a/module/zfs/vdev_raidz_math_aarch64_neon.c
+++ b/module/zfs/vdev_raidz_math_aarch64_neon.c
@@ -25,10 +25,36 @@
#include <sys/isa_defs.h>
#include <sys/types.h>
-#if 0 // defined(__aarch64__)
+#if defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h"
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define MUL_D 0, 1, 2, 3
+
#define GEN_P_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
@@ -39,15 +65,12 @@
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
- GEN_X_DEFINE_8_9() \
- GEN_X_DEFINE_10_11() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define GEN_PQ_STRIDE 4
#define GEN_PQ_D 0, 1, 2, 3
-#define GEN_PQ_P 4, 5, 6, 7
-#define GEN_PQ_Q 8, 9, 10, 11
+#define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
@@ -55,69 +78,115 @@
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
- GEN_X_DEFINE_31() \
- GEN_X_DEFINE_32() \
GEN_X_DEFINE_33_36()
-#define GEN_PQR_STRIDE 2
-#define GEN_PQR_D 0, 1
-#define GEN_PQR_P 2, 3
-#define GEN_PQR_Q 4, 5
-#define GEN_PQR_R 6, 7
+#define GEN_PQR_STRIDE 4
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
-#define REC_P_DEFINE() \
- GEN_X_DEFINE_0_3() \
+#define SYN_Q_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
-#define REC_P_STRIDE 4
-#define REC_P_X 0, 1, 2, 3
+#define SYN_Q_STRIDE 4
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
-#define REC_Q_DEFINE() \
+#define SYN_R_DEFINE() \
GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
-#define REC_Q_STRIDE 4
-#define REC_Q_X 0, 1, 2, 3
+#define SYN_R_STRIDE 4
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
-#define REC_R_DEFINE() \
+#define SYN_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
-#define REC_R_STRIDE 4
-#define REC_R_X 0, 1, 2, 3
+#define SYN_PQ_STRIDE 4
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
- GEN_X_DEFINE_16() \
- GEN_X_DEFINE_17() \
GEN_X_DEFINE_31() \
GEN_X_DEFINE_32() \
GEN_X_DEFINE_33_36()
#define REC_PQ_STRIDE 2
#define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3
-#define REC_PQ_D 4, 5
+#define REC_PQ_T 4, 5
+
+#define SYN_PR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_PR_STRIDE 4
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
-#define REC_PR_DEFINE() REC_PQ_DEFINE()
+#define REC_PR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36()
#define REC_PR_STRIDE 2
#define REC_PR_X 0, 1
#define REC_PR_Y 2, 3
-#define REC_PR_D 4, 5
+#define REC_PR_T 4, 5
+
+#define SYN_QR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_QR_STRIDE 4
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
-#define REC_QR_DEFINE() REC_PQ_DEFINE()
+#define REC_QR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36()
#define REC_QR_STRIDE 2
#define REC_QR_X 0, 1
#define REC_QR_Y 2, 3
-#define REC_QR_D 4, 5
+#define REC_QR_T 4, 5
-#define REC_PQR_DEFINE() \
+#define SYN_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
- GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_PQR_STRIDE 4
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
+
+#define REC_PQR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_31() \
GEN_X_DEFINE_32() \
GEN_X_DEFINE_33_36()
@@ -125,7 +194,6 @@
#define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5
-#define REC_PQR_D 6, 7
#define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9
@@ -154,7 +222,7 @@ const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
#endif /* defined(__aarch64__) */
-#if 0 // defined(__aarch64__)
+#if defined(__aarch64__)
const uint8_t
__attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = {
diff --git a/module/zfs/vdev_raidz_math_aarch64_neon_common.h b/module/zfs/vdev_raidz_math_aarch64_neon_common.h
index 08dbddaea..cb9ff86c1 100644
--- a/module/zfs/vdev_raidz_math_aarch64_neon_common.h
+++ b/module/zfs/vdev_raidz_math_aarch64_neon_common.h
@@ -125,7 +125,7 @@
#define ASM_BUG() ASSERT(0)
-#define OFFSET(ptr, val) (((unsigned char *)ptr)+val)
+#define OFFSET(ptr, val) (((unsigned char *)(ptr))+val)
extern const uint8_t gf_clmul_mod_lt[4*256][16];
@@ -135,20 +135,6 @@ typedef struct v {
uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;
-#define PREFETCHNTA(ptr, offset) \
-{ \
- __asm( \
- "prfm pstl1strm, %[MEM]\n" \
- : : [MEM] "Q" (*(ptr + offset))); \
-}
-
-#define PREFETCH(ptr, offset) \
-{ \
- __asm( \
- "prfm pldl1keep, %[MEM]\n" \
- : : [MEM] "Q" (*(ptr + offset))); \
-}
-
#define XOR_ACC(src, r...) \
{ \
switch (REG_CNT(r)) { \
@@ -242,6 +228,19 @@ typedef struct v {
#define ZERO(r...) \
{ \
switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
+ "eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
+ "eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n" \
+ "eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n" \
+ "eor " VR4(r) ".16b," VR4(r) ".16b," VR4(r) ".16b\n" \
+ "eor " VR5(r) ".16b," VR5(r) ".16b," VR5(r) ".16b\n" \
+ "eor " VR6(r) ".16b," VR6(r) ".16b," VR6(r) ".16b\n" \
+ "eor " VR7(r) ".16b," VR7(r) ".16b," VR7(r) ".16b\n" \
+ : WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
+ WVR4(r), WVR5(r), WVR6(r), WVR7(r)); \
+ break; \
case 4: \
__asm( \
"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
diff --git a/module/zfs/vdev_raidz_math_aarch64_neonx2.c b/module/zfs/vdev_raidz_math_aarch64_neonx2.c
index e05deeb98..f8688a06a 100644
--- a/module/zfs/vdev_raidz_math_aarch64_neonx2.c
+++ b/module/zfs/vdev_raidz_math_aarch64_neonx2.c
@@ -24,115 +24,183 @@
#include <sys/isa_defs.h>
-#if 0 // defined(__aarch64__)
+#if defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h"
-#define GEN_P_DEFINE() \
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 8
+#define ZERO_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7()
-#define GEN_P_STRIDE 8
-#define GEN_P_P 0, 1, 2, 3, 4, 5, 6, 7
+#define ZERO_D 0, 1, 2, 3, 4, 5, 6, 7
-#define GEN_PQ_DEFINE() \
+#define COPY_STRIDE 8
+#define COPY_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7()
+#define COPY_D 0, 1, 2, 3, 4, 5, 6, 7
+
+#define ADD_STRIDE 8
+#define ADD_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7()
+#define ADD_D 0, 1, 2, 3, 4, 5, 6, 7
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define MUL_D 0, 1, 2, 3
+
+#define GEN_P_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define GEN_P_STRIDE 4
+#define GEN_P_P 0, 1, 2, 3
+
+#define GEN_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
- GEN_X_DEFINE_8_9() \
- GEN_X_DEFINE_10_11() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define GEN_PQ_STRIDE 4
#define GEN_PQ_D 0, 1, 2, 3
-#define GEN_PQ_P 4, 5, 6, 7
-#define GEN_PQ_Q 8, 9, 10, 11
+#define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
- GEN_X_DEFINE_8_9() \
- GEN_X_DEFINE_22_23() \
- GEN_X_DEFINE_24_27() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define GEN_PQR_STRIDE 4
#define GEN_PQR_D 0, 1, 2, 3
-#define GEN_PQR_P 4, 5, 6, 7
-#define GEN_PQR_Q 8, 9, 22, 23
-#define GEN_PQR_R 24, 25, 26, 27
+#define GEN_PQR_C 4, 5, 6, 7
-#define REC_P_DEFINE() \
+#define SYN_Q_DEFINE() \
GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
-#define REC_P_STRIDE 4
-#define REC_P_X 0, 1, 2, 3
+#define SYN_Q_STRIDE 4
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
-#define REC_Q_DEFINE() \
+#define SYN_R_DEFINE() \
GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
-#define REC_Q_STRIDE 4
-#define REC_Q_X 0, 1, 2, 3
+#define SYN_R_STRIDE 4
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
-#define REC_R_DEFINE() \
+#define SYN_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
-#define REC_R_STRIDE 4
-#define REC_R_X 0, 1, 2, 3
+#define SYN_PQ_STRIDE 4
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_8_9() \
- GEN_X_DEFINE_16() \
- GEN_X_DEFINE_17() \
GEN_X_DEFINE_22_23() \
GEN_X_DEFINE_33_36()
#define REC_PQ_STRIDE 4
#define REC_PQ_X 0, 1, 2, 3
#define REC_PQ_Y 4, 5, 6, 7
-#define REC_PQ_D 8, 9, 22, 23
+#define REC_PQ_T 8, 9, 22, 23
-#define REC_PR_DEFINE() REC_PQ_DEFINE()
+#define SYN_PR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_PR_STRIDE 4
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
+
+#define REC_PR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_22_23() \
+ GEN_X_DEFINE_33_36()
#define REC_PR_STRIDE 4
#define REC_PR_X 0, 1, 2, 3
#define REC_PR_Y 4, 5, 6, 7
-#define REC_PR_D 8, 9, 22, 23
+#define REC_PR_T 8, 9, 22, 23
-#define REC_QR_DEFINE() REC_PQ_DEFINE()
+#define SYN_QR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_QR_STRIDE 4
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
+
+#define REC_QR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_22_23() \
+ GEN_X_DEFINE_33_36()
#define REC_QR_STRIDE 4
#define REC_QR_X 0, 1, 2, 3
#define REC_QR_Y 4, 5, 6, 7
-#define REC_QR_D 8, 9, 22, 23
+#define REC_QR_T 8, 9, 22, 23
-#define REC_PQR_DEFINE() \
+#define SYN_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
- GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
- GEN_X_DEFINE_22_23() \
- GEN_X_DEFINE_24_27() \
- GEN_X_DEFINE_28_30() \
- GEN_X_DEFINE_31() \
GEN_X_DEFINE_33_36()
-#define REC_PQR_STRIDE 4
-#define REC_PQR_X 0, 1, 2, 3
-#define REC_PQR_Y 4, 5, 6, 7
-#define REC_PQR_Z 8, 9, 22, 23
-#define REC_PQR_D 24, 25, 26, 27
-#define REC_PQR_XS 24, 25, 26, 27
-#define REC_PQR_YS 28, 29, 30, 31
+#define SYN_PQR_STRIDE 4
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
+#define REC_PQR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36()
+#define REC_PQR_STRIDE 2
+#define REC_PQR_X 0, 1
+#define REC_PQR_Y 2, 3
+#define REC_PQR_Z 4, 5
+#define REC_PQR_XS 6, 7
+#define REC_PQR_YS 8, 9
#include <sys/vdev_raidz_impl.h>
#include "vdev_raidz_math_impl.h"