aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
author Romain Dolbeau <[email protected]> 2019-12-18 04:34:52 +0100
committer Brian Behlendorf <[email protected]> 2019-12-17 19:34:52 -0800
commit 118fc3ef07c53a88ea1d4c21142a2b01c4648434 (patch)
tree 79465738f3e7f8ef8f593c5c9f097396e452950a /module
parent fe564845c0c5ec6f0c65fd647b2018c7fc8d672d (diff)
Minor performance fix for NEON RAID-Z
The NEON code replicates the SSE code too closely, including a masked 16-bit shift. But NEON, like AltiVec (#9539), has an unsigned 8-bit shift, so use that instead and drop the masking.

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Romain Dolbeau <[email protected]>
Closes #9725
Diffstat (limited to 'module')
-rw-r--r-- module/zfs/vdev_raidz_math_aarch64_neon_common.h 6
1 file changed, 2 insertions, 4 deletions
diff --git a/module/zfs/vdev_raidz_math_aarch64_neon_common.h b/module/zfs/vdev_raidz_math_aarch64_neon_common.h
index 5312b9094..92a50b3a0 100644
--- a/module/zfs/vdev_raidz_math_aarch64_neon_common.h
+++ b/module/zfs/vdev_raidz_math_aarch64_neon_common.h
@@ -479,10 +479,8 @@ typedef struct v {
/* upper part */ \
"and v14.16b," VR0(r) ".16b,v15.16b\n" \
"and v13.16b," VR1(r) ".16b,v15.16b\n" \
- "sshr " VR0(r) ".8h," VR0(r) ".8h,#4\n" \
- "sshr " VR1(r) ".8h," VR1(r) ".8h,#4\n" \
- "and " VR0(r) ".16b," VR0(r) ".16b,v15.16b\n" \
- "and " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n" \
+ "ushr " VR0(r) ".16b," VR0(r) ".16b,#4\n" \
+ "ushr " VR1(r) ".16b," VR1(r) ".16b,#4\n" \
\
"tbl v12.16b,{v10.16b}," VR0(r) ".16b\n" \
"tbl v10.16b,{v10.16b}," VR1(r) ".16b\n" \