diff options
author | Romain Dolbeau <[email protected]> | 2019-12-18 04:34:52 +0100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2019-12-17 19:34:52 -0800 |
commit | 118fc3ef07c53a88ea1d4c21142a2b01c4648434 (patch) | |
tree | 79465738f3e7f8ef8f593c5c9f097396e452950a /module | |
parent | fe564845c0c5ec6f0c65fd647b2018c7fc8d672d (diff) |
Minor performance fix for NEON RAID-Z
The NEON code replicates the SSE code too closely, including
a masked 16-bit shift. But NEON, like AltiVec (#9539), has an
unsigned 8-bit shift, so use that instead and drop the masking.
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Romain Dolbeau <[email protected]>
Closes #9725
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/vdev_raidz_math_aarch64_neon_common.h | 6 |
1 files changed, 2 insertions, 4 deletions
diff --git a/module/zfs/vdev_raidz_math_aarch64_neon_common.h b/module/zfs/vdev_raidz_math_aarch64_neon_common.h index 5312b9094..92a50b3a0 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neon_common.h +++ b/module/zfs/vdev_raidz_math_aarch64_neon_common.h @@ -479,10 +479,8 @@ typedef struct v { /* upper part */ \ "and v14.16b," VR0(r) ".16b,v15.16b\n" \ "and v13.16b," VR1(r) ".16b,v15.16b\n" \ - "sshr " VR0(r) ".8h," VR0(r) ".8h,#4\n" \ - "sshr " VR1(r) ".8h," VR1(r) ".8h,#4\n" \ - "and " VR0(r) ".16b," VR0(r) ".16b,v15.16b\n" \ - "and " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n" \ + "ushr " VR0(r) ".16b," VR0(r) ".16b,#4\n" \ + "ushr " VR1(r) ".16b," VR1(r) ".16b,#4\n" \ \ "tbl v12.16b,{v10.16b}," VR0(r) ".16b\n" \ "tbl v10.16b,{v10.16b}," VR1(r) ".16b\n" \ |