diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libspl/include/os/linux/sys/Makefile.am | 1 | ||||
-rw-r--r-- | lib/libspl/include/os/linux/sys/file.h | 49 | ||||
-rw-r--r-- | lib/libzpool/kernel.c | 610 |
3 files changed, 376 insertions, 284 deletions
diff --git a/lib/libspl/include/os/linux/sys/Makefile.am b/lib/libspl/include/os/linux/sys/Makefile.am index 6b170fa8c..f8b6d9fae 100644 --- a/lib/libspl/include/os/linux/sys/Makefile.am +++ b/lib/libspl/include/os/linux/sys/Makefile.am @@ -2,7 +2,6 @@ libspldir = $(includedir)/libspl/sys libspl_HEADERS = \ $(top_srcdir)/lib/libspl/include/os/linux/sys/byteorder.h \ $(top_srcdir)/lib/libspl/include/os/linux/sys/errno.h \ - $(top_srcdir)/lib/libspl/include/os/linux/sys/file.h \ $(top_srcdir)/lib/libspl/include/os/linux/sys/mnttab.h \ $(top_srcdir)/lib/libspl/include/os/linux/sys/mount.h \ $(top_srcdir)/lib/libspl/include/os/linux/sys/param.h \ diff --git a/lib/libspl/include/os/linux/sys/file.h b/lib/libspl/include/os/linux/sys/file.h deleted file mode 100644 index e0752ac25..000000000 --- a/lib/libspl/include/os/linux/sys/file.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _LIBSPL_SYS_FILE_H -#define _LIBSPL_SYS_FILE_H - -#include_next <sys/file.h> - -#include <sys/user.h> - -#define FREAD 1 -#define FWRITE 2 -// #define FAPPEND 8 - -#define FCREAT O_CREAT -#define FTRUNC O_TRUNC -#define FOFFMAX O_LARGEFILE -#define FSYNC O_SYNC -#define FDSYNC O_DSYNC -#define FEXCL O_EXCL - -#define FNODSYNC 0x10000 /* fsync pseudo flag */ -#define FNOFOLLOW 0x20000 /* don't follow symlinks */ -#define FIGNORECASE 0x80000 /* request case-insensitive lookups */ - -#endif diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index c14468cb2..ef52ed3af 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -49,7 +49,6 @@ int aok; uint64_t physmem; -vnode_t *rootdir = (vnode_t *)0xabcd1234; char hw_serial[HW_HOSTID_LEN]; struct utsname hw_utsname; vmem_t *zio_arena = NULL; @@ -488,183 +487,6 @@ procfs_list_add(procfs_list_t *procfs_list, void *p) * vnode operations * ========================================================================= */ -/* - * Note: for the xxxat() versions of these functions, we assume that the - * starting vp is always rootdir (which is true for spa_directory.c, the only - * ZFS consumer of these interfaces). We assert this is true, and then emulate - * them by adding '/' in front of the path. - */ - -/*ARGSUSED*/ -int -vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) -{ - int fd = -1; - int dump_fd = -1; - vnode_t *vp; - int old_umask = 0; - struct stat64 st; - int err; - - if (!(flags & FCREAT) && stat64(path, &st) == -1) { - err = errno; - return (err); - } - - if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) - flags |= O_DIRECT; - - if (flags & FCREAT) - old_umask = umask(0); - - /* - * The construct 'flags - FREAD' conveniently maps combinations of - * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 
- */ - fd = open64(path, flags - FREAD, mode); - if (fd == -1) { - err = errno; - return (err); - } - - if (flags & FCREAT) - (void) umask(old_umask); - - if (vn_dumpdir != NULL) { - char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL); - (void) snprintf(dumppath, MAXPATHLEN, - "%s/%s", vn_dumpdir, basename(path)); - dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); - umem_free(dumppath, MAXPATHLEN); - if (dump_fd == -1) { - err = errno; - close(fd); - return (err); - } - } else { - dump_fd = -1; - } - - if (fstat64_blk(fd, &st) == -1) { - err = errno; - close(fd); - if (dump_fd != -1) - close(dump_fd); - return (err); - } - - (void) fcntl(fd, F_SETFD, FD_CLOEXEC); - - *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); - - vp->v_fd = fd; - vp->v_size = st.st_size; - vp->v_path = spa_strdup(path); - vp->v_dump_fd = dump_fd; - - return (0); -} - -/*ARGSUSED*/ -int -vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, - int x3, vnode_t *startvp, int fd) -{ - char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); - int ret; - - ASSERT(startvp == rootdir); - (void) sprintf(realpath, "/%s", path); - - /* fd ignored for now, need if want to simulate nbmand support */ - ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); - - umem_free(realpath, strlen(path) + 2); - - return (ret); -} - -/*ARGSUSED*/ -int -vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, - int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) -{ - ssize_t rc, done = 0, split; - - if (uio == UIO_READ) { - rc = pread64(vp->v_fd, addr, len, offset); - if (vp->v_dump_fd != -1 && rc != -1) { - int status; - status = pwrite64(vp->v_dump_fd, addr, rc, offset); - ASSERT(status != -1); - } - } else { - /* - * To simulate partial disk writes, we split writes into two - * system calls so that the process can be killed in between. - */ - int sectors = len >> SPA_MINBLOCKSHIFT; - split = (sectors > 0 ? 
rand() % sectors : 0) << - SPA_MINBLOCKSHIFT; - rc = pwrite64(vp->v_fd, addr, split, offset); - if (rc != -1) { - done = rc; - rc = pwrite64(vp->v_fd, (char *)addr + split, - len - split, offset + split); - } - } - -#ifdef __linux__ - if (rc == -1 && errno == EINVAL) { - /* - * Under Linux, this most likely means an alignment issue - * (memory or disk) due to O_DIRECT, so we abort() in order to - * catch the offender. - */ - abort(); - } -#endif - if (rc == -1) - return (errno); - - done += rc; - - if (residp) - *residp = len - done; - else if (done != len) - return (EIO); - return (0); -} - -void -vn_close(vnode_t *vp) -{ - close(vp->v_fd); - if (vp->v_dump_fd != -1) - close(vp->v_dump_fd); - spa_strfree(vp->v_path); - umem_free(vp, sizeof (vnode_t)); -} - -/* - * At a minimum we need to update the size since vdev_reopen() - * will no longer call vn_openat(). - */ -int -fop_getattr(vnode_t *vp, vattr_t *vap) -{ - struct stat64 st; - int err; - - if (fstat64_blk(vp->v_fd, &st) == -1) { - err = errno; - close(vp->v_fd); - return (err); - } - - vap->va_size = st.st_size; - return (0); -} /* * ========================================================================= @@ -860,60 +682,6 @@ cmn_err(int ce, const char *fmt, ...) 
/* * ========================================================================= - * kobj interfaces - * ========================================================================= - */ -struct _buf * -kobj_open_file(char *name) -{ - struct _buf *file; - vnode_t *vp; - - /* set vp as the _fd field of the file */ - if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, - -1) != 0) - return ((void *)-1UL); - - file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); - file->_fd = (intptr_t)vp; - return (file); -} - -int -kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) -{ - ssize_t resid = 0; - - if (vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, - UIO_SYSSPACE, 0, 0, 0, &resid) != 0) - return (-1); - - return (size - resid); -} - -void -kobj_close_file(struct _buf *file) -{ - vn_close((vnode_t *)file->_fd); - umem_free(file, sizeof (struct _buf)); -} - -int -kobj_get_filesize(struct _buf *file, uint64_t *size) -{ - struct stat64 st; - vnode_t *vp = (vnode_t *)file->_fd; - - if (fstat64(vp->v_fd, &st) == -1) { - vn_close(vp); - return (errno); - } - *size = st.st_size; - return (0); -} - -/* - * ========================================================================= * misc routines * ========================================================================= */ @@ -1059,7 +827,7 @@ kernel_init(int mode) (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", - (mode & FWRITE) ? get_system_hostid() : 0); + (mode & SPA_MODE_WRITE) ? 
get_system_hostid() : 0);
 
 	random_init();
 
@@ -1068,7 +836,7 @@ kernel_init(int mode)
 	system_taskq_init();
 	icp_init();
 
-	spa_init(mode);
+	spa_init((spa_mode_t)mode);
 
 	fletcher_4_init();
 
@@ -1265,3 +1033,377 @@ zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
     boolean_t async)
 {
 }
+
+/*
+ * Open file
+ *
+ * path - fully qualified path to file
+ * flags - open(2) flags (O_RDONLY / O_WRONLY / O_RDWR, O_CREAT, O_EXCL, ...)
+ * mode - permission bits passed to open64() (used when O_CREAT is set)
+ * fpp - pointer to return file pointer
+ *
+ * Returns 0 on success, underlying errno on failure.
+ */
+int
+zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
+{
+	int fd = -1;
+	int dump_fd = -1;
+	int err;
+	int old_umask = 0;
+	zfs_file_t *fp;
+	struct stat64 st;
+
+	/* An existing file is required unless the caller asked to create it. */
+	if (!(flags & O_CREAT) && stat64(path, &st) == -1)
+		return (errno);
+
+	/* Existing block devices are opened with O_DIRECT. */
+	if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
+		flags |= O_DIRECT;
+
+	/* Clear the umask so a created file gets exactly 'mode'. */
+	if (flags & O_CREAT)
+		old_umask = umask(0);
+
+	fd = open64(path, flags, mode);
+	err = errno;
+
+	/*
+	 * Restore the umask before looking at the result so that a failed
+	 * open64() does not leave the process running with umask 0.
+	 */
+	if (flags & O_CREAT)
+		(void) umask(old_umask);
+
+	if (fd == -1)
+		return (err);
+
+	/*
+	 * When vn_dumpdir is set, also open a file with the same base name
+	 * under that directory; dump_fd stays -1 otherwise.
+	 */
+	if (vn_dumpdir != NULL) {
+		char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
+		char *inpath = basename((char *)(uintptr_t)path);
+
+		(void) snprintf(dumppath, MAXPATHLEN,
+		    "%s/%s", vn_dumpdir, inpath);
+		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
+		/* Capture errno before umem_free() has a chance to change it. */
+		err = errno;
+		umem_free(dumppath, MAXPATHLEN);
+		if (dump_fd == -1) {
+			close(fd);
+			return (err);
+		}
+	}
+
+	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+	fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
+	fp->f_fd = fd;
+	fp->f_dump_fd = dump_fd;
+	*fpp = fp;
+
+	return (0);
+}
+
+/*
+ * Close file
+ *
+ * fp - file pointer returned through zfs_file_open()'s fpp
+ *
+ * Closes the file descriptor (and the dump descriptor, if one was opened)
+ * and frees fp.
+ */
+void
+zfs_file_close(zfs_file_t *fp)
+{
+	close(fp->f_fd);
+	if (fp->f_dump_fd != -1)
+		close(fp->f_dump_fd);
+
+	umem_free(fp, sizeof (zfs_file_t));
+}
+
+/*
+ * Stateful write - use os internal file pointer to determine where to
+ * write and update on successful completion.
+ *
+ * fp - pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * resid - pointer to count of unwritten bytes (if short write)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
+{
+	ssize_t rc;
+
+	rc = write(fp->f_fd, buf, count);
+	if (rc < 0)
+		return (errno);
+
+	/*
+	 * A short write is reported through resid when the caller supplied
+	 * it; without resid a short write is an error.
+	 */
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateless write - os internal file pointer is not updated.
+ *
+ * fp - pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * pos - file offset to write to (only valid for seekable types)
+ * resid - pointer to count of unwritten bytes
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_pwrite(zfs_file_t *fp, const void *buf,
+    size_t count, loff_t pos, ssize_t *resid)
+{
+	ssize_t rc, split, done = 0;
+	int sectors;
+
+	/*
+	 * To simulate partial disk writes, we split writes into two
+	 * system calls so that the process can be killed in between.
+	 * This is used by ztest to simulate realistic failure modes.
+	 */
+	sectors = count >> SPA_MINBLOCKSHIFT;
+	split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT;
+	rc = pwrite64(fp->f_fd, buf, split, pos);
+	if (rc != -1) {
+		/* Bytes from the first half; the second half is added below. */
+		done = rc;
+		rc = pwrite64(fp->f_fd, (const char *)buf + split,
+		    count - split, pos + split);
+	}
+#ifdef __linux__
+	if (rc == -1 && errno == EINVAL) {
+		/*
+		 * Under Linux, this most likely means an alignment issue
+		 * (memory or disk) due to O_DIRECT, so we abort() in order
+		 * to catch the offender.
+		 */
+		abort();
+	}
+#endif
+
+	if (rc < 0)
+		return (errno);
+
+	done += rc;
+
+	if (resid) {
+		*resid = count - done;
+	} else if (done != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateful read - use os internal file pointer to determine where to
+ * read and update on successful completion.
+ *
+ * fp - pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to read into
+ * count - # of bytes to read
+ * resid - pointer to count of unread bytes (if short read)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
+{
+	/* read() returns ssize_t; do not narrow it to int. */
+	ssize_t rc;
+
+	rc = read(fp->f_fd, buf, count);
+	if (rc < 0)
+		return (errno);
+
+	/*
+	 * A short read is reported through resid when the caller supplied
+	 * it; without resid a short read is an error.
+	 */
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateless read - os internal file pointer is not updated.
+ *
+ * fp - pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to read into
+ * count - # of bytes to read
+ * off - file offset to read from (only valid for seekable types)
+ * resid - pointer to count of unread bytes (if short read)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
+    ssize_t *resid)
+{
+	ssize_t rc;
+
+	rc = pread64(fp->f_fd, buf, count, off);
+	if (rc < 0) {
+#ifdef __linux__
+		/*
+		 * Under Linux, this most likely means an alignment issue
+		 * (memory or disk) due to O_DIRECT, so we abort() in order to
+		 * catch the offender.
+		 */
+		if (errno == EINVAL)
+			abort();
+#endif
+		return (errno);
+	}
+
+	/* Copy what was read into the dump file at the same offset. */
+	if (fp->f_dump_fd != -1) {
+		int status;
+
+		status = pwrite64(fp->f_dump_fd, buf, rc, off);
+		ASSERT(status != -1);
+	}
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * lseek - set / get file pointer
+ *
+ * fp - pointer to file (pipe, socket, etc) to read from
+ * offp - value to seek to, returns current value plus passed offset
+ * whence - see man pages for standard lseek whence values
+ *
+ * Returns 0 on success errno on failure (ESPIPE for non seekable types)
+ */
+int
+zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
+{
+	loff_t rc;
+
+	rc = lseek(fp->f_fd, *offp, whence);
+	if (rc < 0)
+		return (errno);
+
+	/* Hand the resulting offset back through offp. */
+	*offp = rc;
+
+	return (0);
+}
+
+/*
+ * Get file attributes
+ *
+ * fp - file pointer
+ * zfattr - pointer to file attr structure
+ *
+ * Currently only used for fetching size and file mode
+ *
+ * Returns 0 on success or error code of underlying getattr call on failure.
+ */
+int
+zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
+{
+	struct stat64 st;
+
+	if (fstat64_blk(fp->f_fd, &st) == -1)
+		return (errno);
+
+	zfattr->zfa_size = st.st_size;
+	zfattr->zfa_mode = st.st_mode;
+
+	return (0);
+}
+
+/*
+ * Sync file to disk
+ *
+ * fp - file pointer
+ * flags - O_SYNC and or O_DSYNC
+ *
+ * Returns 0 on success or error code of underlying sync call on failure.
+ */
+int
+zfs_file_fsync(zfs_file_t *fp, int flags)
+{
+	/* flags is not consulted here; a full fsync() is always issued. */
+	(void) flags;
+
+	if (fsync(fp->f_fd) < 0)
+		return (errno);
+
+	return (0);
+}
+
+/*
+ * fallocate - allocate or free space on disk
+ *
+ * fp - file pointer
+ * mode - fallocate(2) mode (non-standard options for hole punching etc)
+ * offset - offset to start allocating or freeing from
+ * len - length to free / allocate
+ *
+ * Returns the result of fallocate(2) unchanged: 0 on success, -1 with
+ * errno set on failure (unlike the other zfs_file_*() calls, which
+ * return the errno value itself).
+ *
+ * OPTIONAL
+ */
+int
+zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
+{
+	return (fallocate(fp->f_fd, mode, offset, len));
+}
+
+/*
+ * Request current file pointer offset
+ *
+ * fp - pointer to file
+ *
+ * Returns current file offset.
+ */
+loff_t
+zfs_file_off(zfs_file_t *fp)
+{
+	/*
+	 * lseek() takes (fd, offset, whence): seeking 0 bytes from SEEK_CUR
+	 * reports the current offset without moving it.
+	 */
+	return (lseek(fp->f_fd, 0, SEEK_CUR));
+}
+
+/*
+ * unlink file
+ *
+ * path - fully qualified file path
+ *
+ * Returns 0 on success.
+ *
+ * OPTIONAL
+ */
+int
+zfs_file_unlink(const char *path)
+{
+	return (remove(path));
+}
+
+/*
+ * Get reference to file pointer
+ *
+ * fd - input file descriptor
+ * fpp - pointer to file pointer
+ *
+ * Returns 0 on success EBADF on failure.
+ * Unsupported in user space: calling this aborts the process.
+ */
+int
+zfs_file_get(int fd, zfs_file_t **fpp)
+{
+	abort();
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Drop reference to file pointer
+ *
+ * fd - input file descriptor
+ *
+ * Unsupported in user space: calling this aborts the process.
+ */
+void
+zfs_file_put(int fd)
+{
+	abort();
+}
|