aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbehlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>2008-03-07 23:07:02 +0000
committerbehlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>2008-03-07 23:07:02 +0000
commit0b3cf046cb5b65ccaf22687e105a4380533c0305 (patch)
tree852c1c4b4d94570a49e271836b23b559e782be93
parent3b3ba48fe9a639d5a3cd1b8960deabefd35310be (diff)
Add the initial vestiges of vnode support
git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@30 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
-rw-r--r--include/sys/sysmacros.h1
-rw-r--r--include/sys/vnode.h97
-rw-r--r--modules/spl/Makefile.in1
-rw-r--r--modules/spl/spl-vnode.c168
4 files changed, 267 insertions, 0 deletions
diff --git a/include/sys/sysmacros.h b/include/sys/sysmacros.h
index 3bc9f7a37..b65a5797c 100644
--- a/include/sys/sysmacros.h
+++ b/include/sys/sysmacros.h
@@ -131,6 +131,7 @@ extern int highbit(unsigned long i);
#define makedevice(maj,min) makedev(maj,min)
#define zone_dataset_visible(x, y) (1)
#define INGLOBALZONE(z) (1)
+#define utsname system_utsname /* NOTE(review): also rewrites "struct utsname"; presumably targets a kernel global named system_utsname - confirm */
/* XXX - Borrowed from zfs project libsolcompat/include/sys/sysmacros.h */
/* common macros */
diff --git a/include/sys/vnode.h b/include/sys/vnode.h
new file mode 100644
index 000000000..9afac4c25
--- /dev/null
+++ b/include/sys/vnode.h
@@ -0,0 +1,97 @@
+#ifndef _SPL_VNODE_H
+#define _SPL_VNODE_H
+
+#define XVA_MAPSIZE 3 /* number of 32-bit words in each xvattr attribute bitmap */
+#define XVA_MAGIC 0x78766174 /* ASCII "xvat"; presumably the expected xva_magic value - confirm */
+
+typedef struct vnode { /* file-descriptor-backed stand-in for a vnode */
+ uint64_t v_size; /* file size; vn_open() stores st_size here */
+ int v_fd; /* descriptor returned by open64() in vn_open() */
+ mode_t v_mode; /* st_mode captured by vn_open() */
+ char *v_path; /* copy of the path passed to vn_open(); freed by vn_close() */
+} vnode_t;
+
+
+typedef struct xoptattr { /* optional attributes embedded in xvattr_t as xva_xoptattrs */
+ timestruc_t xoa_createtime; /* Create time of file */
+ uint8_t xoa_archive; /* NOTE(review): this and the uint8_t fields below look like per-attribute on/off flags - confirm */
+ uint8_t xoa_system;
+ uint8_t xoa_readonly;
+ uint8_t xoa_hidden;
+ uint8_t xoa_nounlink;
+ uint8_t xoa_immutable;
+ uint8_t xoa_appendonly;
+ uint8_t xoa_nodump;
+ uint8_t xoa_settable;
+ uint8_t xoa_opaque;
+ uint8_t xoa_av_quarantined;
+ uint8_t xoa_av_modified;
+} xoptattr_t;
+
+typedef struct vattr { /* basic file attributes; only the fields needed so far */
+ uint_t va_mask; /* bit-mask of attributes (presumably AT_* bits - confirm) */
+ u_offset_t va_size; /* file size in bytes; the one field VOP_GETATTR() fills in */
+} vattr_t;
+
+
+typedef struct xvattr { /* vattr plus request/return bitmaps and the optional attributes */
+ vattr_t xva_vattr; /* Embedded vattr structure */
+ uint32_t xva_magic; /* Magic Number (presumably XVA_MAGIC - confirm) */
+ uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */
+ uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */
+ uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */
+ uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */
+ xoptattr_t xva_xoptattrs; /* Optional attributes */
+} xvattr_t;
+
+typedef struct vsecattr { /* ACL description: entry counts plus pointers to the entries */
+ uint_t vsa_mask; /* mask bits; originally "See below", but no values for it are defined in this header */
+ int vsa_aclcnt; /* ACL entry count */
+ void *vsa_aclentp; /* pointer to ACL entries */
+ int vsa_dfaclcnt; /* default ACL entry count */
+ void *vsa_dfaclentp; /* pointer to default ACL entries */
+ size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */
+} vsecattr_t;
+
+#define AT_TYPE 0x00001 /* AT_*: one bit per attribute; presumably used in vattr.va_mask - confirm */
+#define AT_MODE 0x00002
+// #define AT_UID 0x00004 /* Conflicts with linux/auxvec.h */
+// #define AT_GID 0x00008 /* Conflicts with linux/auxvec.h */
+#define AT_FSID 0x00010
+#define AT_NODEID 0x00020
+#define AT_NLINK 0x00040
+#define AT_SIZE 0x00080
+#define AT_ATIME 0x00100
+#define AT_MTIME 0x00200
+#define AT_CTIME 0x00400
+#define AT_RDEV 0x00800
+#define AT_BLKSIZE 0x01000
+#define AT_NBLOCKS 0x02000
+#define AT_SEQ 0x08000 /* note: bit 0x04000 is not defined in this header */
+#define AT_XVATTR 0x10000
+
+#define CRCREAT 0 /* placeholder value; nothing in this header interprets it */
+
+#define VOP_CLOSE(vp, f, c, o, cr, ct) 0 /* no-op: expands to 0, arguments are discarded unevaluated */
+#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0 /* no-op: expands to 0, arguments are discarded unevaluated */
+#define VOP_GETATTR(vp, vap, fl, cr, ct) ((vap)->va_size = (vp)->v_size, 0) /* copies v_size into va_size; value is 0 */
+
+#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) /* fsync() on the vnode's descriptor; f, cr, ct unused */
+
+#define VN_RELE(vp) vn_close(vp) /* release == close and free; struct vnode has no reference count */
+
+extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp,
+ int x2, int x3); /* open or create path; 0 on success, else an errno value */
+extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp,
+ int x2, int x3, vnode_t *vp, int fd); /* vn_open() on "/" + path; vp must be rootdir */
+extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len,
+ offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp); /* positioned read/write; 0 or an errno value */
+extern void vn_close(vnode_t *vp); /* closes v_fd and frees vp; NOTE: all four definitions in spl-vnode.c sit inside #if 0 */
+
+#define vn_remove(path, x1, x2) remove(path) /* x1 and x2 are ignored */
+#define vn_rename(from, to, seg) rename((from), (to)) /* seg is ignored */
+#define vn_is_readonly(vp) B_FALSE /* always reports "not read-only"; vp is not evaluated */
+
+extern vnode_t *rootdir; /* the only start vnode vn_openat() accepts (checked by its ASSERT) */
+
+#endif /* _SPL_VNODE_H */
diff --git a/modules/spl/Makefile.in b/modules/spl/Makefile.in
index 09e934e4f..667858aa9 100644
--- a/modules/spl/Makefile.in
+++ b/modules/spl/Makefile.in
@@ -13,6 +13,7 @@ spl-objs += spl-kmem.o
spl-objs += spl-thread.o
spl-objs += spl-taskq.o
spl-objs += spl-rwlock.o
+spl-objs += spl-vnode.o
spl-objs += spl-generic.o
splmodule := spl.ko
diff --git a/modules/spl/spl-vnode.c b/modules/spl/spl-vnode.c
new file mode 100644
index 000000000..5089f8567
--- /dev/null
+++ b/modules/spl/spl-vnode.c
@@ -0,0 +1,168 @@
+#include <sys/sysmacros.h>
+#include "config.h"
+
+/*
+ * XXX: currently borrowed from libsolcompat until this
+ * can be adapted to the linux kernel interfaces.
+ */
+#if 0
+/*
+ * =========================================================================
+ * vnode operations
+ * =========================================================================
+ */
+/*
+ * Note: for the xxxat() versions of these functions, we assume that the
+ * starting vp is always rootdir (which is true for spa_directory.c, the only
+ * ZFS consumer of these interfaces). We assert this is true, and then emulate
+ * them by adding '/' in front of the path.
+ */
+
+/*
+ * Open (or, with FCREAT, create) the file named by 'path' and return a newly
+ * allocated vnode_t wrapping the resulting file descriptor.
+ *
+ *	path	file to open; a private copy is stored in v_path
+ *	flags	FREAD/FWRITE/FCREAT-style flags, handed to open64() as
+ *		'flags - FREAD'
+ *	mode	permission bits for open64(); when FCREAT is set the process
+ *		umask is cleared around the call and restored afterwards
+ *	vpp	receives the new vnode on success; left untouched on failure
+ *	x1-x3	unused, kept for interface compatibility
+ *
+ * Returns 0 on success or an errno value on failure.  ENAMETOOLONG is
+ * returned when the (possibly rewritten) path does not fit in MAXPATHLEN
+ * bytes.  The vnode is released with vn_close().
+ */
+/*ARGSUSED*/
+int
+vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
+{
+	int fd;
+	int err;
+	int len;
+	vnode_t *vp;
+	int old_umask = 0;
+	char rpath[MAXPATHLEN];	/* path actually handed to open64() */
+	struct stat64 st;
+
+	/*
+	 * If we're accessing a real disk from userland, we need to use
+	 * the character interface to avoid caching.  This is particularly
+	 * important if we're trying to look at a real in-kernel storage
+	 * pool from userland, e.g. via zdb, because otherwise we won't
+	 * see the changes occurring under the segmap cache.
+	 * On the other hand, the stupid character device returns zero
+	 * for its size.  So -- gag -- we open the block device to get
+	 * its size, and remember it for subsequent VOP_GETATTR().
+	 */
+#if defined(__sun__) || defined(__sun)
+	if (strncmp(path, "/dev/", 5) == 0) {
+#else
+	if (0) {
+#endif
+		char *dsk;
+
+		fd = open64(path, O_RDONLY);
+		if (fd == -1)
+			return (errno);
+		if (fstat64(fd, &st) == -1) {
+			/* close() may overwrite errno, so latch it first. */
+			err = errno;
+			(void) close(fd);
+			return (err);
+		}
+		(void) close(fd);
+
+		len = snprintf(rpath, sizeof (rpath), "%s", path);
+		if (len < 0 || (size_t)len >= sizeof (rpath))
+			return (ENAMETOOLONG);
+
+		/*
+		 * Rewrite ".../dsk/..." as ".../rdsk/...".  The result is one
+		 * byte longer than 'path', so it is bounds-checked again.
+		 */
+		dsk = strstr(path, "/dsk/");
+		if (dsk != NULL) {
+			size_t off = (size_t)(dsk - path) + 1;
+
+			len = snprintf(rpath + off, sizeof (rpath) - off,
+			    "r%s", dsk + 1);
+			if (len < 0 || (size_t)len >= sizeof (rpath) - off)
+				return (ENAMETOOLONG);
+		}
+	} else {
+		len = snprintf(rpath, sizeof (rpath), "%s", path);
+		if (len < 0 || (size_t)len >= sizeof (rpath))
+			return (ENAMETOOLONG);
+		if (!(flags & FCREAT) && stat64(rpath, &st) == -1)
+			return (errno);
+	}
+
+#ifdef __linux__
+	/* 'st' is only valid here when FCREAT is clear (see stat64 above). */
+	if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
+		flags |= O_DIRECT;
+		if (flags & FWRITE)
+			flags |= O_EXCL;
+	}
+#endif
+
+	if (flags & FCREAT)
+		old_umask = umask(0);
+
+	/*
+	 * The construct 'flags - FREAD' conveniently maps combinations of
+	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
+	 */
+	fd = open64(rpath, flags - FREAD, mode);
+	err = (fd == -1) ? errno : 0;	/* latch before umask() runs */
+
+	if (flags & FCREAT)
+		(void) umask(old_umask);
+
+	if (fd == -1)
+		return (err);
+
+	/*
+	 * NOTE(review): this overwrites the block-device stat gathered on the
+	 * raw-disk path above, so v_size there comes from the character
+	 * device -- confirm that matches the intent described earlier.
+	 */
+	if (fstat64(fd, &st) == -1) {
+		err = errno;
+		(void) close(fd);
+		return (err);
+	}
+
+	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
+
+	vp->v_fd = fd;
+	vp->v_size = st.st_size;
+	vp->v_mode = st.st_mode;
+	vp->v_path = spa_strdup(path);
+
+	return (0);
+}
+
+/*
+ * Directory-relative open, emulated for the one case this layer handles:
+ * 'startvp' must be rootdir, so the lookup is done on "/" + path through
+ * vn_open().  'fd' is accepted but not used.  Returns whatever vn_open()
+ * returns.
+ */
+/*ARGSUSED*/
+int
+vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
+    int x3, vnode_t *startvp, int fd)
+{
+	size_t bufsz = strlen(path) + 2;	/* leading '/' plus NUL */
+	char *abspath = umem_alloc(bufsz, UMEM_NOFAIL);
+	int error;
+
+	ASSERT(startvp == rootdir);
+	(void) sprintf(abspath, "/%s", path);
+
+	/* fd ignored for now, need if want to simulate nbmand support */
+	error = vn_open(abspath, x1, flags, mode, vpp, x2, x3);
+	umem_free(abspath, bufsz);
+
+	return (error);
+}
+
+/*
+ * Read or write 'len' bytes at byte 'offset' of the file behind 'vp'.
+ *
+ *	uio	UIO_READ selects pread64(); any other value writes
+ *	addr	caller's buffer
+ *	residp	if non-NULL, receives the number of bytes NOT transferred;
+ *		if NULL, a short transfer is reported as EIO
+ *	x1-x4	unused, kept for interface compatibility
+ *
+ * Returns 0 on success, or an errno value if the underlying call failed.
+ */
+/*ARGSUSED*/
+int
+vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
+    int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
+{
+	ssize_t iolen;
+
+	if (uio == UIO_READ) {
+		iolen = pread64(vp->v_fd, addr, len, offset);
+	} else {
+		/*
+		 * To simulate partial disk writes, we split writes into two
+		 * system calls so that the process can be killed in between.
+		 */
+#ifdef ZFS_DEBUG
+		if (!S_ISBLK(vp->v_mode) && !S_ISCHR(vp->v_mode)) {
+			ssize_t split = (len > 0 ? rand() % len : 0);
+
+			iolen = pwrite64(vp->v_fd, addr, split, offset);
+			/*
+			 * Only issue the second half once the first half is
+			 * fully written.  A failed first write (-1) must not
+			 * be added to the second result, and a failed second
+			 * write is reported as an error rather than folded
+			 * into the byte count.
+			 */
+			if (iolen == split) {
+				ssize_t rest = pwrite64(vp->v_fd,
+				    (char *)addr + split, len - split,
+				    offset + split);
+
+				iolen = (rest < 0) ? -1 : iolen + rest;
+			}
+		} else {
+			iolen = pwrite64(vp->v_fd, addr, len, offset);
+		}
+#else
+		iolen = pwrite64(vp->v_fd, addr, len, offset);
+#endif
+	}
+
+	if (iolen < 0)
+		return (errno);
+	if (residp)
+		*residp = len - iolen;
+	else if (iolen != len)
+		return (EIO);
+	return (0);
+}
+
+/*
+ * Tear down a vnode produced by vn_open(): close its descriptor, free the
+ * saved path string, then free the vnode itself.
+ */
+void
+vn_close(vnode_t *vp)
+{
+	int fd = vp->v_fd;
+	char *path = vp->v_path;
+
+	close(fd);
+	spa_strfree(path);
+	umem_free(vp, sizeof (*vp));
+}
+#endif