aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- include/sys/fs/zfs.h 5
-rw-r--r-- include/sys/sa.h 2
-rw-r--r-- include/sys/zfs_sa.h 9
-rw-r--r-- include/sys/zfs_vfsops.h 1
-rw-r--r-- include/sys/zfs_znode.h 3
-rw-r--r-- module/nvpair/nvpair_alloc_spl.c 2
-rw-r--r-- module/zcommon/zfs_prop.c 17
-rw-r--r-- module/zfs/sa.c 28
-rw-r--r-- module/zfs/zfs_sa.c 80
-rw-r--r-- module/zfs/zfs_vfsops.c 16
-rw-r--r-- module/zfs/zfs_znode.c 9
-rw-r--r-- module/zfs/zpl_xattr.c 329
12 files changed, 425 insertions, 76 deletions
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 2ac84f645..2b6abbf1b 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -309,6 +309,11 @@ typedef enum {
ZFS_SYNC_DISABLED = 2
} zfs_sync_type_t;
+typedef enum {
+ ZFS_XATTR_OFF = 0,
+ ZFS_XATTR_DIR = 1,
+ ZFS_XATTR_SA = 2
+} zfs_xattr_type_t;
/*
* On-disk version number.
diff --git a/include/sys/sa.h b/include/sys/sa.h
index c8b924771..718cbfbd5 100644
--- a/include/sys/sa.h
+++ b/include/sys/sa.h
@@ -149,6 +149,8 @@ int sa_replace_all_by_template_locked(sa_handle_t *, sa_bulk_attr_t *,
boolean_t sa_enabled(objset_t *);
void sa_cache_init(void);
void sa_cache_fini(void);
+void *sa_spill_alloc(int);
+void sa_spill_free(void *);
int sa_set_sa_object(objset_t *, uint64_t);
int sa_hdrsize(void *);
void sa_handle_lock(sa_handle_t *);
diff --git a/include/sys/zfs_sa.h b/include/sys/zfs_sa.h
index 90bb9972b..0bac7808a 100644
--- a/include/sys/zfs_sa.h
+++ b/include/sys/zfs_sa.h
@@ -73,6 +73,7 @@ typedef enum zpl_attr {
ZPL_SYMLINK,
ZPL_SCANSTAMP,
ZPL_DACL_ACES,
+ ZPL_DXATTR,
ZPL_END
} zpl_attr_t;
@@ -126,12 +127,20 @@ typedef struct znode_phys {
} znode_phys_t;
#ifdef _KERNEL
+
+#define DXATTR_MAX_ENTRY_SIZE (32768)
+#define DXATTR_MAX_SA_SIZE (SPA_MAXBLOCKSIZE >> 1)
+
int zfs_sa_readlink(struct znode *, uio_t *);
void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *);
void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);
+int zfs_sa_get_xattr(struct znode *);
+int zfs_sa_set_xattr(struct znode *);
void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *);
void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *);
+void zfs_sa_init(void);
+void zfs_sa_fini(void);
#endif
#ifdef __cplusplus
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
index fc8be58bc..6d4d713ce 100644
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -76,6 +76,7 @@ typedef struct zfs_sb {
boolean_t z_use_fuids; /* version allows fuids */
boolean_t z_replay; /* set during ZIL replay */
boolean_t z_use_sa; /* version allow system attributes */
+ boolean_t z_xattr_sa; /* allow xattrs to be stored as SA */
uint64_t z_version; /* ZPL version */
uint64_t z_shares_dir; /* hidden shares dir */
kmutex_t z_lock;
diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
index 6a0c6a4df..6903ad4cc 100644
--- a/include/sys/zfs_znode.h
+++ b/include/sys/zfs_znode.h
@@ -105,6 +105,7 @@ extern "C" {
#define SA_ZPL_FLAGS(z) z->z_attr_table[ZPL_FLAGS]
#define SA_ZPL_SIZE(z) z->z_attr_table[ZPL_SIZE]
#define SA_ZPL_ZNODE_ACL(z) z->z_attr_table[ZPL_ZNODE_ACL]
+#define SA_ZPL_DXATTR(z) z->z_attr_table[ZPL_DXATTR]
#define SA_ZPL_PAD(z) z->z_attr_table[ZPL_PAD]
/*
@@ -206,6 +207,8 @@ typedef struct znode {
uint32_t z_sync_cnt; /* synchronous open count */
kmutex_t z_acl_lock; /* acl data lock */
zfs_acl_t *z_acl_cached; /* cached acl */
+ krwlock_t z_xattr_lock; /* xattr data lock */
+ nvlist_t *z_xattr_cached;/* cached xattrs */
list_node_t z_link_node; /* all znodes in fs link */
sa_handle_t *z_sa_hdl; /* handle to sa data */
boolean_t z_is_sa; /* are we native sa? */
diff --git a/module/nvpair/nvpair_alloc_spl.c b/module/nvpair/nvpair_alloc_spl.c
index d26d26913..63d57a19a 100644
--- a/module/nvpair/nvpair_alloc_spl.c
+++ b/module/nvpair/nvpair_alloc_spl.c
@@ -30,7 +30,7 @@
static void *
nv_alloc_sleep_spl(nv_alloc_t *nva, size_t size)
{
- return (kmem_alloc(size, KM_SLEEP));
+ return (kmem_alloc(size, KM_SLEEP | KM_NODEBUG));
}
static void *
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
index 9d65e35de..9afe3d900 100644
--- a/module/zcommon/zfs_prop.c
+++ b/module/zcommon/zfs_prop.c
@@ -186,6 +186,14 @@ zfs_prop_init(void)
{ NULL }
};
+ static zprop_index_t xattr_table[] = {
+ { "off", ZFS_XATTR_OFF },
+ { "on", ZFS_XATTR_DIR },
+ { "sa", ZFS_XATTR_SA },
+ { "dir", ZFS_XATTR_DIR },
+ { NULL }
+ };
+
/* inherit index properties */
zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
@@ -226,6 +234,9 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"latency | throughput", "LOGBIAS", logbias_table);
+ zprop_register_index(ZFS_PROP_XATTR, "xattr", ZFS_XATTR_DIR,
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
+ "on | off | dir | sa", "XATTR", xattr_table);
/* inherit index (boolean) properties */
zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
@@ -244,12 +255,8 @@ zfs_prop_init(void)
boolean_table);
zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table);
- zprop_register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "XATTR",
- boolean_table);
zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT,
- ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN",
- boolean_table);
+ ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", boolean_table);
zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND",
boolean_table);
diff --git a/module/zfs/sa.c b/module/zfs/sa.c
index 4278ed7e4..bcef7d1fb 100644
--- a/module/zfs/sa.c
+++ b/module/zfs/sa.c
@@ -201,6 +201,7 @@ sa_attr_type_t sa_dummy_zpl_layout[] = { 0 };
static int sa_legacy_attr_count = 16;
static kmem_cache_t *sa_cache = NULL;
+static kmem_cache_t *spill_cache = NULL;
/*ARGSUSED*/
static int
@@ -232,6 +233,8 @@ sa_cache_init(void)
sa_cache = kmem_cache_create("sa_cache",
sizeof (sa_handle_t), 0, sa_cache_constructor,
sa_cache_destructor, NULL, NULL, NULL, 0);
+ spill_cache = kmem_cache_create("spill_cache",
+ SPA_MAXBLOCKSIZE, 0, NULL, NULL, NULL, NULL, NULL, 0);
}
void
@@ -239,6 +242,21 @@ sa_cache_fini(void)
{
if (sa_cache)
kmem_cache_destroy(sa_cache);
+
+ if (spill_cache)
+ kmem_cache_destroy(spill_cache);
+}
+
+void *
+sa_spill_alloc(int flags)
+{
+ return kmem_cache_alloc(spill_cache, flags);
+}
+
+void
+sa_spill_free(void *obj)
+{
+ kmem_cache_free(spill_cache, obj);
}
static int
@@ -1618,7 +1636,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
sa_bulk_attr_t *attr_desc;
void *old_data[2];
int bonus_attr_count = 0;
- int bonus_data_size = 0, spill_data_size = 0;
+ int bonus_data_size = 0;
int spill_attr_count = 0;
int error;
uint16_t length;
@@ -1648,8 +1666,8 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
/* Bring spill buffer online if it isn't currently */
if ((error = sa_get_spill(hdl)) == 0) {
- spill_data_size = hdl->sa_spill->db_size;
- old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP);
+ ASSERT3U(hdl->sa_spill->db_size, <=, SPA_MAXBLOCKSIZE);
+ old_data[1] = sa_spill_alloc(KM_SLEEP);
bcopy(hdl->sa_spill->db_data, old_data[1],
hdl->sa_spill->db_size);
spill_attr_count =
@@ -1729,7 +1747,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
if (old_data[0])
kmem_free(old_data[0], bonus_data_size);
if (old_data[1])
- kmem_free(old_data[1], spill_data_size);
+ sa_spill_free(old_data[1]);
kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count);
return (error);
@@ -1998,6 +2016,8 @@ EXPORT_SYMBOL(sa_replace_all_by_template_locked);
EXPORT_SYMBOL(sa_enabled);
EXPORT_SYMBOL(sa_cache_init);
EXPORT_SYMBOL(sa_cache_fini);
+EXPORT_SYMBOL(sa_spill_alloc);
+EXPORT_SYMBOL(sa_spill_free);
EXPORT_SYMBOL(sa_set_sa_object);
EXPORT_SYMBOL(sa_hdrsize);
EXPORT_SYMBOL(sa_handle_lock);
diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c
index 324e9b96c..3ec6f0d70 100644
--- a/module/zfs/zfs_sa.c
+++ b/module/zfs/zfs_sa.c
@@ -63,6 +63,7 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = {
{"ZPL_SYMLINK", 0, SA_UINT8_ARRAY, 0},
{"ZPL_SCANSTAMP", 32, SA_UINT8_ARRAY, 0},
{"ZPL_DACL_ACES", 0, SA_ACL, 0},
+ {"ZPL_DXATTR", 0, SA_UINT8_ARRAY, 0},
{NULL, 0, 0, 0}
};
@@ -183,6 +184,83 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
}
}
+int
+zfs_sa_get_xattr(znode_t *zp)
+{
+ zfs_sb_t *zsb = ZTOZSB(zp);
+ char *obj;
+ int size;
+ int error;
+
+ ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
+ ASSERT(!zp->z_xattr_cached);
+ ASSERT(zp->z_is_sa);
+
+ error = sa_size(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), &size);
+ if (error) {
+ if (error == ENOENT)
+ return nvlist_alloc(&zp->z_xattr_cached,
+ NV_UNIQUE_NAME, KM_SLEEP);
+ else
+ return (error);
+ }
+
+ obj = sa_spill_alloc(KM_SLEEP);
+
+ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), obj, size);
+ if (error == 0)
+ error = nvlist_unpack(obj, size, &zp->z_xattr_cached, KM_SLEEP);
+
+ sa_spill_free(obj);
+
+ return (error);
+}
+
+int
+zfs_sa_set_xattr(znode_t *zp)
+{
+ zfs_sb_t *zsb = ZTOZSB(zp);
+ dmu_tx_t *tx;
+ char *obj;
+ size_t size;
+ int error;
+
+ ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
+ ASSERT(zp->z_xattr_cached);
+ ASSERT(zp->z_is_sa);
+
+ error = nvlist_size(zp->z_xattr_cached, &size, NV_ENCODE_XDR);
+ if (error)
+ goto out;
+
+ obj = sa_spill_alloc(KM_SLEEP);
+
+ error = nvlist_pack(zp->z_xattr_cached, &obj, &size,
+ NV_ENCODE_XDR, KM_SLEEP);
+ if (error)
+ goto out_free;
+
+ tx = dmu_tx_create(zsb->z_os);
+ dmu_tx_hold_sa_create(tx, size);
+ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ dmu_tx_abort(tx);
+ } else {
+ error = sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb),
+ obj, size, tx);
+ if (error)
+ dmu_tx_abort(tx);
+ else
+ dmu_tx_commit(tx);
+ }
+out_free:
+ sa_spill_free(obj);
+out:
+ return (error);
+}
+
/*
* I'm not convinced we should do any of this upgrade.
* since the SA code can read both old/new znode formats
@@ -338,6 +416,8 @@ EXPORT_SYMBOL(zfs_sa_readlink);
EXPORT_SYMBOL(zfs_sa_symlink);
EXPORT_SYMBOL(zfs_sa_get_scanstamp);
EXPORT_SYMBOL(zfs_sa_set_scanstamp);
+EXPORT_SYMBOL(zfs_sa_get_xattr);
+EXPORT_SYMBOL(zfs_sa_set_xattr);
EXPORT_SYMBOL(zfs_sa_upgrade);
EXPORT_SYMBOL(zfs_sa_upgrade_txholds);
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 920d87e4f..a0726e117 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -140,10 +140,16 @@ xattr_changed_cb(void *arg, uint64_t newval)
{
zfs_sb_t *zsb = arg;
- if (newval == TRUE)
- zsb->z_flags |= ZSB_XATTR;
- else
+ if (newval == ZFS_XATTR_OFF) {
zsb->z_flags &= ~ZSB_XATTR;
+ } else {
+ zsb->z_flags |= ZSB_XATTR;
+
+ if (newval == ZFS_XATTR_SA)
+ zsb->z_xattr_sa = B_TRUE;
+ else
+ zsb->z_xattr_sa = B_FALSE;
+ }
}
static void
@@ -641,6 +647,10 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp)
&sa_obj);
if (error)
goto out;
+
+ error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &zval);
+ if ((error == 0) && (zval == ZFS_XATTR_SA))
+ zsb->z_xattr_sa = B_TRUE;
} else {
/*
* Pre SA versions file systems should never touch
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 0443b3065..1edbd7e2e 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -106,6 +106,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
+ rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&zp->z_range_avl, zfs_range_compare,
@@ -113,6 +114,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
zp->z_dirlocks = NULL;
zp->z_acl_cached = NULL;
+ zp->z_xattr_cached = NULL;
zp->z_moved = 0;
return (0);
}
@@ -128,11 +130,13 @@ zfs_znode_cache_destructor(void *buf, void *arg)
rw_destroy(&zp->z_parent_lock);
rw_destroy(&zp->z_name_lock);
mutex_destroy(&zp->z_acl_lock);
+ rw_destroy(&zp->z_xattr_lock);
avl_destroy(&zp->z_range_avl);
mutex_destroy(&zp->z_range_lock);
ASSERT(zp->z_dirlocks == NULL);
ASSERT(zp->z_acl_cached == NULL);
+ ASSERT(zp->z_xattr_cached == NULL);
}
void
@@ -272,6 +276,11 @@ zfs_inode_destroy(struct inode *ip)
zp->z_acl_cached = NULL;
}
+ if (zp->z_xattr_cached) {
+ nvlist_free(zp->z_xattr_cached);
+ zp->z_xattr_cached = NULL;
+ }
+
kmem_cache_free(znode_cache, zp);
}
diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c
index cf52e720d..9117b7bc1 100644
--- a/module/zfs/zpl_xattr.c
+++ b/module/zfs/zpl_xattr.c
@@ -29,40 +29,54 @@
* as practically no size limit on the file, and the extended
* attributes permissions may differ from those of the parent file.
* This interface is really quite clever, but it's also completely
- * different than what is supported on Linux.
+ * different than what is supported on Linux. It also comes with a
+ * steep performance penalty when accessing small xattrs because they
+ * are not stored with the parent file.
*
* Under Linux extended attributes are manipulated by the system
* calls getxattr(2), setxattr(2), and listxattr(2). They consider
* extended attributes to be name/value pairs where the name is a
* NULL terminated string. The name must also include one of the
- * following name space prefixes:
+ * following namespace prefixes:
*
* user - No restrictions and is available to user applications.
* trusted - Restricted to kernel and root (CAP_SYS_ADMIN) use.
* system - Used for access control lists (system.nfs4_acl, etc).
* security - Used by SELinux to store a files security context.
*
- * This Linux interface is implemented internally using the more
- * flexible Solaris style extended attributes. Every extended
- * attribute is store as a file in a hidden directory associated
- * with the parent file. This ensures on disk compatibility with
- * zfs implementations on other platforms (Solaris, FreeBSD, MacOS).
+ * The value under Linux is limited to 65536 bytes of binary data.
+ * In practice, individual xattrs tend to be much smaller than this
+ * and are typically less than 100 bytes. A good example of this
+ * are the security.selinux xattrs which are less than 100 bytes and
+ * exist for every file when xattr labeling is enabled.
*
- * One consequence of this implementation is that when an extended
- * attribute is manipulated an inode is created. This inode will
- * exist in the Linux inode cache but there will be no associated
- * entry in the dentry cache which references it. This is safe
- * but it may result in some confusion.
+ * The Linux xattr implementation has been written to take advantage of
+ * this typical usage. When the dataset property 'xattr=sa' is set,
+ * then xattrs will be preferentially stored as System Attributes (SA).
+ * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
+ * up to 64k of xattrs to be stored in the spill block. If additional
+ * xattr space is required, which is unlikely under Linux, they will
+ * be stored using the traditional directory approach.
*
- * Longer term I would like to see the 'security.selinux' extended
- * attribute moved to a SA. This should significantly improve
- * performance on a SELinux enabled system by minimizing the
- * number of seeks required to access a file. However, for now
- * this xattr is still stored in a file because I'm pretty sure
- * adding a new SA will break on-disk compatibility.
+ * This optimization results in roughly a 3x performance improvement
+ * when accessing xattrs because it avoids the need to perform a seek
+ * for every xattr value. When multiple xattrs are stored per-file
+ * the performance improvements are even greater because all of the
+ * xattrs stored in the spill block will be cached.
+ *
+ * However, by default SA based xattrs are disabled in the Linux port
+ * to maximize compatibility with other implementations. If you do
+ * enable SA based xattrs then they will not be visible on platforms
+ * which do not support this feature.
+ *
+ * NOTE: One additional consequence of the xattr directory implementation
+ * is that when an extended attribute is manipulated an inode is created.
+ * This inode will exist in the Linux inode cache but there will be no
+ * associated entry in the dentry cache which references it. This is
+ * safe but it may result in some confusion. Enabling SA based xattrs
+ * largely avoids the issue except in the overflow case.
*/
-
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
@@ -104,17 +118,13 @@ zpl_xattr_filldir(void *arg, const char *name, int name_len,
return (0);
}
-ssize_t
-zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
+static ssize_t
+zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
{
- struct inode *ip = dentry->d_inode;
+ struct inode *ip = xf->inode;
struct inode *dxip = NULL;
loff_t pos = 3; /* skip '.', '..', and '.zfs' entries. */
- cred_t *cr = CRED();
int error;
- xattr_filldir_t xf = { buffer_size, 0, buffer, ip };
-
- crhold(cr);
/* Lookup the xattr directory */
error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
@@ -122,34 +132,84 @@ zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
if (error == -ENOENT)
error = 0;
- goto out;
+ return (error);
}
/* Fill provided buffer via zpl_zattr_filldir helper */
- error = -zfs_readdir(dxip, (void *)&xf, zpl_xattr_filldir, &pos, cr);
+ error = -zfs_readdir(dxip, (void *)xf, zpl_xattr_filldir, &pos, cr);
+ iput(dxip);
+
+ return (error);
+}
+
+static ssize_t
+zpl_xattr_list_sa(xattr_filldir_t *xf)
+{
+ znode_t *zp = ITOZ(xf->inode);
+ nvpair_t *nvp = NULL;
+ int error = 0;
+
+ mutex_enter(&zp->z_lock);
+ if (zp->z_xattr_cached == NULL)
+ error = -zfs_sa_get_xattr(zp);
+ mutex_exit(&zp->z_lock);
+
+ if (error)
+ return (error);
+
+ ASSERT(zp->z_xattr_cached);
+
+ while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
+ ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
+
+ error = zpl_xattr_filldir((void *)xf, nvpair_name(nvp),
+ strlen(nvpair_name(nvp)), 0, 0, 0);
+ if (error)
+ return (error);
+ }
+
+ return (0);
+}
+
+ssize_t
+zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+ znode_t *zp = ITOZ(dentry->d_inode);
+ zfs_sb_t *zsb = ZTOZSB(zp);
+ xattr_filldir_t xf = { buffer_size, 0, buffer, dentry->d_inode };
+ cred_t *cr = CRED();
+ int error = 0;
+
+ crhold(cr);
+ rw_enter(&zp->z_xattr_lock, RW_READER);
+
+ if (zsb->z_use_sa && zp->z_is_sa) {
+ error = zpl_xattr_list_sa(&xf);
+ if (error)
+ goto out;
+ }
+
+ error = zpl_xattr_list_dir(&xf, cr);
if (error)
goto out;
error = xf.offset;
out:
- if (dxip)
- iput(dxip);
+ rw_exit(&zp->z_xattr_lock);
crfree(cr);
return (error);
}
static int
-zpl_xattr_get(struct inode *ip, const char *name, void *buf, size_t size)
+zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
+ size_t size, cred_t *cr)
{
struct inode *dxip = NULL;
struct inode *xip = NULL;
- cred_t *cr = CRED();
int error;
- crhold(cr);
-
/* Lookup the xattr directory */
error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
if (error)
@@ -165,7 +225,7 @@ zpl_xattr_get(struct inode *ip, const char *name, void *buf, size_t size)
goto out;
}
- error = zpl_read_common(xip, buf, size, 0, UIO_SYSSPACE, 0, cr);
+ error = zpl_read_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr);
out:
if (xip)
iput(xip);
@@ -173,8 +233,59 @@ out:
if (dxip)
iput(dxip);
- crfree(cr);
+ return (error);
+}
+
+static int
+zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
+{
+ znode_t *zp = ITOZ(ip);
+ uchar_t *nv_value;
+ uint_t nv_size;
+ int error = 0;
+ ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
+
+ mutex_enter(&zp->z_lock);
+ if (zp->z_xattr_cached == NULL)
+ error = -zfs_sa_get_xattr(zp);
+ mutex_exit(&zp->z_lock);
+
+ if (error)
+ return (error);
+
+ ASSERT(zp->z_xattr_cached);
+ error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
+ &nv_value, &nv_size);
+ if (error)
+ return (error);
+
+ if (!size)
+ return (nv_size);
+
+ memcpy(value, nv_value, MIN(size, nv_size));
+
+ return (MIN(size, nv_size));
+}
+
+static int
+__zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
+ cred_t *cr)
+{
+ znode_t *zp = ITOZ(ip);
+ zfs_sb_t *zsb = ZTOZSB(zp);
+ int error;
+
+ ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
+
+ if (zsb->z_use_sa && zp->z_is_sa) {
+ error = zpl_xattr_get_sa(ip, name, value, size);
+ if (error >= 0)
+ goto out;
+ }
+
+ error = zpl_xattr_get_dir(ip, name, value, size, cr);
+out:
if (error == -ENOENT)
error = -ENODATA;
@@ -182,42 +293,43 @@ out:
}
static int
-zpl_xattr_set(struct inode *ip, const char *name, const void *value,
- size_t size, int flags)
+zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
+{
+ znode_t *zp = ITOZ(ip);
+ cred_t *cr = CRED();
+ int error;
+
+ crhold(cr);
+ rw_enter(&zp->z_xattr_lock, RW_READER);
+ error = __zpl_xattr_get(ip, name, value, size, cr);
+ rw_exit(&zp->z_xattr_lock);
+ crfree(cr);
+
+ return (error);
+}
+
+static int
+zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
+ size_t size, int flags, cred_t *cr)
{
struct inode *dxip = NULL;
struct inode *xip = NULL;
vattr_t *vap = NULL;
- cred_t *cr = CRED();
ssize_t wrote;
int error;
const int xattr_mode = S_IFREG | 0644;
- crhold(cr);
-
/* Lookup the xattr directory and create it if required. */
error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR | CREATE_XATTR_DIR,
cr, NULL, NULL);
if (error)
goto out;
- /*
- * Lookup a specific xattr name in the directory, two failure modes:
- * XATTR_CREATE: fail if xattr already exists
- * XATTR_REMOVE: fail if xattr does not exist
- */
+ /* Lookup a specific xattr name in the directory */
error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL);
- if (error) {
- if (error != -ENOENT)
- goto out;
+ if (error && (error != -ENOENT))
+ goto out;
- if ((error == -ENOENT) && (flags & XATTR_REPLACE))
- goto out;
- } else {
- error = -EEXIST;
- if (flags & XATTR_CREATE)
- goto out;
- }
error = 0;
/* Remove a specific name xattr when value is set to NULL. */
@@ -262,7 +374,6 @@ out:
if (dxip)
iput(dxip);
- crfree(cr);
if (error == -ENOENT)
error = -ENODATA;
@@ -272,8 +383,100 @@ out:
}
static int
+zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
+ size_t size, int flags, cred_t *cr)
+{
+ znode_t *zp = ITOZ(ip);
+ nvlist_t *nvl;
+ size_t sa_size;
+ int error;
+
+ ASSERT(zp->z_xattr_cached);
+ nvl = zp->z_xattr_cached;
+
+ if (value == NULL) {
+ error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
+ if (error == -ENOENT)
+ error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
+ } else {
+ /* Limited to 32k to keep nvpair memory allocations small */
+ if (size > DXATTR_MAX_ENTRY_SIZE)
+ return (-EFBIG);
+
+ /* Prevent the DXATTR SA from consuming the entire SA region */
+ error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
+ if (error)
+ return (error);
+
+ if (sa_size > DXATTR_MAX_SA_SIZE)
+ return (-EFBIG);
+
+ error = -nvlist_add_byte_array(nvl, name,
+ (uchar_t *)value, size);
+ if (error)
+ return (error);
+ }
+
+ /* Update the SA for additions, modifications, and removals. */
+ if (!error)
+ error = -zfs_sa_set_xattr(zp);
+
+ ASSERT3S(error, <=, 0);
+
+ return (error);
+}
+
+static int
+zpl_xattr_set(struct inode *ip, const char *name, const void *value,
+ size_t size, int flags)
+{
+ znode_t *zp = ITOZ(ip);
+ zfs_sb_t *zsb = ZTOZSB(zp);
+ cred_t *cr = CRED();
+ int error;
+
+ crhold(cr);
+ rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);
+
+ /*
+ * Before setting the xattr check to see if it already exists.
+ * This is done to ensure the following optional flags are honored.
+ *
+ * XATTR_CREATE: fail if xattr already exists
+ * XATTR_REPLACE: fail if xattr does not exist
+ */
+ error = __zpl_xattr_get(ip, name, NULL, 0, cr);
+ if (error < 0) {
+ if (error != -ENODATA)
+ goto out;
+
+ if ((error == -ENODATA) && (flags & XATTR_REPLACE))
+ goto out;
+ } else {
+ error = -EEXIST;
+ if (flags & XATTR_CREATE)
+ goto out;
+ }
+
+ /* Preferentially store the xattr as a SA for better performance */
+ if (zsb->z_use_sa && zsb->z_xattr_sa && zp->z_is_sa) {
+ error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
+ if (error == 0)
+ goto out;
+ }
+
+ error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
+out:
+ rw_exit(&ITOZ(ip)->z_xattr_lock);
+ crfree(cr);
+ ASSERT3S(error, <=, 0);
+
+ return (error);
+}
+
+static int
__zpl_xattr_user_get(struct inode *ip, const char *name,
- void *buffer, size_t size)
+ void *value, size_t size)
{
char *xattr_name;
int error;
@@ -285,7 +488,7 @@ __zpl_xattr_user_get(struct inode *ip, const char *name,
return -EOPNOTSUPP;
xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
- error = zpl_xattr_get(ip, xattr_name, buffer, size);
+ error = zpl_xattr_get(ip, xattr_name, value, size);
strfree(xattr_name);
return (error);
@@ -321,7 +524,7 @@ xattr_handler_t zpl_xattr_user_handler = {
static int
__zpl_xattr_trusted_get(struct inode *ip, const char *name,
- void *buffer, size_t size)
+ void *value, size_t size)
{
char *xattr_name;
int error;
@@ -333,7 +536,7 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name,
return -EINVAL;
xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
- error = zpl_xattr_get(ip, xattr_name, buffer, size);
+ error = zpl_xattr_get(ip, xattr_name, value, size);
strfree(xattr_name);
return (error);
@@ -369,7 +572,7 @@ xattr_handler_t zpl_xattr_trusted_handler = {
static int
__zpl_xattr_security_get(struct inode *ip, const char *name,
- void *buffer, size_t size)
+ void *value, size_t size)
{
char *xattr_name;
int error;
@@ -378,7 +581,7 @@ __zpl_xattr_security_get(struct inode *ip, const char *name,
return -EINVAL;
xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
- error = zpl_xattr_get(ip, xattr_name, buffer, size);
+ error = zpl_xattr_get(ip, xattr_name, value, size);
strfree(xattr_name);
return (error);