diff options
author | Matt Ahrens <[email protected]> | 2018-10-01 15:13:12 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-10-11 10:19:33 -0700 |
commit | 5d43cc9a59d61aea37a1236e9d28628856030947 (patch) | |
tree | 366ef984481334ae6f99287048ced780eb097712 /include/sys | |
parent | 50a343d85c04698d51c154375a00994dea81e6db (diff) |
OpenZFS 9689 - zfs range lock code should not be zpl-specific
The ZFS range locking code in zfs_rlock.c/h depends on ZPL-specific
data structures, specifically znode_t. However, it's also used by
the ZVOL code, which uses a "dummy" znode_t to pass to the range
locking code.
We should clean this up so that the range locking code is generic
and can be used equally by ZPL and ZVOL, and also can be used by
future consumers that may need to run in userland (libzpool) as
well as the kernel.
Porting notes:
* Added missing sys/avl.h include to sys/zfs_rlock.h.
* Removed 'dbuf is within the locked range' ASSERTs from dmu_sync().
This was needed because ztest does not yet use a locked_range_t.
* Removed "Approved by:" tag requirement from OpenZFS commit
check to prevent needless warnings when integrating changes
which has not been merged to illumos.
* Reverted free_list range lock changes which were originally
needed to defer the cv_destroy() which was called immediately
after cv_broadcast(). With d2733258 this should be safe but
if not we may need to reintroduce this logic.
* Reverts: The following two commits were reverted and squashed in
to this change in order to make it easier to apply OpenZFS 9689.
- d88895a0, which removed the dummy znode from zvol_state
- e3a07cd0, which updated ztest to use range locks
* Preserved optimized rangelock comparison function. Preserved the
rangelock free list. The cv_destroy() function will block waiting
for all processes in cv_wait() to be scheduled and drop their
reference. This is done to ensure it's safe to free the condition
variable. However, blocking while holding the rl->rl_lock mutex
can result in a deadlock on Linux. A free list is introduced to
defer the cv_destroy() and kmem_free() until after the mutex is
released.
Authored by: Matthew Ahrens <[email protected]>
Reviewed by: Brian Behlendorf <[email protected]>
Reviewed by: Serapheim Dimitropoulos <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Brad Lewis <[email protected]>
Ported-by: Brian Behlendorf <[email protected]>
OpenZFS-issue: https://illumos.org/issues/9689
OpenZFS-commit: https://github.com/openzfs/openzfs/pull/680
External-issue: DLPX-58662
Closes #7980
Diffstat (limited to 'include/sys')
-rw-r--r-- | include/sys/dmu.h | 3 | ||||
-rw-r--r-- | include/sys/zfs_rlock.h | 98 | ||||
-rw-r--r-- | include/sys/zfs_znode.h | 4 |
3 files changed, 35 insertions, 70 deletions
diff --git a/include/sys/dmu.h b/include/sys/dmu.h index bc7046fdc..f8b5f096a 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -73,6 +73,7 @@ struct arc_buf; struct zio_prop; struct sa_handle; struct dsl_crypto_params; +struct locked_range; typedef struct objset objset_t; typedef struct dmu_tx dmu_tx_t; @@ -1034,7 +1035,7 @@ typedef struct zgd { struct lwb *zgd_lwb; struct blkptr *zgd_bp; dmu_buf_t *zgd_db; - struct rl *zgd_rl; + struct locked_range *zgd_lr; void *zgd_private; } zgd_t; diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h index 8483b4e8b..05b080843 100644 --- a/include/sys/zfs_rlock.h +++ b/include/sys/zfs_rlock.h @@ -22,6 +22,9 @@ * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2018 by Delphix. All rights reserved. + */ #ifndef _SYS_FS_ZFS_RLOCK_H #define _SYS_FS_ZFS_RLOCK_H @@ -30,85 +33,46 @@ extern "C" { #endif -#include <sys/list.h> #include <sys/avl.h> -#ifdef _KERNEL -#include <sys/condvar.h> -#else -#include <sys/zfs_context.h> -#endif - typedef enum { RL_READER, RL_WRITER, RL_APPEND -} rl_type_t; +} rangelock_type_t; -typedef struct zfs_rlock { - kmutex_t zr_mutex; /* protects changes to zr_avl */ - avl_tree_t zr_avl; /* avl tree of range locks */ - uint64_t *zr_size; /* points to znode->z_size */ - uint_t *zr_blksz; /* points to znode->z_blksz */ - uint64_t *zr_max_blksz; /* points to zfsvfs->z_max_blksz */ -} zfs_rlock_t; - -typedef struct rl { - zfs_rlock_t *r_zrl; - avl_node_t r_node; /* avl node link */ - uint64_t r_off; /* file range offset */ - uint64_t r_len; /* file range length */ - uint_t r_cnt; /* range reference count in tree */ - rl_type_t r_type; /* range type */ - kcondvar_t r_wr_cv; /* cv for waiting writers */ - kcondvar_t r_rd_cv; /* cv for waiting readers */ - uint8_t r_proxy; /* acting for original range */ - uint8_t r_write_wanted; /* writer wants to lock this range */ - uint8_t r_read_wanted; /* reader wants to lock this range */ - list_node_t rl_node; /* used for deferred release */ -} rl_t; - -/* - * Lock a range (offset, length) as either shared (RL_READER) - * or exclusive (RL_WRITER or RL_APPEND). RL_APPEND is a special type that - * is converted to RL_WRITER that specified to lock from the start of the - * end of file. Returns the range lock structure. - */ -rl_t *zfs_range_lock(zfs_rlock_t *zrl, uint64_t off, uint64_t len, - rl_type_t type); +struct locked_range; -/* Unlock range and destroy range lock structure. */ -void zfs_range_unlock(rl_t *rl); +typedef void (rangelock_cb_t)(struct locked_range *, void *); -/* - * Reduce range locked as RW_WRITER from whole file to specified range. - * Asserts the whole file was previously locked. - */ -void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len); +typedef struct rangelock { + avl_tree_t rl_tree; /* contains locked_range_t */ + kmutex_t rl_lock; + rangelock_cb_t *rl_cb; + void *rl_arg; +} rangelock_t; -/* - * AVL comparison function used to order range locks - * Locks are ordered on the start offset of the range. - */ -int zfs_range_compare(const void *arg1, const void *arg2); +typedef struct locked_range { + rangelock_t *lr_rangelock; /* rangelock that this lock applies to */ + avl_node_t lr_node; /* avl node link */ + uint64_t lr_offset; /* file range offset */ + uint64_t lr_length; /* file range length */ + uint_t lr_count; /* range reference count in tree */ + rangelock_type_t lr_type; /* range type */ + kcondvar_t lr_write_cv; /* cv for waiting writers */ + kcondvar_t lr_read_cv; /* cv for waiting readers */ + uint8_t lr_proxy; /* acting for original range */ + uint8_t lr_write_wanted; /* writer wants to lock this range */ + uint8_t lr_read_wanted; /* reader wants to lock this range */ +} locked_range_t; -static inline void -zfs_rlock_init(zfs_rlock_t *zrl) -{ - mutex_init(&zrl->zr_mutex, NULL, MUTEX_DEFAULT, NULL); - avl_create(&zrl->zr_avl, zfs_range_compare, - sizeof (rl_t), offsetof(rl_t, r_node)); - zrl->zr_size = NULL; - zrl->zr_blksz = NULL; - zrl->zr_max_blksz = NULL; -} +void rangelock_init(rangelock_t *, rangelock_cb_t *, void *); +void rangelock_fini(rangelock_t *); -static inline void -zfs_rlock_destroy(zfs_rlock_t *zrl) -{ - avl_destroy(&zrl->zr_avl); - mutex_destroy(&zrl->zr_mutex); -} +locked_range_t *rangelock_enter(rangelock_t *, + uint64_t, uint64_t, rangelock_type_t); +void rangelock_exit(locked_range_t *); +void rangelock_reduce(locked_range_t *, uint64_t, uint64_t); #ifdef __cplusplus } diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index 01f4328f0..5fa0986ea 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ @@ -191,7 +191,7 @@ typedef struct znode { krwlock_t z_parent_lock; /* parent lock for directories */ krwlock_t z_name_lock; /* "master" lock for dirent locks */ zfs_dirlock_t *z_dirlocks; /* directory entry lock list */ - zfs_rlock_t z_range_lock; /* file range lock */ + rangelock_t z_rangelock; /* file range locks */ uint8_t z_unlinked; /* file has been unlinked */ uint8_t z_atime_dirty; /* atime needs to be synced */ uint8_t z_zn_prefetch; /* Prefetch znodes? */ |