Diffstat (limited to 'module')
-rw-r--r--  module/zfs/dmu_objset.c  |  20
-rw-r--r--  module/zfs/dsl_crypt.c   | 102
-rw-r--r--  module/zfs/dsl_dataset.c |  87
-rw-r--r--  module/zfs/dsl_pool.c    |  27
-rw-r--r--  module/zfs/spa.c         |  18
-rw-r--r--  module/zfs/zil.c         |  26
6 files changed, 186 insertions(+), 94 deletions(-)
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 3c9a817f7..086a65114 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -1157,6 +1157,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
{
dmu_objset_create_arg_t *doca = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
+ spa_t *spa = dp->dp_spa;
dsl_dir_t *pdd;
const char *tail;
dsl_dataset_t *ds;
@@ -1174,8 +1175,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
bp = dsl_dataset_get_blkptr(ds);
- os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
- ds, bp, doca->doca_type, tx);
+ os = dmu_objset_create_impl(spa, ds, bp, doca->doca_type, tx);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
if (doca->doca_userfunc != NULL) {
@@ -1199,7 +1199,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
ds->ds_owner = FTAG;
mutex_exit(&ds->ds_lock);
- rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
tx->tx_txg);
if (tmpds != NULL) {
@@ -1210,8 +1210,12 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
dmu_objset_do_userquota_updates(os, tx);
taskq_wait(dp->dp_sync_taskq);
+ if (txg_list_member(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
+ ASSERT3P(ds->ds_key_mapping, !=, NULL);
+ key_mapping_rele(spa, ds->ds_key_mapping, ds);
+ }
- rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
tx->tx_txg);
if (tmpds != NULL) {
@@ -1220,8 +1224,11 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
}
VERIFY0(zio_wait(rzio));
- if (need_sync_done)
+ if (need_sync_done) {
+ ASSERT3P(ds->ds_key_mapping, !=, NULL);
+ key_mapping_rele(spa, ds->ds_key_mapping, ds);
dsl_dataset_sync_done(ds, tx);
+ }
mutex_enter(&ds->ds_lock);
ds->ds_owner = NULL;
@@ -1229,7 +1236,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
}
spa_history_log_internal_ds(ds, "create", tx, "");
- zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE);
+ zvol_create_minors(spa, doca->doca_name, B_TRUE);
dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
dsl_dir_rele(pdd, FTAG);
@@ -1702,7 +1709,6 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
if (os->os_raw_receive ||
os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
ASSERT(os->os_encrypted);
- os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
arc_convert_to_raw(os->os_phys_buf,
os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
DMU_OT_OBJSET, NULL, NULL, NULL);
diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c
index 6937fc9e1..0eeb260c3 100644
--- a/module/zfs/dsl_crypt.c
+++ b/module/zfs/dsl_crypt.c
@@ -896,6 +896,20 @@ spa_keystore_unload_wkey(const char *dsname)
int ret = 0;
dsl_dir_t *dd = NULL;
dsl_pool_t *dp = NULL;
+ spa_t *spa = NULL;
+
+ ret = spa_open(dsname, &spa, FTAG);
+ if (ret != 0)
+ return (ret);
+
+ /*
+ * Wait for any outstanding txg IO to complete, releasing any
+ * remaining references on the wkey.
+ */
+ if (spa_mode(spa) != FREAD)
+ txg_wait_synced(spa->spa_dsl_pool, 0);
+
+ spa_close(spa, FTAG);
/* hold the dsl dir */
ret = dsl_pool_hold(dsname, FTAG, &dp);
@@ -935,9 +949,56 @@ error:
return (ret);
}
+void
+key_mapping_add_ref(dsl_key_mapping_t *km, void *tag)
+{
+ ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);
+ zfs_refcount_add(&km->km_refcnt, tag);
+}
+
+/*
+ * The locking here is a little tricky to ensure we don't cause unnecessary
+ * performance problems. We want to release a key mapping whenever someone
+ * decrements the refcount to 0, but freeing the mapping requires removing
+ * it from the spa_keystore, which requires holding sk_km_lock as a writer.
+ * Most of the time we don't want to hold this lock as a writer, since the
+ * same lock is held as a reader for each IO that needs to encrypt / decrypt
+ * data for any dataset and in practice we will only actually free the
+ * mapping after unmounting a dataset.
+ */
+void
+key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag)
+{
+ ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);
+
+ if (zfs_refcount_remove(&km->km_refcnt, tag) != 0)
+ return;
+
+ /*
+ * We think we are going to need to free the mapping. Add a
+ * reference to prevent most other releasers from thinking
+ * this might be their responsibility. This is inherently
+ * racy, so we will confirm that we are legitimately the
+ * last holder once we have the sk_km_lock as a writer.
+ */
+ zfs_refcount_add(&km->km_refcnt, FTAG);
+
+ rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
+ if (zfs_refcount_remove(&km->km_refcnt, FTAG) != 0) {
+ rw_exit(&spa->spa_keystore.sk_km_lock);
+ return;
+ }
+
+ avl_remove(&spa->spa_keystore.sk_key_mappings, km);
+ rw_exit(&spa->spa_keystore.sk_km_lock);
+
+ spa_keystore_dsl_key_rele(spa, km->km_key, km);
+ kmem_free(km, sizeof (dsl_key_mapping_t));
+}
+
int
-spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
- dsl_dir_t *dd, void *tag)
+spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag,
+ dsl_key_mapping_t **km_out)
{
int ret;
avl_index_t where;
@@ -948,14 +1009,17 @@ spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
km = kmem_zalloc(sizeof (dsl_key_mapping_t), KM_SLEEP);
zfs_refcount_create(&km->km_refcnt);
- ret = spa_keystore_dsl_key_hold_dd(spa, dd, km, &km->km_key);
+ ret = spa_keystore_dsl_key_hold_dd(spa, ds->ds_dir, km, &km->km_key);
if (ret != 0) {
zfs_refcount_destroy(&km->km_refcnt);
kmem_free(km, sizeof (dsl_key_mapping_t));
+
+ if (km_out != NULL)
+ *km_out = NULL;
return (ret);
}
- km->km_dsobj = dsobj;
+ km->km_dsobj = ds->ds_object;
rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
@@ -971,9 +1035,13 @@ spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
if (found_km != NULL) {
should_free = B_TRUE;
zfs_refcount_add(&found_km->km_refcnt, tag);
+ if (km_out != NULL)
+ *km_out = found_km;
} else {
zfs_refcount_add(&km->km_refcnt, tag);
avl_insert(&spa->spa_keystore.sk_key_mappings, km, where);
+ if (km_out != NULL)
+ *km_out = km;
}
rw_exit(&spa->spa_keystore.sk_km_lock);
@@ -988,24 +1056,16 @@ spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
}
int
-spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag)
-{
- return (spa_keystore_create_mapping_impl(spa, ds->ds_object,
- ds->ds_dir, tag));
-}
-
-int
spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
{
int ret;
dsl_key_mapping_t search_km;
dsl_key_mapping_t *found_km;
- boolean_t should_free = B_FALSE;
/* init the search key mapping */
search_km.km_dsobj = dsobj;
- rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
+ rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER);
/* find the matching mapping */
found_km = avl_find(&spa->spa_keystore.sk_key_mappings,
@@ -1015,23 +1075,9 @@ spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
goto error_unlock;
}
- /*
- * Decrement the refcount on the mapping and remove it from the tree if
- * it is zero. Try to minimize time spent in this lock by deferring
- * cleanup work.
- */
- if (zfs_refcount_remove(&found_km->km_refcnt, tag) == 0) {
- should_free = B_TRUE;
- avl_remove(&spa->spa_keystore.sk_key_mappings, found_km);
- }
-
rw_exit(&spa->spa_keystore.sk_km_lock);
- /* destroy the key mapping */
- if (should_free) {
- spa_keystore_dsl_key_rele(spa, found_km->km_key, found_km);
- kmem_free(found_km, sizeof (dsl_key_mapping_t));
- }
+ key_mapping_rele(spa, found_km, tag);
return (0);
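
The comment above key_mapping_rele() describes an optimistic release: the
refcount is dropped without holding sk_km_lock, and the writer lock is only
taken to confirm that the mapping is truly unreferenced before it is unlinked
and freed. A minimal userland sketch of the same pattern follows (illustrative
only; mapping_t, map_lock, and tree_remove() are hypothetical stand-ins for
the ZFS types):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    typedef struct mapping {
            atomic_int refcnt;
            /* ... key material, tree linkage ... */
    } mapping_t;

    static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;

    static void
    mapping_rele(mapping_t *m)
    {
            /* Drop our reference outside the lock; others may remain. */
            if (atomic_fetch_sub(&m->refcnt, 1) > 1)
                    return;

            /*
             * We appear to be the last holder. Take a temporary
             * reference so concurrent releasers don't also try to free,
             * then confirm under the writer lock, exactly as
             * key_mapping_rele() does above.
             */
            atomic_fetch_add(&m->refcnt, 1);
            pthread_rwlock_wrlock(&map_lock);
            if (atomic_fetch_sub(&m->refcnt, 1) > 1) {
                    pthread_rwlock_unlock(&map_lock);
                    return;
            }
            /* tree_remove(m);  hypothetical unlink from the shared index */
            pthread_rwlock_unlock(&map_lock);
            free(m);
    }
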
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 36715b8a2..757b2922e 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -438,8 +438,8 @@ dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
}
int
-dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
- ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
+dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
+ dsl_dataset_t **dsp)
{
objset_t *mos = dp->dp_meta_objset;
dmu_buf_t *dbuf;
@@ -599,6 +599,7 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
}
}
}
+
ASSERT3P(ds->ds_dbuf, ==, dbuf);
ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data);
ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 ||
@@ -606,22 +607,40 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
*dsp = ds;
- if ((flags & DS_HOLD_FLAG_DECRYPT) && ds->ds_dir->dd_crypto_obj != 0) {
- err = spa_keystore_create_mapping(dp->dp_spa, ds, ds);
- if (err != 0) {
- dsl_dataset_rele(ds, tag);
- return (SET_ERROR(EACCES));
- }
- }
-
return (0);
}
int
-dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
- dsl_dataset_t **dsp)
+dsl_dataset_create_key_mapping(dsl_dataset_t *ds)
{
- return (dsl_dataset_hold_obj_flags(dp, dsobj, 0, tag, dsp));
+ dsl_dir_t *dd = ds->ds_dir;
+
+ if (dd->dd_crypto_obj == 0)
+ return (0);
+
+ return (spa_keystore_create_mapping(dd->dd_pool->dp_spa,
+ ds, ds, &ds->ds_key_mapping));
+}
+
+int
+dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
+{
+ int err;
+
+ err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
+ if (err != 0)
+ return (err);
+
+ ASSERT3P(*dsp, !=, NULL);
+
+ if (flags & DS_HOLD_FLAG_DECRYPT) {
+ err = dsl_dataset_create_key_mapping(*dsp);
+ if (err != 0)
+ dsl_dataset_rele(*dsp, tag);
+ }
+
+ return (err);
}
int
@@ -788,21 +807,30 @@ dsl_dataset_namelen(dsl_dataset_t *ds)
}
void
-dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
+dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
- if (ds->ds_dir != NULL && ds->ds_dir->dd_crypto_obj != 0 &&
- (flags & DS_HOLD_FLAG_DECRYPT)) {
- (void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa,
- ds->ds_object, ds);
- }
-
dmu_buf_rele(ds->ds_dbuf, tag);
}
void
-dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
+dsl_dataset_remove_key_mapping(dsl_dataset_t *ds)
+{
+ dsl_dir_t *dd = ds->ds_dir;
+
+ if (dd == NULL || dd->dd_crypto_obj == 0)
+ return;
+
+ (void) spa_keystore_remove_mapping(dd->dd_pool->dp_spa,
+ ds->ds_object, ds);
+}
+
+void
+dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
{
- dsl_dataset_rele_flags(ds, 0, tag);
+ if (flags & DS_HOLD_FLAG_DECRYPT)
+ dsl_dataset_remove_key_mapping(ds);
+
+ dsl_dataset_rele(ds, tag);
}
void
@@ -1154,8 +1182,18 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
dp = ds->ds_dir->dd_pool;
if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
+ objset_t *os = ds->ds_objset;
+
/* up the hold count until we can be written out */
dmu_buf_add_ref(ds->ds_dbuf, ds);
+
+ /* if this dataset is encrypted, grab a reference to the DCK */
+ if (ds->ds_dir->dd_crypto_obj != 0 &&
+ !os->os_raw_receive &&
+ !os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
+ ASSERT3P(ds->ds_key_mapping, !=, NULL);
+ key_mapping_add_ref(ds->ds_key_mapping, ds);
+ }
}
}
@@ -1800,6 +1838,11 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
os->os_synced_dnodes = NULL;
}
+ if (os->os_encrypted)
+ os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
+ else
+ ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]);
+
ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
dmu_buf_rele(ds->ds_dbuf, ds);
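
With this refactor the flag-aware hold is a thin wrapper: dsl_dataset_hold_obj()
takes only the dataset hold, and DS_HOLD_FLAG_DECRYPT layers the key mapping on
top via dsl_dataset_create_key_mapping(). A hedged usage sketch of the resulting
pairing (error handling elided; dp, dsobj, and FTAG assumed to be in scope as
usual in ZFS code):

    dsl_dataset_t *ds;

    /* Hold the dataset and, because of the flag, its key mapping. */
    VERIFY0(dsl_dataset_hold_obj_flags(dp, dsobj,
        DS_HOLD_FLAG_DECRYPT, FTAG, &ds));

    /* ... encrypted IO can now find ds->ds_key_mapping ... */

    /* The release must pass the same flag so the mapping is dropped. */
    dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
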
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index e8f519b18..bd2055710 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -516,7 +516,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx);
/* create the root objset */
- VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
+ VERIFY0(dsl_dataset_hold_obj_flags(dp, obj,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
#ifdef _KERNEL
{
objset_t *os;
@@ -527,7 +528,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
zfs_create_fs(os, kcred, zplprops, tx);
}
#endif
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
dmu_tx_commit(tx);
@@ -690,9 +691,22 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*/
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
+ objset_t *os = ds->ds_objset;
+
ASSERT(list_link_active(&ds->ds_synced_link));
dmu_buf_rele(ds->ds_dbuf, ds);
dsl_dataset_sync(ds, zio, tx);
+
+ /*
+ * Release any key mappings created by calls to
+ * dsl_dataset_dirty() from the userquota accounting
+ * code paths.
+ */
+ if (os->os_encrypted && !os->os_raw_receive &&
+ !os->os_next_write_raw[txg & TXG_MASK]) {
+ ASSERT3P(ds->ds_key_mapping, !=, NULL);
+ key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
+ }
}
VERIFY0(zio_wait(zio));
@@ -702,8 +716,17 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*
* - move dead blocks from the pending deadlist to the on-disk deadlist
* - release hold from dsl_dataset_dirty()
+ * - release key mapping hold from dsl_dataset_dirty()
*/
while ((ds = list_remove_head(&synced_datasets)) != NULL) {
+ objset_t *os = ds->ds_objset;
+
+ if (os->os_encrypted && !os->os_raw_receive &&
+ !os->os_next_write_raw[txg & TXG_MASK]) {
+ ASSERT3P(ds->ds_key_mapping, !=, NULL);
+ key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
+ }
+
dsl_dataset_sync_done(ds, tx);
}
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index ad8000914..a1851bca2 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -4995,7 +4995,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
uint64_t txg = TXG_INITIAL;
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
- uint64_t version, obj, root_dsobj = 0;
+ uint64_t version, obj;
boolean_t has_features;
boolean_t has_encryption;
spa_feature_t feat;
@@ -5249,27 +5249,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
dmu_tx_commit(tx);
- /*
- * If the root dataset is encrypted we will need to create key mappings
- * for the zio layer before we start to write any data to disk and hold
- * them until after the first txg has been synced. Waiting for the first
- * transaction to complete also ensures that our bean counters are
- * appropriately updated.
- */
- if (dp->dp_root_dir->dd_crypto_obj != 0) {
- root_dsobj = dsl_dir_phys(dp->dp_root_dir)->dd_head_dataset_obj;
- VERIFY0(spa_keystore_create_mapping_impl(spa, root_dsobj,
- dp->dp_root_dir, FTAG));
- }
-
spa->spa_sync_on = B_TRUE;
txg_sync_start(dp);
mmp_thread_start(spa);
txg_wait_synced(dp, txg);
- if (dp->dp_root_dir->dd_crypto_obj != 0)
- VERIFY0(spa_keystore_remove_mapping(spa, root_dsobj, FTAG));
-
spa_spawn_aux_threads(spa);
spa_write_cachefile(spa, B_FALSE, B_TRUE);
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 8b7aeb5c3..e247c1e4f 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -3236,8 +3236,8 @@ zil_suspend(const char *osname, void **cookiep)
* grabbing a reference to it. If the key isn't loaded we have no
* choice but to return an error until the wrapping key is loaded.
*/
- if (os->os_encrypted && spa_keystore_create_mapping(os->os_spa,
- dmu_objset_ds(os), FTAG) != 0) {
+ if (os->os_encrypted &&
+ dsl_dataset_create_key_mapping(dmu_objset_ds(os)) != 0) {
zilog->zl_suspend--;
mutex_exit(&zilog->zl_lock);
dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
@@ -3259,9 +3259,10 @@ zil_suspend(const char *osname, void **cookiep)
zil_commit_impl(zilog, 0);
/*
- * Now that we've ensured all lwb's are LWB_STATE_DONE, we use
- * txg_wait_synced() to ensure the data from the zilog has
- * migrated to the main pool before calling zil_destroy().
+ * Now that we've ensured all lwb's are LWB_STATE_DONE,
+ * txg_wait_synced() will be called from within zil_destroy(),
+ * which will ensure the data from the zilog has migrated to the
+ * main pool before it returns.
*/
txg_wait_synced(zilog->zl_dmu_pool, 0);
@@ -3272,19 +3273,8 @@ zil_suspend(const char *osname, void **cookiep)
cv_broadcast(&zilog->zl_cv_suspend);
mutex_exit(&zilog->zl_lock);
- if (os->os_encrypted) {
- /*
- * Encrypted datasets need to wait for all data to be
- * synced out before removing the mapping.
- *
- * XXX: Depending on the number of datasets with
- * outstanding ZIL data on a given log device, this
- * might cause spa_offline_log() to take a long time.
- */
- txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
- VERIFY0(spa_keystore_remove_mapping(os->os_spa,
- dmu_objset_id(os), FTAG));
- }
+ if (os->os_encrypted)
+ dsl_dataset_remove_key_mapping(dmu_objset_ds(os));
if (cookiep == NULL)
zil_resume(os);
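
With these hunks zil_suspend() pairs the dataset-level helpers directly instead
of calling into the keystore and waiting out an extra txg on its own. A hedged
sketch of the resulting shape (the error-path cleanup and the intervening
commit/destroy steps are elided; the EACCES return value is an assumption from
context):

    if (os->os_encrypted &&
        dsl_dataset_create_key_mapping(dmu_objset_ds(os)) != 0)
            return (SET_ERROR(EACCES));     /* wrapping key not loaded */

    /* ... zil_commit_impl(), txg_wait_synced(), zil_destroy() ... */

    if (os->os_encrypted)
            dsl_dataset_remove_key_mapping(dmu_objset_ds(os));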