Diffstat (limited to 'module')
-rw-r--r--  module/zfs/dmu_objset.c  |  20
-rw-r--r--  module/zfs/dsl_crypt.c   | 102
-rw-r--r--  module/zfs/dsl_dataset.c |  87
-rw-r--r--  module/zfs/dsl_pool.c    |  27
-rw-r--r--  module/zfs/spa.c         |  18
-rw-r--r--  module/zfs/zil.c         |  26
6 files changed, 186 insertions(+), 94 deletions(-)
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 3c9a817f7..086a65114 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -1157,6 +1157,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
 {
 	dmu_objset_create_arg_t *doca = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
+	spa_t *spa = dp->dp_spa;
 	dsl_dir_t *pdd;
 	const char *tail;
 	dsl_dataset_t *ds;
@@ -1174,8 +1175,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
 	    DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
 	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 	bp = dsl_dataset_get_blkptr(ds);
-	os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
-	    ds, bp, doca->doca_type, tx);
+	os = dmu_objset_create_impl(spa, ds, bp, doca->doca_type, tx);
 	rrw_exit(&ds->ds_bp_rwlock, FTAG);

 	if (doca->doca_userfunc != NULL) {
@@ -1199,7 +1199,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
 		ds->ds_owner = FTAG;
 		mutex_exit(&ds->ds_lock);

-		rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+		rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 		tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
 		    tx->tx_txg);
 		if (tmpds != NULL) {
@@ -1210,8 +1210,12 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
 		dmu_objset_do_userquota_updates(os, tx);
 		taskq_wait(dp->dp_sync_taskq);

+		if (txg_list_member(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_rele(spa, ds->ds_key_mapping, ds);
+		}
-		rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+		rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 		tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
 		    tx->tx_txg);
 		if (tmpds != NULL) {
@@ -1220,8 +1224,11 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
 		}
 		VERIFY0(zio_wait(rzio));

-		if (need_sync_done)
+		if (need_sync_done) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_rele(spa, ds->ds_key_mapping, ds);
 			dsl_dataset_sync_done(ds, tx);
+		}

 		mutex_enter(&ds->ds_lock);
 		ds->ds_owner = NULL;
@@ -1229,7 +1236,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
 	}

 	spa_history_log_internal_ds(ds, "create", tx, "");
-	zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE);
+	zvol_create_minors(spa, doca->doca_name, B_TRUE);

 	dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
 	dsl_dir_rele(pdd, FTAG);
@@ -1702,7 +1709,6 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
 	if (os->os_raw_receive ||
 	    os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
 		ASSERT(os->os_encrypted);
-		os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
 		arc_convert_to_raw(os->os_phys_buf,
 		    os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
 		    DMU_OT_OBJSET, NULL, NULL, NULL);
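Note: the dmu_objset_create_sync() hunks above keep the key-mapping refcount balanced by making whichever path removes the dataset from the per-txg dirty list also drop the hold that dsl_dataset_dirty() took. A minimal userland sketch of that discipline (illustrative names, not ZFS code):

	#include <assert.h>
	#include <stdio.h>

	/* stand-ins for ds_key_mapping holds and dp_dirty_datasets membership */
	struct obj {
		int km_holds;
		int dirty;
	};

	/* dirtying an object in a txg takes one key-mapping hold */
	static void
	obj_dirty(struct obj *o)
	{
		if (!o->dirty) {
			o->dirty = 1;
			o->km_holds++;
		}
	}

	/* whichever path removes the object from the dirty list drops that hold */
	static void
	obj_undirty(struct obj *o)
	{
		if (o->dirty) {
			o->dirty = 0;
			o->km_holds--;
		}
	}

	int
	main(void)
	{
		struct obj o = { 0, 0 };

		obj_dirty(&o);		/* e.g. userquota updates re-dirty the ds */
		obj_undirty(&o);	/* create path finds it still dirty: release */
		assert(o.km_holds == 0);
		(void) printf("key-mapping holds balanced: %d\n", o.km_holds);
		return (0);
	}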
diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c
index 6937fc9e1..0eeb260c3 100644
--- a/module/zfs/dsl_crypt.c
+++ b/module/zfs/dsl_crypt.c
@@ -896,6 +896,20 @@ spa_keystore_unload_wkey(const char *dsname)
 	int ret = 0;
 	dsl_dir_t *dd = NULL;
 	dsl_pool_t *dp = NULL;
+	spa_t *spa = NULL;
+
+	ret = spa_open(dsname, &spa, FTAG);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * Wait for any outstanding txg IO to complete, releasing any
+	 * remaining references on the wkey.
+	 */
+	if (spa_mode(spa) != FREAD)
+		txg_wait_synced(spa->spa_dsl_pool, 0);
+
+	spa_close(spa, FTAG);

 	/* hold the dsl dir */
 	ret = dsl_pool_hold(dsname, FTAG, &dp);
@@ -935,9 +949,56 @@ error:
 	return (ret);
 }

+void
+key_mapping_add_ref(dsl_key_mapping_t *km, void *tag)
+{
+	ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);
+	zfs_refcount_add(&km->km_refcnt, tag);
+}
+
+/*
+ * The locking here is a little tricky to ensure we don't cause unnecessary
+ * performance problems. We want to release a key mapping whenever someone
+ * decrements the refcount to 0, but freeing the mapping requires removing
+ * it from the spa_keystore, which requires holding sk_km_lock as a writer.
+ * Most of the time we don't want to hold this lock as a writer, since the
+ * same lock is held as a reader for each IO that needs to encrypt / decrypt
+ * data for any dataset and in practice we will only actually free the
+ * mapping after unmounting a dataset.
+ */
+void
+key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag)
+{
+	ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);
+
+	if (zfs_refcount_remove(&km->km_refcnt, tag) != 0)
+		return;
+
+	/*
+	 * We think we are going to need to free the mapping. Add a
+	 * reference to prevent most other releasers from thinking
+	 * this might be their responsibility. This is inherently
+	 * racy, so we will confirm that we are legitimately the
+	 * last holder once we have the sk_km_lock as a writer.
+	 */
+	zfs_refcount_add(&km->km_refcnt, FTAG);
+
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
+	if (zfs_refcount_remove(&km->km_refcnt, FTAG) != 0) {
+		rw_exit(&spa->spa_keystore.sk_km_lock);
+		return;
+	}
+
+	avl_remove(&spa->spa_keystore.sk_key_mappings, km);
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	spa_keystore_dsl_key_rele(spa, km->km_key, km);
+	kmem_free(km, sizeof (dsl_key_mapping_t));
+}
+
 int
-spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
-    dsl_dir_t *dd, void *tag)
+spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag,
+    dsl_key_mapping_t **km_out)
 {
 	int ret;
 	avl_index_t where;
@@ -948,14 +1009,17 @@ spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
 	km = kmem_zalloc(sizeof (dsl_key_mapping_t), KM_SLEEP);
 	zfs_refcount_create(&km->km_refcnt);

-	ret = spa_keystore_dsl_key_hold_dd(spa, dd, km, &km->km_key);
+	ret = spa_keystore_dsl_key_hold_dd(spa, ds->ds_dir, km, &km->km_key);
 	if (ret != 0) {
 		zfs_refcount_destroy(&km->km_refcnt);
 		kmem_free(km, sizeof (dsl_key_mapping_t));
+
+		if (km_out != NULL)
+			*km_out = NULL;
 		return (ret);
 	}

-	km->km_dsobj = dsobj;
+	km->km_dsobj = ds->ds_object;

 	rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);

@@ -971,9 +1035,13 @@
 	if (found_km != NULL) {
 		should_free = B_TRUE;
 		zfs_refcount_add(&found_km->km_refcnt, tag);
+		if (km_out != NULL)
+			*km_out = found_km;
 	} else {
 		zfs_refcount_add(&km->km_refcnt, tag);
 		avl_insert(&spa->spa_keystore.sk_key_mappings, km, where);
+		if (km_out != NULL)
+			*km_out = km;
 	}

 	rw_exit(&spa->spa_keystore.sk_km_lock);
@@ -988,24 +1056,16 @@
 }

 int
-spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag)
-{
-	return (spa_keystore_create_mapping_impl(spa, ds->ds_object,
-	    ds->ds_dir, tag));
-}
-
-int
 spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
 {
 	int ret;
 	dsl_key_mapping_t search_km;
 	dsl_key_mapping_t *found_km;
-	boolean_t should_free = B_FALSE;

 	/* init the search key mapping */
 	search_km.km_dsobj = dsobj;

-	rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER);

 	/* find the matching mapping */
 	found_km = avl_find(&spa->spa_keystore.sk_key_mappings,
@@ -1015,23 +1075,9 @@ spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
 		goto error_unlock;
 	}

-	/*
-	 * Decrement the refcount on the mapping and remove it from the tree if
-	 * it is zero. Try to minimize time spent in this lock by deferring
-	 * cleanup work.
-	 */
-	if (zfs_refcount_remove(&found_km->km_refcnt, tag) == 0) {
-		should_free = B_TRUE;
-		avl_remove(&spa->spa_keystore.sk_key_mappings, found_km);
-	}
-
 	rw_exit(&spa->spa_keystore.sk_km_lock);

-	/* destroy the key mapping */
-	if (should_free) {
-		spa_keystore_dsl_key_rele(spa, found_km->km_key, found_km);
-		kmem_free(found_km, sizeof (dsl_key_mapping_t));
-	}
+	key_mapping_rele(spa, found_km, tag);

 	return (0);
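key_mapping_rele() above implements a drop-then-confirm release so the common path never takes sk_km_lock as a writer: a releaser that hits zero re-adds a reference, then re-checks under the writer lock, since new holders may have appeared through the tree in the meantime. A self-contained userland sketch of the same pattern, with C11 atomics and a pthread rwlock standing in for zfs_refcount_t and krwlock_t (all names here are stand-ins, not ZFS APIs; build with -lpthread):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdlib.h>

	typedef struct mapping {
		atomic_int refcnt;		/* stand-in for km_refcnt */
	} mapping_t;

	static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;

	/* drop a reference; returns the new count, like zfs_refcount_remove() */
	static int
	ref_drop(mapping_t *m)
	{
		return (atomic_fetch_sub(&m->refcnt, 1) - 1);
	}

	static void
	mapping_rele(mapping_t *m)
	{
		/* fast path: other holders remain, no writer lock needed */
		if (ref_drop(m) != 0)
			return;

		/*
		 * We probably must free the mapping. Re-add a reference so
		 * concurrent releasers take the fast path, then confirm we
		 * are truly the last holder under the writer lock. Exactly
		 * one thread wins this race and frees.
		 */
		atomic_fetch_add(&m->refcnt, 1);

		pthread_rwlock_wrlock(&tree_lock);
		if (ref_drop(m) != 0) {
			pthread_rwlock_unlock(&tree_lock);
			return;
		}
		/* would remove the node from the shared tree here */
		pthread_rwlock_unlock(&tree_lock);
		free(m);
	}

	int
	main(void)
	{
		mapping_t *m = malloc(sizeof (*m));

		atomic_init(&m->refcnt, 1);	/* creation holds one reference */
		mapping_rele(m);		/* last rele removes and frees */
		return (0);
	}

The writer lock is only ever taken by the releaser that observed zero, so readers encrypting or decrypting IO are almost never blocked.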
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 36715b8a2..757b2922e 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -438,8 +438,8 @@ dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
 }

 int
-dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
-    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
+dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
+    dsl_dataset_t **dsp)
 {
 	objset_t *mos = dp->dp_meta_objset;
 	dmu_buf_t *dbuf;
@@ -599,6 +599,7 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
 			}
 		}
 	}

+	ASSERT3P(ds->ds_dbuf, ==, dbuf);
 	ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data);
 	ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 ||
@@ -606,22 +607,40 @@
 	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
 	*dsp = ds;

-	if ((flags & DS_HOLD_FLAG_DECRYPT) && ds->ds_dir->dd_crypto_obj != 0) {
-		err = spa_keystore_create_mapping(dp->dp_spa, ds, ds);
-		if (err != 0) {
-			dsl_dataset_rele(ds, tag);
-			return (SET_ERROR(EACCES));
-		}
-	}
-
 	return (0);
 }

 int
-dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
-    dsl_dataset_t **dsp)
+dsl_dataset_create_key_mapping(dsl_dataset_t *ds)
 {
-	return (dsl_dataset_hold_obj_flags(dp, dsobj, 0, tag, dsp));
+	dsl_dir_t *dd = ds->ds_dir;
+
+	if (dd->dd_crypto_obj == 0)
+		return (0);
+
+	return (spa_keystore_create_mapping(dd->dd_pool->dp_spa,
+	    ds, ds, &ds->ds_key_mapping));
+}
+
+int
+dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
+    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
+{
+	int err;
+
+	err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
+	if (err != 0)
+		return (err);
+
+	ASSERT3P(*dsp, !=, NULL);
+
+	if (flags & DS_HOLD_FLAG_DECRYPT) {
+		err = dsl_dataset_create_key_mapping(*dsp);
+		if (err != 0)
+			dsl_dataset_rele(*dsp, tag);
+	}
+
+	return (err);
 }

 int
@@ -788,21 +807,30 @@ dsl_dataset_namelen(dsl_dataset_t *ds)
 }

 void
-dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
+dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
 {
-	if (ds->ds_dir != NULL && ds->ds_dir->dd_crypto_obj != 0 &&
-	    (flags & DS_HOLD_FLAG_DECRYPT)) {
-		(void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa,
-		    ds->ds_object, ds);
-	}
-
 	dmu_buf_rele(ds->ds_dbuf, tag);
 }

 void
-dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
+dsl_dataset_remove_key_mapping(dsl_dataset_t *ds)
+{
+	dsl_dir_t *dd = ds->ds_dir;
+
+	if (dd == NULL || dd->dd_crypto_obj == 0)
+		return;
+
+	(void) spa_keystore_remove_mapping(dd->dd_pool->dp_spa,
+	    ds->ds_object, ds);
+}
+
+void
+dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
 {
-	dsl_dataset_rele_flags(ds, 0, tag);
+	if (flags & DS_HOLD_FLAG_DECRYPT)
+		dsl_dataset_remove_key_mapping(ds);
+
+	dsl_dataset_rele(ds, tag);
 }

 void
@@ -1154,8 +1182,18 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
 	dp = ds->ds_dir->dd_pool;

 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
+		objset_t *os = ds->ds_objset;
+
 		/* up the hold count until we can be written out */
 		dmu_buf_add_ref(ds->ds_dbuf, ds);
+
+		/* if this dataset is encrypted, grab a reference to the DCK */
+		if (ds->ds_dir->dd_crypto_obj != 0 &&
+		    !os->os_raw_receive &&
+		    !os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_add_ref(ds->ds_key_mapping, ds);
+		}
 	}
 }

@@ -1800,6 +1838,11 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
 		os->os_synced_dnodes = NULL;
 	}

+	if (os->os_encrypted)
+		os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
+	else
+		ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]);
+
 	ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));

 	dmu_buf_rele(ds->ds_dbuf, ds);
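With the split above, dsl_dataset_rele() no longer knows about key mappings, so a DS_HOLD_FLAG_DECRYPT hold must be released through dsl_dataset_rele_flags() with the same flag. The compose-and-unwind shape of dsl_dataset_hold_obj_flags() can be sketched in userland like this (assumed names, not ZFS code):

	#include <errno.h>
	#include <stdio.h>

	static int base_holds, key_mappings;

	static int
	base_hold(void)
	{
		base_holds++;
		return (0);
	}

	static void
	base_rele(void)
	{
		base_holds--;
	}

	/* fails with EACCES when the wrapping key is not loaded */
	static int
	create_key_mapping(int key_loaded)
	{
		if (!key_loaded)
			return (EACCES);
		key_mappings++;
		return (0);
	}

	static int
	hold_flags(int decrypt, int key_loaded)
	{
		int err = base_hold();

		if (err != 0)
			return (err);
		if (decrypt) {
			err = create_key_mapping(key_loaded);
			if (err != 0)
				base_rele();	/* unwind the base hold */
		}
		return (err);
	}

	int
	main(void)
	{
		(void) printf("no key: err=%d holds=%d\n",
		    hold_flags(1, 0), base_holds);
		(void) printf("key loaded: err=%d holds=%d mappings=%d\n",
		    hold_flags(1, 1), base_holds, key_mappings);
		return (0);
	}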
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index e8f519b18..bd2055710 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -516,7 +516,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
 	obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx);

 	/* create the root objset */
-	VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
+	VERIFY0(dsl_dataset_hold_obj_flags(dp, obj,
+	    DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
 #ifdef _KERNEL
 	{
 		objset_t *os;
@@ -527,7 +528,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
 		zfs_create_fs(os, kcred, zplprops, tx);
 	}
 #endif
-	dsl_dataset_rele(ds, FTAG);
+	dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);

 	dmu_tx_commit(tx);

@@ -690,9 +691,22 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
 	 */
 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 	while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
+		objset_t *os = ds->ds_objset;
+
 		ASSERT(list_link_active(&ds->ds_synced_link));
 		dmu_buf_rele(ds->ds_dbuf, ds);
 		dsl_dataset_sync(ds, zio, tx);
+
+		/*
+		 * Release any key mappings created by calls to
+		 * dsl_dataset_dirty() from the userquota accounting
+		 * code paths.
+		 */
+		if (os->os_encrypted && !os->os_raw_receive &&
+		    !os->os_next_write_raw[txg & TXG_MASK]) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
+		}
 	}
 	VERIFY0(zio_wait(zio));

@@ -702,8 +716,17 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
 	 *
 	 * - move dead blocks from the pending deadlist to the on-disk deadlist
 	 * - release hold from dsl_dataset_dirty()
+	 * - release key mapping hold from dsl_dataset_dirty()
 	 */
 	while ((ds = list_remove_head(&synced_datasets)) != NULL) {
+		objset_t *os = ds->ds_objset;
+
+		if (os->os_encrypted && !os->os_raw_receive &&
+		    !os->os_next_write_raw[txg & TXG_MASK]) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
+		}
+
 		dsl_dataset_sync_done(ds, tx);
 	}
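The acquire side in dsl_dataset_dirty() and both release sites in dsl_pool_sync() must evaluate equivalent predicates (encrypted, not a raw receive, not a raw write for this txg), or key-mapping holds would leak or underflow. A sketch of that invariant, with the predicate factored into one helper (the shared helper is an illustration; the ZFS code open-codes the check at each site):

	#include <assert.h>

	struct os_state {
		int encrypted;
		int raw_receive;
		int next_write_raw;	/* per-txg flag, as in os_next_write_raw[] */
	};

	static int km_holds;

	/* one predicate, evaluated identically at acquire and release */
	static int
	needs_key_hold(const struct os_state *os)
	{
		return (os->encrypted && !os->raw_receive && !os->next_write_raw);
	}

	static void
	on_dirty(const struct os_state *os)
	{
		if (needs_key_hold(os))
			km_holds++;	/* key_mapping_add_ref() */
	}

	static void
	on_sync(const struct os_state *os)
	{
		if (needs_key_hold(os))
			km_holds--;	/* key_mapping_rele() */
	}

	int
	main(void)
	{
		struct os_state os = { 1, 0, 0 };

		on_dirty(&os);
		on_sync(&os);
		assert(km_holds == 0);	/* balanced for every combination */
		return (0);
	}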
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index ad8000914..a1851bca2 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -4995,7 +4995,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
 	uint64_t txg = TXG_INITIAL;
 	nvlist_t **spares, **l2cache;
 	uint_t nspares, nl2cache;
-	uint64_t version, obj, root_dsobj = 0;
+	uint64_t version, obj;
 	boolean_t has_features;
 	boolean_t has_encryption;
 	spa_feature_t feat;
@@ -5249,27 +5249,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,

 	dmu_tx_commit(tx);

-	/*
-	 * If the root dataset is encrypted we will need to create key mappings
-	 * for the zio layer before we start to write any data to disk and hold
-	 * them until after the first txg has been synced. Waiting for the
-	 * first transaction to complete also ensures that our bean counters
-	 * are appropriately updated.
-	 */
-	if (dp->dp_root_dir->dd_crypto_obj != 0) {
-		root_dsobj = dsl_dir_phys(dp->dp_root_dir)->dd_head_dataset_obj;
-		VERIFY0(spa_keystore_create_mapping_impl(spa, root_dsobj,
-		    dp->dp_root_dir, FTAG));
-	}
-
 	spa->spa_sync_on = B_TRUE;
 	txg_sync_start(dp);
 	mmp_thread_start(spa);
 	txg_wait_synced(dp, txg);

-	if (dp->dp_root_dir->dd_crypto_obj != 0)
-		VERIFY0(spa_keystore_remove_mapping(spa, root_dsobj, FTAG));
-
 	spa_spawn_aux_threads(spa);

 	spa_write_cachefile(spa, B_FALSE, B_TRUE);
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 8b7aeb5c3..e247c1e4f 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -3236,8 +3236,8 @@ zil_suspend(const char *osname, void **cookiep)
 	 * grabbing a reference to it. If the key isn't loaded we have no
 	 * choice but to return an error until the wrapping key is loaded.
 	 */
-	if (os->os_encrypted && spa_keystore_create_mapping(os->os_spa,
-	    dmu_objset_ds(os), FTAG) != 0) {
+	if (os->os_encrypted &&
+	    dsl_dataset_create_key_mapping(dmu_objset_ds(os)) != 0) {
 		zilog->zl_suspend--;
 		mutex_exit(&zilog->zl_lock);
 		dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
@@ -3259,9 +3259,10 @@ zil_suspend(const char *osname, void **cookiep)
 	zil_commit_impl(zilog, 0);

 	/*
-	 * Now that we've ensured all lwb's are LWB_STATE_DONE, we use
-	 * txg_wait_synced() to ensure the data from the zilog has
-	 * migrated to the main pool before calling zil_destroy().
+	 * Now that we've ensured all lwb's are LWB_STATE_DONE,
+	 * txg_wait_synced() will be called from within zil_destroy(),
+	 * which will ensure the data from the zilog has migrated to the
+	 * main pool before it returns.
 	 */
 	txg_wait_synced(zilog->zl_dmu_pool, 0);

@@ -3272,19 +3273,8 @@ zil_suspend(const char *osname, void **cookiep)
 	cv_broadcast(&zilog->zl_cv_suspend);
 	mutex_exit(&zilog->zl_lock);

-	if (os->os_encrypted) {
-		/*
-		 * Encrypted datasets need to wait for all data to be
-		 * synced out before removing the mapping.
-		 *
-		 * XXX: Depending on the number of datasets with
-		 * outstanding ZIL data on a given log device, this
-		 * might cause spa_offline_log() to take a long time.
-		 */
-		txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
-		VERIFY0(spa_keystore_remove_mapping(os->os_spa,
-		    dmu_objset_id(os), FTAG));
-	}
+	if (os->os_encrypted)
+		dsl_dataset_remove_key_mapping(dmu_objset_ds(os));

 	if (cookiep == NULL)
 		zil_resume(os);
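With dsl_dataset_create_key_mapping() and dsl_dataset_remove_key_mapping(), zil_suspend() now simply brackets its commit-and-sync work with a key-mapping hold, and the extra txg_wait_synced() before dropping the mapping is gone. A condensed userland sketch of that control flow (stand-in functions, not the ZFS ones):

	#include <errno.h>
	#include <stdio.h>

	static int key_loaded = 1;
	static int mappings;

	static int
	create_key_mapping(void)
	{
		if (!key_loaded)
			return (EACCES);
		mappings++;
		return (0);
	}

	static void
	remove_key_mapping(void)
	{
		mappings--;
	}

	/*
	 * Encrypted suspend: hold the key mapping only for the duration of
	 * the commit and sync, failing up front if the wrapping key is absent.
	 */
	static int
	suspend(int encrypted)
	{
		if (encrypted && create_key_mapping() != 0)
			return (EACCES);

		/* ... zil_commit_impl() and txg_wait_synced() run here ... */

		if (encrypted)
			remove_key_mapping();
		return (0);
	}

	int
	main(void)
	{
		(void) printf("suspend: %d mappings=%d\n", suspend(1), mappings);
		key_loaded = 0;
		(void) printf("no key:  %d mappings=%d\n", suspend(1), mappings);
		return (0);
	}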