aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/dmu_objset.c
diff options
context:
space:
mode:
authorTom Caputi <[email protected]>2017-08-14 13:36:48 -0400
committerBrian Behlendorf <[email protected]>2017-08-14 10:36:48 -0700
commitb52563034230b35f0562b6f40ad1a00f02bd9a05 (patch)
tree794ccc5160e997e280cb6e36c7778ce9f7a96548 /module/zfs/dmu_objset.c
parent376994828fd3753aba75d492859727ca76f6a293 (diff)
Native Encryption for ZFS on Linux
This change incorporates three major pieces: The first change is a keystore that manages wrapping and encryption keys for encrypted datasets. These commands mostly involve manipulating the new DSL Crypto Key ZAP Objects that live in the MOS. Each encrypted dataset has its own DSL Crypto Key that is protected with a user's key. This level of indirection allows users to change their keys without re-encrypting their entire datasets. The change implements the new subcommands "zfs load-key", "zfs unload-key" and "zfs change-key" which allow the user to manage their encryption keys and settings. In addition, several new flags and properties have been added to allow dataset creation and to make mounting and unmounting more convenient. The second piece of this patch provides the ability to encrypt, decyrpt, and authenticate protected datasets. Each object set maintains a Merkel tree of Message Authentication Codes that protect the lower layers, similarly to how checksums are maintained. This part impacts the zio layer, which handles the actual encryption and generation of MACs, as well as the ARC and DMU, which need to be able to handle encrypted buffers and protected data. The last addition is the ability to do raw, encrypted sends and receives. The idea here is to send raw encrypted and compressed data and receive it exactly as is on a backup system. This means that the dataset on the receiving system is protected using the same user key that is in use on the sending side. By doing so, datasets can be efficiently backed up to an untrusted system without fear of data being compromised. Reviewed by: Matthew Ahrens <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Jorgen Lundman <[email protected]> Signed-off-by: Tom Caputi <[email protected]> Closes #494 Closes #5769
Diffstat (limited to 'module/zfs/dmu_objset.c')
-rw-r--r--module/zfs/dmu_objset.c296
1 files changed, 239 insertions, 57 deletions
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 9a7a6968d..3faa299d1 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -56,6 +56,7 @@
#include <sys/vdev.h>
#include <sys/policy.h>
#include <sys/spa_impl.h>
+#include <sys/dmu_send.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -391,16 +392,23 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (!BP_IS_HOLE(os->os_rootbp)) {
arc_flags_t aflags = ARC_FLAG_WAIT;
zbookmark_phys_t zb;
+ enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
if (DMU_OS_IS_L2CACHEABLE(os))
aflags |= ARC_FLAG_L2CACHE;
+ if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
+ ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+ ASSERT(BP_IS_AUTHENTICATED(bp));
+ zio_flags |= ZIO_FLAG_RAW;
+ }
+
dprintf_bp(os->os_rootbp, "reading %s", "");
err = arc_read(NULL, spa, os->os_rootbp,
arc_getbuf_func, &os->os_phys_buf,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
+ ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
if (err != 0) {
kmem_free(os, sizeof (objset_t));
/* convert checksum errors into IO errors */
@@ -441,6 +449,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (ds != NULL) {
boolean_t needlock = B_FALSE;
+ os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0);
+
/*
* Note: it's valid to open the objset if the dataset is
* long-held, in which case the pool_config lock will not
@@ -450,6 +460,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
needlock = B_TRUE;
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
}
+
err = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
primary_cache_changed_cb, os);
@@ -517,6 +528,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
/* It's the meta-objset. */
os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
os->os_compress = ZIO_COMPRESS_ON;
+ os->os_encrypted = B_FALSE;
os->os_copies = spa_max_replication(spa);
os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
os->os_dedup_verify = B_FALSE;
@@ -603,16 +615,18 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
* can be held at a time.
*/
int
-dmu_objset_hold(const char *name, void *tag, objset_t **osp)
+dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
+ objset_t **osp)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
+ ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
err = dsl_pool_hold(name, tag, &dp);
if (err != 0)
return (err);
- err = dsl_dataset_hold(dp, name, tag, &ds);
+ err = dsl_dataset_hold_flags(dp, name, flags, tag, &ds);
if (err != 0) {
dsl_pool_rele(dp, tag);
return (err);
@@ -627,23 +641,38 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp)
return (err);
}
+int
+dmu_objset_hold(const char *name, void *tag, objset_t **osp)
+{
+ return (dmu_objset_hold_flags(name, B_FALSE, tag, osp));
+}
+
static int
dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp)
+ boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
{
int err;
err = dmu_objset_from_ds(ds, osp);
if (err != 0) {
- dsl_dataset_disown(ds, tag);
+ return (err);
} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
- dsl_dataset_disown(ds, tag);
return (SET_ERROR(EINVAL));
} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
- dsl_dataset_disown(ds, tag);
return (SET_ERROR(EROFS));
}
- return (err);
+
+ /* if we are decrypting, we can now check MACs in os->os_phys_buf */
+ if (decrypt && arc_is_unauthenticated((*osp)->os_phys_buf)) {
+ err = arc_untransform((*osp)->os_phys_buf, (*osp)->os_spa,
+ ds->ds_object, B_FALSE);
+ if (err != 0)
+ return (err);
+
+ ASSERT0(arc_is_unauthenticated((*osp)->os_phys_buf));
+ }
+
+ return (0);
}
/*
@@ -653,51 +682,73 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
*/
int
dmu_objset_own(const char *name, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp)
+ boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
+ ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
err = dsl_pool_hold(name, FTAG, &dp);
if (err != 0)
return (err);
- err = dsl_dataset_own(dp, name, tag, &ds);
+ err = dsl_dataset_own(dp, name, flags, tag, &ds);
if (err != 0) {
dsl_pool_rele(dp, FTAG);
return (err);
}
- err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
+ err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
+ if (err != 0) {
+ dsl_dataset_disown(ds, flags, tag);
+ dsl_pool_rele(dp, FTAG);
+ return (err);
+ }
+
dsl_pool_rele(dp, FTAG);
- if (err == 0 && dmu_objset_userobjspace_upgradable(*osp))
+ if (dmu_objset_userobjspace_upgradable(*osp))
dmu_objset_userobjspace_upgrade(*osp);
- return (err);
+ return (0);
}
int
dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp)
+ boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
{
dsl_dataset_t *ds;
int err;
+ ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
- err = dsl_dataset_own_obj(dp, obj, tag, &ds);
+ err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds);
if (err != 0)
return (err);
- return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
+ err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
+ if (err != 0) {
+ dsl_dataset_disown(ds, flags, tag);
+ return (err);
+ }
+
+ return (0);
}
void
-dmu_objset_rele(objset_t *os, void *tag)
+dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag)
{
+ ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+
dsl_pool_t *dp = dmu_objset_pool(os);
- dsl_dataset_rele(os->os_dsl_dataset, tag);
+ dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag);
dsl_pool_rele(dp, tag);
}
+void
+dmu_objset_rele(objset_t *os, void *tag)
+{
+ dmu_objset_rele_flags(os, B_FALSE, tag);
+}
+
/*
* When we are called, os MUST refer to an objset associated with a dataset
* that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
@@ -710,7 +761,7 @@ dmu_objset_rele(objset_t *os, void *tag)
* same name so that it can be partially torn down and reconstructed.
*/
void
-dmu_objset_refresh_ownership(objset_t *os, void *tag)
+dmu_objset_refresh_ownership(objset_t *os, boolean_t decrypt, void *tag)
{
dsl_pool_t *dp;
dsl_dataset_t *ds, *newds;
@@ -724,20 +775,22 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag)
dsl_dataset_name(ds, name);
dp = dmu_objset_pool(os);
dsl_pool_config_enter(dp, FTAG);
- dmu_objset_disown(os, tag);
- VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
+ dmu_objset_disown(os, decrypt, tag);
+ VERIFY0(dsl_dataset_own(dp, name,
+ (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, &newds));
VERIFY3P(newds, ==, os->os_dsl_dataset);
dsl_pool_config_exit(dp, FTAG);
}
void
-dmu_objset_disown(objset_t *os, void *tag)
+dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag)
{
/*
* Stop upgrading thread
*/
dmu_objset_upgrade_stop(os);
- dsl_dataset_disown(os->os_dsl_dataset, tag);
+ dsl_dataset_disown(os->os_dsl_dataset,
+ (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag);
}
void
@@ -820,6 +873,8 @@ dmu_objset_evict(objset_t *os)
} else {
mutex_exit(&os->os_lock);
}
+
+
}
void
@@ -866,16 +921,20 @@ dmu_objset_snap_cmtime(objset_t *os)
return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
}
-/* called from dsl for meta-objset */
objset_t *
-dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
- dmu_objset_type_t type, dmu_tx_t *tx)
+dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
+ dmu_objset_type_t type, int levels, int blksz, int ibs, dmu_tx_t *tx)
{
objset_t *os;
dnode_t *mdn;
ASSERT(dmu_tx_is_syncing(tx));
+ if (blksz == 0)
+ blksz = DNODE_BLOCK_SIZE;
+ if (blksz == 0)
+ ibs = DN_MAX_INDBLKSHIFT;
+
if (ds != NULL)
VERIFY0(dmu_objset_from_ds(ds, &os));
else
@@ -883,8 +942,8 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
mdn = DMU_META_DNODE(os);
- dnode_allocate(mdn, DMU_OT_DNODE, DNODE_BLOCK_SIZE, DN_MAX_INDBLKSHIFT,
- DMU_OT_NONE, 0, DNODE_MIN_SLOTS, tx);
+ dnode_allocate(mdn, DMU_OT_DNODE, blksz, ibs, DMU_OT_NONE, 0,
+ DNODE_MIN_SLOTS, tx);
/*
* We don't want to have to increase the meta-dnode's nlevels
@@ -898,22 +957,25 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
* to convergence, so minimizing its dn_nlevels matters.
*/
if (ds != NULL) {
- int levels = 1;
-
- /*
- * Determine the number of levels necessary for the meta-dnode
- * to contain DN_MAX_OBJECT dnodes. Note that in order to
- * ensure that we do not overflow 64 bits, there has to be
- * a nlevels that gives us a number of blocks > DN_MAX_OBJECT
- * but < 2^64. Therefore,
- * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be
- * less than (64 - log2(DN_MAX_OBJECT)) (16).
- */
- while ((uint64_t)mdn->dn_nblkptr <<
- (mdn->dn_datablkshift - DNODE_SHIFT +
- (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
- DN_MAX_OBJECT)
- levels++;
+ if (levels == 0) {
+ levels = 1;
+
+ /*
+ * Determine the number of levels necessary for the
+ * meta-dnode to contain DN_MAX_OBJECT dnodes. Note
+ * that in order to ensure that we do not overflow
+ * 64 bits, there has to be a nlevels that gives us a
+ * number of blocks > DN_MAX_OBJECT but < 2^64.
+ * Therefore, (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)
+ * (10) must be less than (64 - log2(DN_MAX_OBJECT))
+ * (16).
+ */
+ while ((uint64_t)mdn->dn_nblkptr <<
+ (mdn->dn_datablkshift - DNODE_SHIFT + (levels - 1) *
+ (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
+ DN_MAX_OBJECT)
+ levels++;
+ }
mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
mdn->dn_nlevels = levels;
@@ -923,7 +985,13 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
ASSERT(type != DMU_OST_ANY);
ASSERT(type < DMU_OST_NUMTYPES);
os->os_phys->os_type = type;
- if (dmu_objset_userused_enabled(os)) {
+
+ /*
+ * Enable user accounting if it is enabled and this is not an
+ * encrypted receive.
+ */
+ if (dmu_objset_userused_enabled(os) &&
+ (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
if (dmu_objset_userobjused_enabled(os)) {
ds->ds_feature_activation_needed[
@@ -939,6 +1007,14 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
return (os);
}
+/* called from dsl for meta-objset */
+objset_t *
+dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
+ dmu_objset_type_t type, dmu_tx_t *tx)
+{
+ return (dmu_objset_create_impl_dnstats(spa, ds, bp, type, 0, 0, 0, tx));
+}
+
typedef struct dmu_objset_create_arg {
const char *doca_name;
cred_t *doca_cred;
@@ -947,6 +1023,7 @@ typedef struct dmu_objset_create_arg {
void *doca_userarg;
dmu_objset_type_t doca_type;
uint64_t doca_flags;
+ dsl_crypto_params_t *doca_dcp;
} dmu_objset_create_arg_t;
/*ARGSUSED*/
@@ -972,8 +1049,16 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EEXIST));
}
+
+ error = dmu_objset_create_crypt_check(pdd, doca->doca_dcp);
+ if (error != 0) {
+ dsl_dir_rele(pdd, FTAG);
+ return (error);
+ }
+
error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
doca->doca_cred);
+
dsl_dir_rele(pdd, FTAG);
return (error);
@@ -990,13 +1075,15 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
uint64_t obj;
blkptr_t *bp;
objset_t *os;
+ zio_t *rzio;
VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));
obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
- doca->doca_cred, tx);
+ doca->doca_cred, doca->doca_dcp, tx);
- VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
+ VERIFY0(dsl_dataset_hold_obj_flags(pdd->dd_pool, obj,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
bp = dsl_dataset_get_blkptr(ds);
os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
@@ -1008,18 +1095,56 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
doca->doca_cred, tx);
}
+ /*
+ * The doca_userfunc() will write out some data that needs to be
+ * encrypted if the dataset is encrypted (specifically the root
+ * directory). This data must be written out before the encryption
+ * key mapping is removed by dsl_dataset_rele_flags(). Force the
+ * I/O to occur immediately by invoking the relevant sections of
+ * dsl_pool_sync().
+ */
+ if (os->os_encrypted) {
+ dsl_dataset_t *tmpds = NULL;
+ boolean_t need_sync_done = B_FALSE;
+
+ rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ tmpds = txg_list_remove(&dp->dp_dirty_datasets, tx->tx_txg);
+ if (tmpds != NULL) {
+ ASSERT3P(ds, ==, tmpds);
+ dsl_dataset_sync(ds, rzio, tx);
+ need_sync_done = B_TRUE;
+ }
+ VERIFY0(zio_wait(rzio));
+
+ dmu_objset_do_userquota_updates(os, tx);
+ taskq_wait(dp->dp_sync_taskq);
+
+ rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ tmpds = txg_list_remove(&dp->dp_dirty_datasets, tx->tx_txg);
+ if (tmpds != NULL) {
+ ASSERT3P(ds, ==, tmpds);
+ dmu_buf_rele(ds->ds_dbuf, ds);
+ dsl_dataset_sync(ds, rzio, tx);
+ }
+ VERIFY0(zio_wait(rzio));
+
+ if (need_sync_done)
+ dsl_dataset_sync_done(ds, tx);
+ }
+
spa_history_log_internal_ds(ds, "create", tx, "");
zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE);
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
dsl_dir_rele(pdd, FTAG);
}
int
dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
- void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
+ dsl_crypto_params_t *dcp, dmu_objset_create_sync_func_t func, void *arg)
{
dmu_objset_create_arg_t doca;
+ dsl_crypto_params_t tmp_dcp = { 0 };
doca.doca_name = name;
doca.doca_cred = CRED();
@@ -1028,9 +1153,19 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
doca.doca_userarg = arg;
doca.doca_type = type;
+ /*
+ * Some callers (mostly for testing) do not provide a dcp on their
+ * own but various code inside the sync task will require it to be
+ * allocated. Rather than adding NULL checks throughout this code
+ * or adding dummy dcp's to all of the callers we simply create a
+ * dummy one here and use that. This zero dcp will have the same
+ * effect as asking for inheritence of all encryption params.
+ */
+ doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp;
+
return (dsl_sync_task(name,
dmu_objset_create_check, dmu_objset_create_sync, &doca,
- 5, ZFS_SPACE_CHECK_NORMAL));
+ 6, ZFS_SPACE_CHECK_NORMAL));
}
typedef struct dmu_objset_clone_arg {
@@ -1070,18 +1205,29 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EDQUOT));
}
- dsl_dir_rele(pdd, FTAG);
error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
- if (error != 0)
+ if (error != 0) {
+ dsl_dir_rele(pdd, FTAG);
return (error);
+ }
/* You can only clone snapshots, not the head datasets. */
if (!origin->ds_is_snapshot) {
dsl_dataset_rele(origin, FTAG);
+ dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EINVAL));
}
+
+ error = dmu_objset_clone_crypt_check(pdd, origin->ds_dir);
+ if (error != 0) {
+ dsl_dataset_rele(origin, FTAG);
+ dsl_dir_rele(pdd, FTAG);
+ return (error);
+ }
+
dsl_dataset_rele(origin, FTAG);
+ dsl_dir_rele(pdd, FTAG);
return (0);
}
@@ -1101,7 +1247,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
- doca->doca_cred, tx);
+ doca->doca_cred, NULL, tx);
VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
dsl_dataset_name(origin, namebuf);
@@ -1124,7 +1270,7 @@ dmu_objset_clone(const char *clone, const char *origin)
return (dsl_sync_task(clone,
dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
- 5, ZFS_SPACE_CHECK_NORMAL));
+ 6, ZFS_SPACE_CHECK_NORMAL));
}
int
@@ -1232,6 +1378,7 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
blkptr_t *bp = zio->io_bp;
objset_t *os = arg;
dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
+ uint64_t fill = 0;
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
@@ -1243,9 +1390,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
* objects that are stored in the objset_phys_t -- the meta
* dnode and user/group accounting objects).
*/
- bp->blk_fill = 0;
for (i = 0; i < dnp->dn_nblkptr; i++)
- bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
+ fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
+
+ BP_SET_FILL(bp, fill);
+
if (os->os_dsl_dataset != NULL)
rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG);
*os->os_rootbp = *bp;
@@ -1334,6 +1483,19 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
dmu_write_policy(os, NULL, 0, 0, &zp);
+ /*
+ * If we are either claiming the ZIL or doing a raw receive write out
+ * the os_phys_buf raw. Neither of these actions will effect the MAC
+ * at this point.
+ */
+ if (arc_is_unauthenticated(os->os_phys_buf) || os->os_next_write_raw) {
+ ASSERT(os->os_encrypted);
+ os->os_next_write_raw = B_FALSE;
+ arc_convert_to_raw(os->os_phys_buf,
+ os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
+ DMU_OT_OBJSET, NULL, NULL, NULL);
+ }
+
zio = arc_write(pio, os->os_spa, tx->tx_txg,
blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
@@ -1357,7 +1519,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
txgoff = tx->tx_txg & TXG_MASK;
- if (dmu_objset_userused_enabled(os)) {
+ if (dmu_objset_userused_enabled(os) &&
+ (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
/*
* We must create the list here because it uses the
* dn_dirty_link[] of this txg. But it may already
@@ -1637,6 +1800,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
if (!dmu_objset_userused_enabled(os))
return;
+ /* if this is a raw receive just return and handle accounting later */
+ if (os->os_encrypted && dmu_objset_is_receiving(os))
+ return;
+
/* Allocate the user/groupused objects if necessary. */
if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
VERIFY0(zap_create_claim(os,
@@ -1716,6 +1883,18 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
if (!dmu_objset_userused_enabled(dn->dn_objset))
return;
+ /*
+ * Raw receives introduce a problem with user accounting. Raw
+ * receives cannot update the user accounting info because the
+ * user ids and the sizes are encrypted. To guarantee that we
+ * never end up with bad user accounting, we simply disable it
+ * during raw receives. We also disable this for normal receives
+ * so that an incremental raw receive may be done on top of an
+ * existing non-raw receive.
+ */
+ if (os->os_encrypted && dmu_objset_is_receiving(os))
+ return;
+
if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
DN_ID_CHKED_SPILL)))
return;
@@ -2493,8 +2672,10 @@ EXPORT_SYMBOL(dmu_objset_ds);
EXPORT_SYMBOL(dmu_objset_type);
EXPORT_SYMBOL(dmu_objset_name);
EXPORT_SYMBOL(dmu_objset_hold);
+EXPORT_SYMBOL(dmu_objset_hold_flags);
EXPORT_SYMBOL(dmu_objset_own);
EXPORT_SYMBOL(dmu_objset_rele);
+EXPORT_SYMBOL(dmu_objset_rele_flags);
EXPORT_SYMBOL(dmu_objset_disown);
EXPORT_SYMBOL(dmu_objset_from_ds);
EXPORT_SYMBOL(dmu_objset_create);
@@ -2512,6 +2693,7 @@ EXPORT_SYMBOL(dmu_objset_dnodesize);
EXPORT_SYMBOL(dmu_objset_sync);
EXPORT_SYMBOL(dmu_objset_is_dirty);
+EXPORT_SYMBOL(dmu_objset_create_impl_dnstats);
EXPORT_SYMBOL(dmu_objset_create_impl);
EXPORT_SYMBOL(dmu_objset_open_impl);
EXPORT_SYMBOL(dmu_objset_evict);