diff options
Diffstat (limited to 'zfs/lib/libzpool/dmu_objset.c')
-rw-r--r-- | zfs/lib/libzpool/dmu_objset.c | 343 |
1 files changed, 211 insertions, 132 deletions
diff --git a/zfs/lib/libzpool/dmu_objset.c b/zfs/lib/libzpool/dmu_objset.c index 829df52a4..7981e0682 100644 --- a/zfs/lib/libzpool/dmu_objset.c +++ b/zfs/lib/libzpool/dmu_objset.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "@(#)dmu_objset.c 1.37 08/04/27 SMI" - #include <sys/cred.h> #include <sys/zfs_context.h> #include <sys/dmu_objset.h> @@ -133,6 +131,34 @@ copies_changed_cb(void *arg, uint64_t newval) osi->os_copies = newval; } +static void +primary_cache_changed_cb(void *arg, uint64_t newval) +{ + objset_impl_t *osi = arg; + + /* + * Inheritance and range checking should have been done by now. + */ + ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || + newval == ZFS_CACHE_METADATA); + + osi->os_primary_cache = newval; +} + +static void +secondary_cache_changed_cb(void *arg, uint64_t newval) +{ + objset_impl_t *osi = arg; + + /* + * Inheritance and range checking should have been done by now. + */ + ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || + newval == ZFS_CACHE_METADATA); + + osi->os_secondary_cache = newval; +} + void dmu_objset_byteswap(void *buf, size_t size) { @@ -149,7 +175,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_impl_t **osip) { objset_impl_t *osi; - int i, err, checksum; + int i, err; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); @@ -165,19 +191,26 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; + if (DMU_OS_IS_L2CACHEABLE(osi)) + aflags |= ARC_L2CACHE; dprintf_bp(osi->os_rootbp, "reading %s", ""); - err = arc_read(NULL, spa, osi->os_rootbp, - dmu_ot[DMU_OT_OBJSET].ot_byteswap, + /* + * NB: when bprewrite scrub can change the bp, + * and this is called from dmu_objset_open_ds_os, the bp + * could change, and we'll need a lock. + */ + err = arc_read_nolock(NULL, spa, osi->os_rootbp, arc_getbuf_func, &osi->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(osi, sizeof (objset_impl_t)); + /* convert checksum errors into IO errors */ + if (err == ECKSUM) + err = EIO; return (err); } osi->os_phys = osi->os_phys_buf->b_data; - if (ds == NULL || dsl_dataset_is_snapshot(ds) == 0) - arc_release(osi->os_phys_buf, &osi->os_phys_buf); } else { osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &osi->os_phys_buf, ARC_BUFC_METADATA); @@ -188,18 +221,26 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the - * default (fletcher2/off). Snapshots don't need to know, and - * registering would complicate clone promotion. + * default (fletcher2/off). Snapshots don't need to know about + * checksum/compression/copies. */ - if (ds && ds->ds_phys->ds_num_children == 0) { - err = dsl_prop_register(ds, "checksum", - checksum_changed_cb, osi); - if (err == 0) - err = dsl_prop_register(ds, "compression", - compression_changed_cb, osi); + if (ds) { + err = dsl_prop_register(ds, "primarycache", + primary_cache_changed_cb, osi); if (err == 0) - err = dsl_prop_register(ds, "copies", - copies_changed_cb, osi); + err = dsl_prop_register(ds, "secondarycache", + secondary_cache_changed_cb, osi); + if (!dsl_dataset_is_snapshot(ds)) { + if (err == 0) + err = dsl_prop_register(ds, "checksum", + checksum_changed_cb, osi); + if (err == 0) + err = dsl_prop_register(ds, "compression", + compression_changed_cb, osi); + if (err == 0) + err = dsl_prop_register(ds, "copies", + copies_changed_cb, osi); + } if (err) { VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); @@ -211,24 +252,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_compress = ZIO_COMPRESS_LZJB; osi->os_copies = spa_max_replication(spa); + osi->os_primary_cache = ZFS_CACHE_ALL; + osi->os_secondary_cache = ZFS_CACHE_ALL; } - osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); - - /* - * Metadata always gets compressed and checksummed. - * If the data checksum is multi-bit correctable, and it's not - * a ZBT-style checksum, then it's suitable for metadata as well. - * Otherwise, the metadata checksum defaults to fletcher4. - */ - checksum = osi->os_checksum; - - if (zio_checksum_table[checksum].ci_correctable && - !zio_checksum_table[checksum].ci_zbt) - osi->os_md_checksum = checksum; - else - osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; - osi->os_md_compress = ZIO_COMPRESS_LZJB; + osi->os_zil_header = osi->os_phys->os_zil_header; + osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), @@ -265,20 +294,23 @@ static int dmu_objset_open_ds_os(dsl_dataset_t *ds, objset_t *os, dmu_objset_type_t type) { objset_impl_t *osi; - int err; mutex_enter(&ds->ds_opening_lock); osi = dsl_dataset_get_user_ptr(ds); if (osi == NULL) { + int err; + err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), ds, &ds->ds_phys->ds_bp, &osi); - if (err) + if (err) { + mutex_exit(&ds->ds_opening_lock); return (err); + } } mutex_exit(&ds->ds_opening_lock); os->os = osi; - os->os_mode = DS_MODE_NONE; + os->os_mode = DS_MODE_NOHOLD; if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) return (EINVAL); @@ -309,21 +341,28 @@ dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, dsl_dataset_t *ds; int err; - ASSERT(mode != DS_MODE_NONE); + ASSERT(DS_MODE_TYPE(mode) == DS_MODE_USER || + DS_MODE_TYPE(mode) == DS_MODE_OWNER); os = kmem_alloc(sizeof (objset_t), KM_SLEEP); - err = dsl_dataset_open(name, mode, os, &ds); + if (DS_MODE_TYPE(mode) == DS_MODE_USER) + err = dsl_dataset_hold(name, os, &ds); + else + err = dsl_dataset_own(name, mode, os, &ds); if (err) { kmem_free(os, sizeof (objset_t)); return (err); } err = dmu_objset_open_ds_os(ds, os, type); - os->os_mode = mode; if (err) { + if (DS_MODE_TYPE(mode) == DS_MODE_USER) + dsl_dataset_rele(ds, os); + else + dsl_dataset_disown(ds, os); kmem_free(os, sizeof (objset_t)); - dsl_dataset_close(ds, mode, os); } else { + os->os_mode = mode; *osp = os; } return (err); @@ -332,8 +371,14 @@ dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, void dmu_objset_close(objset_t *os) { - if (os->os_mode != DS_MODE_NONE) - dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); + ASSERT(DS_MODE_TYPE(os->os_mode) == DS_MODE_USER || + DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER || + DS_MODE_TYPE(os->os_mode) == DS_MODE_NOHOLD); + + if (DS_MODE_TYPE(os->os_mode) == DS_MODE_USER) + dsl_dataset_rele(os->os->os_dsl_dataset, os); + else if (DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER) + dsl_dataset_disown(os->os->os_dsl_dataset, os); kmem_free(os, sizeof (objset_t)); } @@ -389,13 +434,19 @@ dmu_objset_evict(dsl_dataset_t *ds, void *arg) ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); } - if (ds && ds->ds_phys->ds_num_children == 0) { - VERIFY(0 == dsl_prop_unregister(ds, "checksum", - checksum_changed_cb, osi)); - VERIFY(0 == dsl_prop_unregister(ds, "compression", - compression_changed_cb, osi)); - VERIFY(0 == dsl_prop_unregister(ds, "copies", - copies_changed_cb, osi)); + if (ds) { + if (!dsl_dataset_is_snapshot(ds)) { + VERIFY(0 == dsl_prop_unregister(ds, "checksum", + checksum_changed_cb, osi)); + VERIFY(0 == dsl_prop_unregister(ds, "compression", + compression_changed_cb, osi)); + VERIFY(0 == dsl_prop_unregister(ds, "copies", + copies_changed_cb, osi)); + } + VERIFY(0 == dsl_prop_unregister(ds, "primarycache", + primary_cache_changed_cb, osi)); + VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", + secondary_cache_changed_cb, osi)); } /* @@ -530,8 +581,7 @@ dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) dsobj = dsl_dataset_create_sync(dd, oa->lastname, oa->clone_parent, oa->flags, cr, tx); - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, - DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds)); + VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, FTAG, &ds)); bp = dsl_dataset_get_blkptr(ds); if (BP_IS_HOLE(bp)) { objset_impl_t *osi; @@ -547,7 +597,7 @@ dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa, tx, cr, "dataset = %llu", dsobj); - dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); + dsl_dataset_rele(ds, FTAG); } int @@ -606,17 +656,16 @@ dmu_objset_destroy(const char *name) * but the replay log objset is modified in open context. */ error = dmu_objset_open(name, DMU_OST_ANY, - DS_MODE_EXCLUSIVE|DS_MODE_READONLY, &os); + DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os); if (error == 0) { dsl_dataset_t *ds = os->os->os_dsl_dataset; zil_destroy(dmu_objset_zil(os), B_FALSE); + error = dsl_dataset_destroy(ds, os); /* * dsl_dataset_destroy() closes the ds. - * os is just used as the tag after it's freed. */ kmem_free(os, sizeof (objset_t)); - error = dsl_dataset_destroy(ds, os); } return (error); @@ -633,7 +682,7 @@ dmu_objset_rollback(objset_t *os) ds = os->os->os_dsl_dataset; - if (!dsl_dataset_tryupgrade(ds, DS_MODE_STANDARD, DS_MODE_EXCLUSIVE)) { + if (!dsl_dataset_tryown(ds, TRUE, os)) { dmu_objset_close(os); return (EBUSY); } @@ -645,7 +694,7 @@ dmu_objset_rollback(objset_t *os) * actually implicitly called dmu_objset_evict(), thus freeing * the objset_impl_t. */ - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, os); + dsl_dataset_disown(ds, os); kmem_free(os, sizeof (objset_t)); return (err); } @@ -668,7 +717,6 @@ dmu_objset_snapshot_one(char *name, void *arg) { struct snaparg *sn = arg; objset_t *os; - dmu_objset_stats_t stat; int err; (void) strcpy(sn->failed, name); @@ -682,15 +730,12 @@ dmu_objset_snapshot_one(char *name, void *arg) (err = zfs_secpolicy_snapshot_perms(name, CRED()))) return (err); - err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_STANDARD, &os); + err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os); if (err != 0) return (err); - /* - * If the objset is in an inconsistent state, return busy. - */ - dmu_objset_fast_stat(os, &stat); - if (stat.dds_inconsistent) { + /* If the objset is in an inconsistent state, return busy */ + if (os->os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { dmu_objset_close(os); return (EBUSY); } @@ -798,49 +843,39 @@ dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx) static void ready(zio_t *zio, arc_buf_t *abuf, void *arg) { + blkptr_t *bp = zio->io_bp; + blkptr_t *bp_orig = &zio->io_bp_orig; objset_impl_t *os = arg; - blkptr_t *bp = os->os_rootbp; dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; - int i; - ASSERT(bp == zio->io_bp); + ASSERT(bp == os->os_rootbp); + ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET); + ASSERT(BP_GET_LEVEL(bp) == 0); /* * Update rootbp fill count. */ bp->blk_fill = 1; /* count the meta-dnode */ - for (i = 0; i < dnp->dn_nblkptr; i++) + for (int i = 0; i < dnp->dn_nblkptr; i++) bp->blk_fill += dnp->dn_blkptr[i].blk_fill; - BP_SET_TYPE(bp, DMU_OT_OBJSET); - BP_SET_LEVEL(bp, 0); - - /* We must do this after we've set the bp's type and level */ - if (!DVA_EQUAL(BP_IDENTITY(bp), - BP_IDENTITY(&zio->io_bp_orig))) { + if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { + ASSERT(DVA_EQUAL(BP_IDENTITY(bp), BP_IDENTITY(bp_orig))); + } else { if (zio->io_bp_orig.blk_birth == os->os_synctx->tx_txg) - dsl_dataset_block_kill(os->os_dsl_dataset, - &zio->io_bp_orig, NULL, os->os_synctx); + (void) dsl_dataset_block_kill(os->os_dsl_dataset, + &zio->io_bp_orig, zio, os->os_synctx); dsl_dataset_block_born(os->os_dsl_dataset, bp, os->os_synctx); } } -/* ARGSUSED */ -static void -killer(zio_t *zio, arc_buf_t *abuf, void *arg) -{ - objset_impl_t *os = arg; - - ASSERT3U(zio->io_error, ==, 0); - arc_release(os->os_phys_buf, &os->os_phys_buf); -} - /* called from dsl */ void dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) { int txgoff; zbookmark_t zb; + writeprops_t wp = { 0 }; zio_t *zio; list_t *list; dbuf_dirty_record_t *dr; @@ -865,18 +900,24 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) */ zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; zb.zb_object = 0; - zb.zb_level = -1; + zb.zb_level = -1; /* for block ordering; it's level 0 on disk */ zb.zb_blkid = 0; + + wp.wp_type = DMU_OT_OBJSET; + wp.wp_level = 0; /* on-disk BP level; see above */ + wp.wp_copies = os->os_copies; + wp.wp_oschecksum = os->os_checksum; + wp.wp_oscompress = os->os_compress; + if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) { - dsl_dataset_block_kill(os->os_dsl_dataset, + (void) dsl_dataset_block_kill(os->os_dsl_dataset, os->os_rootbp, pio, tx); } - zio = arc_write(pio, os->os_spa, os->os_md_checksum, - os->os_md_compress, - dmu_get_replication_level(os, &zb, DMU_OT_OBJSET), - tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, killer, os, - ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_METADATA, - &zb); + + arc_release(os->os_phys_buf, &os->os_phys_buf); + zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os), + tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os, + ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); /* * Sync meta-dnode - the parent IO for the sync is the root block @@ -900,6 +941,7 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) * Free intent log blocks up to this tx. */ zil_sync(os->os_zil, tx); + os->os_phys->os_zil_header = os->os_zil_header; zio_nowait(zio); } @@ -1036,48 +1078,80 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name, return (0); } +struct findarg { + int (*func)(char *, void *); + void *arg; +}; + +/* ARGSUSED */ +static int +findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +{ + struct findarg *fa = arg; + return (fa->func((char *)dsname, fa->arg)); +} + /* * Find all objsets under name, and for each, call 'func(child_name, arg)'. + * Perhaps change all callers to use dmu_objset_find_spa()? */ int dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) { + struct findarg fa; + fa.func = func; + fa.arg = arg; + return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags)); +} + +/* + * Find all objsets under name, call func on each + */ +int +dmu_objset_find_spa(spa_t *spa, const char *name, + int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags) +{ dsl_dir_t *dd; - objset_t *os; - uint64_t snapobj; + dsl_pool_t *dp; + dsl_dataset_t *ds; zap_cursor_t zc; zap_attribute_t *attr; char *child; - int do_self, err; + uint64_t thisobj; + int err; - err = dsl_dir_open(name, FTAG, &dd, NULL); + if (name == NULL) + name = spa_name(spa); + err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL); if (err) return (err); - /* NB: the $MOS dir doesn't have a head dataset */ - do_self = (dd->dd_phys->dd_head_dataset_obj != 0); + /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ + if (dd->dd_myname[0] == '$') { + dsl_dir_close(dd, FTAG); + return (0); + } + + thisobj = dd->dd_phys->dd_head_dataset_obj; attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + dp = dd->dd_pool; /* * Iterate over all children. */ if (flags & DS_FIND_CHILDREN) { - for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, + for (zap_cursor_init(&zc, dp->dp_meta_objset, dd->dd_phys->dd_child_dir_zapobj); zap_cursor_retrieve(&zc, attr) == 0; (void) zap_cursor_advance(&zc)) { ASSERT(attr->za_integer_length == sizeof (uint64_t)); ASSERT(attr->za_num_integers == 1); - /* - * No separating '/' because parent's name ends in /. - */ child = kmem_alloc(MAXPATHLEN, KM_SLEEP); - /* XXX could probably just use name here */ - dsl_dir_name(dd, child); + (void) strcpy(child, name); (void) strcat(child, "/"); (void) strcat(child, attr->za_name); - err = dmu_objset_find(child, func, arg, flags); + err = dmu_objset_find_spa(spa, child, func, arg, flags); kmem_free(child, MAXPATHLEN); if (err) break; @@ -1094,30 +1168,36 @@ dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) /* * Iterate over all snapshots. */ - if ((flags & DS_FIND_SNAPSHOTS) && - dmu_objset_open(name, DMU_OST_ANY, - DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) { - - snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj; - dmu_objset_close(os); - - for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj); - zap_cursor_retrieve(&zc, attr) == 0; - (void) zap_cursor_advance(&zc)) { - ASSERT(attr->za_integer_length == sizeof (uint64_t)); - ASSERT(attr->za_num_integers == 1); - - child = kmem_alloc(MAXPATHLEN, KM_SLEEP); - /* XXX could probably just use name here */ - dsl_dir_name(dd, child); - (void) strcat(child, "@"); - (void) strcat(child, attr->za_name); - err = func(child, arg); - kmem_free(child, MAXPATHLEN); - if (err) - break; + if (flags & DS_FIND_SNAPSHOTS) { + if (!dsl_pool_sync_context(dp)) + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); + if (!dsl_pool_sync_context(dp)) + rw_exit(&dp->dp_config_rwlock); + + if (err == 0) { + uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + dsl_dataset_rele(ds, FTAG); + + for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); + zap_cursor_retrieve(&zc, attr) == 0; + (void) zap_cursor_advance(&zc)) { + ASSERT(attr->za_integer_length == + sizeof (uint64_t)); + ASSERT(attr->za_num_integers == 1); + + child = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) strcpy(child, name); + (void) strcat(child, "@"); + (void) strcat(child, attr->za_name); + err = func(spa, attr->za_first_integer, + child, arg); + kmem_free(child, MAXPATHLEN); + if (err) + break; + } + zap_cursor_fini(&zc); } - zap_cursor_fini(&zc); } dsl_dir_close(dd, FTAG); @@ -1129,8 +1209,7 @@ dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) /* * Apply to self if appropriate. */ - if (do_self) - err = func(name, arg); + err = func(spa, thisobj, name, arg); return (err); } |