From cc63068e95ee725cce03b1b7ce50179825a6cda5 Mon Sep 17 00:00:00 2001 From: sanjeevbagewadi Date: Fri, 9 Feb 2018 23:45:53 +0530 Subject: Handle zap_add() failures in mixed case mode With "casesensitivity=mixed", zap_add() could fail when the number of files/directories with the same name (varying in case) exceed the capacity of the leaf node of a Fatzap. This results in a ASSERT() failure as zfs_link_create() does not expect zap_add() to fail. The fix is to handle these failures and rollback the transactions. Reviewed by: Matt Ahrens Reviewed-by: Chunwei Chen Reviewed-by: Brian Behlendorf Signed-off-by: Sanjeev Bagewadi Closes #7011 Closes #7054 --- module/zfs/zap.c | 25 ++++++++++++++++- module/zfs/zap_leaf.c | 2 +- module/zfs/zap_micro.c | 38 +++++++++++++++++++++++++- module/zfs/zfs_dir.c | 29 ++++++++++++++++---- module/zfs/zfs_vnops.c | 74 ++++++++++++++++++++++++++++++++++++++------------ 5 files changed, 141 insertions(+), 27 deletions(-) (limited to 'module') diff --git a/module/zfs/zap.c b/module/zfs/zap.c index 2f6aed667..1c2030bda 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -818,15 +818,19 @@ fzap_lookup(zap_name_t *zn, return (err); } +#define MAX_EXPAND_RETRIES 2 + int fzap_add_cd(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers, const void *val, uint32_t cd, void *tag, dmu_tx_t *tx) { zap_leaf_t *l; + zap_leaf_t *prev_l = NULL; int err; zap_entry_handle_t zeh; zap_t *zap = zn->zn_zap; + int expand_retries = 0; ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ASSERT(!zap->zap_ismicro); @@ -850,10 +854,29 @@ retry: if (err == 0) { zap_increment_num_entries(zap, 1, tx); } else if (err == EAGAIN) { + /* + * If the last two expansions did not help, there is no point + * trying to expand again + */ + if (expand_retries > MAX_EXPAND_RETRIES && prev_l == l) { + err = SET_ERROR(ENOSPC); + goto out; + } + err = zap_expand_leaf(zn, l, tag, tx, &l); zap = zn->zn_zap; /* zap_expand_leaf() may change zap */ - if (err == 0) + if (err == 0) { + prev_l = l; + expand_retries++; goto retry; + } else if (err == ENOSPC) { + /* + * If we failed to expand the leaf, then bailout + * as there is no point trying + * zap_put_leaf_maybe_grow_ptrtbl(). + */ + return (err); + } } out: diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c index 5341fc098..661fd747b 100644 --- a/module/zfs/zap_leaf.c +++ b/module/zfs/zap_leaf.c @@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); ((h) >> \ (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len))) -#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)]) +#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)]) extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l); diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index b81a48a0f..ad9a62161 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -363,6 +363,41 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash) return (cd); } +/* + * Each mzap entry requires at max : 4 chunks + * 3 chunks for names + 1 chunk for value. + */ +#define MZAP_ENT_CHUNKS (1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \ + ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t))) + +/* + * Check if the current entry keeps the colliding entries under the fatzap leaf + * size. + */ +static boolean_t +mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash) +{ + zap_t *zap = zn->zn_zap; + mzap_ent_t mze_tofind; + mzap_ent_t *mze; + avl_index_t idx; + avl_tree_t *avl = &zap->zap_m.zap_avl; + uint32_t mzap_ents = 0; + + mze_tofind.mze_hash = hash; + mze_tofind.mze_cd = 0; + + for (mze = avl_find(avl, &mze_tofind, &idx); + mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { + mzap_ents++; + } + + /* Include the new entry being added */ + mzap_ents++; + + return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS)); +} + static void mze_remove(zap_t *zap, mzap_ent_t *mze) { @@ -1190,7 +1225,8 @@ zap_add_impl(zap_t *zap, const char *key, err = fzap_add(zn, integer_size, num_integers, val, tag, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ } else if (integer_size != 8 || num_integers != 1 || - strlen(key) >= MZAP_NAME_LEN) { + strlen(key) >= MZAP_NAME_LEN || + !mze_canfit_fzap_leaf(zn, zn->zn_hash)) { err = mzap_upgrade(&zn->zn_zap, tag, tx, 0); if (err == 0) { err = fzap_add(zn, integer_size, num_integers, val, diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index 9a8bbccd9..6398a1d15 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -742,7 +742,11 @@ zfs_dirent(znode_t *zp, uint64_t mode) } /* - * Link zp into dl. Can only fail if zp has been unlinked. + * Link zp into dl. Can fail in the following cases : + * - if zp has been unlinked. + * - if the number of entries with the same hash (aka. colliding entries) + * exceed the capacity of a leaf-block of fatzap and splitting of the + * leaf-block does not help. */ int zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) @@ -776,6 +780,24 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) NULL, &links, sizeof (links)); } } + + value = zfs_dirent(zp, zp->z_mode); + error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1, + &value, tx); + + /* + * zap_add could fail to add the entry if it exceeds the capacity of the + * leaf-block and zap_leaf_split() failed to help. + * The caller of this routine is responsible for failing the transaction + * which will rollback the SA updates done above. + */ + if (error != 0) { + if (!(flag & ZRENAMING) && !(flag & ZNEW)) + drop_nlink(ZTOI(zp)); + mutex_exit(&zp->z_lock); + return (error); + } + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &dzp->z_id, sizeof (dzp->z_id)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, @@ -813,11 +835,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) ASSERT(error == 0); mutex_exit(&dzp->z_lock); - value = zfs_dirent(zp, zp->z_mode); - error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, - 8, 1, &value, tx); - ASSERT(error == 0); - return (0); } diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 5e34cb2ff..e21fe1aca 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -1429,6 +1429,7 @@ top: dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } + error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); if (error) { @@ -1446,10 +1447,22 @@ top: } zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); + error = zfs_link_create(dl, zp, tx, ZNEW); + if (error != 0) { + /* + * Since, we failed to add the directory entry for it, + * delete the newly created dnode. + */ + zfs_znode_delete(zp, tx); + remove_inode_hash(ZTOI(zp)); + zfs_acl_ids_free(&acl_ids); + dmu_tx_commit(tx); + goto out; + } + if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); - (void) zfs_link_create(dl, zp, tx, ZNEW); txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); if (flag & FIGNORECASE) txtype |= TX_CI; @@ -2040,13 +2053,18 @@ top: */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - /* * Now put new name in parent dir. */ - (void) zfs_link_create(dl, zp, tx, ZNEW); + error = zfs_link_create(dl, zp, tx, ZNEW); + if (error != 0) { + zfs_znode_delete(zp, tx); + remove_inode_hash(ZTOI(zp)); + goto out; + } + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); *ipp = ZTOI(zp); @@ -2056,6 +2074,7 @@ top: zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, acl_ids.z_fuidp, vap); +out: zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); @@ -2065,10 +2084,14 @@ top: if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - zfs_inode_update(dzp); - zfs_inode_update(zp); + if (error != 0) { + iput(ZTOI(zp)); + } else { + zfs_inode_update(dzp); + zfs_inode_update(zp); + } ZFS_EXIT(zfsvfs); - return (0); + return (error); } /* @@ -3686,6 +3709,13 @@ top: VERIFY3U(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL), ==, 0); } + } else { + /* + * If we had removed the existing target, subsequent + * call to zfs_link_create() to add back the same entry + * but, the new dnode (szp) should not fail. + */ + ASSERT(tzp == NULL); } } @@ -3856,14 +3886,18 @@ top: /* * Insert the new object into the directory. */ - (void) zfs_link_create(dl, zp, tx, ZNEW); - - if (flags & FIGNORECASE) - txtype |= TX_CI; - zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); + error = zfs_link_create(dl, zp, tx, ZNEW); + if (error != 0) { + zfs_znode_delete(zp, tx); + remove_inode_hash(ZTOI(zp)); + } else { + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); - zfs_inode_update(dzp); - zfs_inode_update(zp); + zfs_inode_update(dzp); + zfs_inode_update(zp); + } zfs_acl_ids_free(&acl_ids); @@ -3871,10 +3905,14 @@ top: zfs_dirent_unlock(dl); - *ipp = ZTOI(zp); + if (error == 0) { + *ipp = ZTOI(zp); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + } else { + iput(ZTOI(zp)); + } ZFS_EXIT(zfsvfs); return (error); -- cgit v1.2.3