5 files changed, 130 insertions, 32 deletions
diff --git a/module/zfs/zap.c b/module/zfs/zap.c
index 2f6aed667..d899ccdb7 100644
--- a/module/zfs/zap.c
+++ b/module/zfs/zap.c
@@ -852,8 +852,16 @@ retry:
 	} else if (err == EAGAIN) {
 		err = zap_expand_leaf(zn, l, tag, tx, &l);
 		zap = zn->zn_zap;	/* zap_expand_leaf() may change zap */
-		if (err == 0)
+		if (err == 0) {
 			goto retry;
+		} else if (err == ENOSPC) {
+			/*
+			 * If we failed to expand the leaf, then bailout
+			 * as there is no point trying
+			 * zap_put_leaf_maybe_grow_ptrtbl().
+			 */
+			return (err);
+		}
 	}
 
 out:
diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c
index 5341fc098..661fd747b 100644
--- a/module/zfs/zap_leaf.c
+++ b/module/zfs/zap_leaf.c
@@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
 	((h) >> \
 	(64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))
 
-#define	LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
+#define	LEAF_HASH_ENTPTR(l, h)	(&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
 
 extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);
 
diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c
index b81a48a0f..791cbee88 100644
--- a/module/zfs/zap_micro.c
+++ b/module/zfs/zap_micro.c
@@ -363,6 +363,41 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash)
 	return (cd);
 }
 
+/*
+ * Each mzap entry requires at max : 4 chunks
+ * 3 chunks for names + 1 chunk for value.
+ */
+#define	MZAP_ENT_CHUNKS	(1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
+	ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))
+
+/*
+ * Check if the current entry keeps the colliding entries under the fatzap leaf
+ * size.
+ */
+static boolean_t
+mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
+{
+	zap_t *zap = zn->zn_zap;
+	mzap_ent_t mze_tofind;
+	mzap_ent_t *mze;
+	avl_index_t idx;
+	avl_tree_t *avl = &zap->zap_m.zap_avl;
+	uint32_t mzap_ents = 0;
+
+	mze_tofind.mze_hash = hash;
+	mze_tofind.mze_cd = 0;
+
+	for (mze = avl_find(avl, &mze_tofind, &idx);
+	    mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
+		mzap_ents++;
+	}
+
+	/* Include the new entry being added */
+	mzap_ents++;
+
+	return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
+}
+
 static void
 mze_remove(zap_t *zap, mzap_ent_t *mze)
 {
@@ -638,16 +673,15 @@ mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags)
 		dprintf("adding %s=%llu\n",
 		    mze->mze_name, mze->mze_value);
 		zn = zap_name_alloc(zap, mze->mze_name, 0);
-		err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
-		    tag, tx);
+		/* If we fail here, we would end up losing entries */
+		VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
+		    tag, tx));
 		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
 		zap_name_free(zn);
-		if (err)
-			break;
 	}
 	vmem_free(mzp, sz);
 	*zapp = zap;
-	return (err);
+	return (0);
 }
 
 /*
@@ -1190,7 +1224,8 @@ zap_add_impl(zap_t *zap, const char *key,
 		err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
 		zap = zn->zn_zap;	/* fzap_add() may change zap */
 	} else if (integer_size != 8 || num_integers != 1 ||
-	    strlen(key) >= MZAP_NAME_LEN) {
+	    strlen(key) >= MZAP_NAME_LEN ||
+	    !mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
 		err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
 		if (err == 0) {
 			err = fzap_add(zn, integer_size, num_integers, val,
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
index 76f79cc11..7eb426b78 100644
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -742,7 +742,11 @@ zfs_dirent(znode_t *zp, uint64_t mode)
 }
 
 /*
- * Link zp into dl.  Can only fail if zp has been unlinked.
+ * Link zp into dl.  Can fail in the following cases :
+ * - if zp has been unlinked.
+ * - if the number of entries with the same hash (aka. colliding entries)
+ *    exceed the capacity of a leaf-block of fatzap and splitting of the
+ *    leaf-block does not help.
  */
 int
 zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
@@ -776,6 +780,24 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
 			    NULL, &links, sizeof (links));
 		}
 	}
+
+	value = zfs_dirent(zp, zp->z_mode);
+	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
+	    &value, tx);
+
+	/*
+	 * zap_add could fail to add the entry if it exceeds the capacity of the
+	 * leaf-block and zap_leaf_split() failed to help.
+	 * The caller of this routine is responsible for failing the transaction
+	 * which will rollback the SA updates done above.
+	 */
+	if (error != 0) {
+		if (!(flag & ZRENAMING) && !(flag & ZNEW))
+			drop_nlink(ZTOI(zp));
+		mutex_exit(&zp->z_lock);
+		return (error);
+	}
+
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
 	    &dzp->z_id, sizeof (dzp->z_id));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
@@ -813,11 +835,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
 	ASSERT(error == 0);
 	mutex_exit(&dzp->z_lock);
 
-	value = zfs_dirent(zp, zp->z_mode);
-	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
-	    8, 1, &value, tx);
-	ASSERT(error == 0);
-
 	return (0);
 }
 
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index f7f2f6167..0eeb2bb42 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -1450,6 +1450,7 @@ top:
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 			    0, acl_ids.z_aclp->z_acl_bytes);
 		}
+
 		error = dmu_tx_assign(tx,
 		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 		if (error) {
@@ -1467,10 +1468,22 @@ top:
 		}
 		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
+		error = zfs_link_create(dl, zp, tx, ZNEW);
+		if (error != 0) {
+			/*
+			 * Since, we failed to add the directory entry for it,
+			 * delete the newly created dnode.
+			 */
+			zfs_znode_delete(zp, tx);
+			remove_inode_hash(ZTOI(zp));
+			zfs_acl_ids_free(&acl_ids);
+			dmu_tx_commit(tx);
+			goto out;
+		}
+
 		if (fuid_dirtied)
 			zfs_fuid_sync(zfsvfs, tx);
 
-		(void) zfs_link_create(dl, zp, tx, ZNEW);
 		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
 		if (flag & FIGNORECASE)
 			txtype |= TX_CI;
@@ -2064,13 +2077,18 @@ top:
 	 */
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
-	if (fuid_dirtied)
-		zfs_fuid_sync(zfsvfs, tx);
-
 	/*
 	 * Now put new name in parent dir.
 	 */
-	(void) zfs_link_create(dl, zp, tx, ZNEW);
+	error = zfs_link_create(dl, zp, tx, ZNEW);
+	if (error != 0) {
+		zfs_znode_delete(zp, tx);
+		remove_inode_hash(ZTOI(zp));
+		goto out;
+	}
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
 
 	*ipp = ZTOI(zp);
 
@@ -2080,6 +2098,7 @@ top:
 	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
 	    acl_ids.z_fuidp, vap);
 
+out:
 	zfs_acl_ids_free(&acl_ids);
 
 	dmu_tx_commit(tx);
@@ -2089,10 +2108,14 @@ top:
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	zfs_inode_update(dzp);
-	zfs_inode_update(zp);
+	if (error != 0) {
+		iput(ZTOI(zp));
+	} else {
+		zfs_inode_update(dzp);
+		zfs_inode_update(zp);
+	}
 	ZFS_EXIT(zfsvfs);
-	return (0);
+	return (error);
 }
 
 /*
@@ -3950,6 +3973,13 @@ top:
 				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
 				    ZRENAMING, NULL), ==, 0);
 			}
+		} else {
+			/*
+			 * If we had removed the existing target, subsequent
+			 * call to zfs_link_create() to add back the same entry
+			 * but, the new dnode (szp) should not fail.
+			 */
+			ASSERT(tzp == NULL);
 		}
 	}
 
@@ -4120,14 +4150,18 @@ top:
 	/*
 	 * Insert the new object into the directory.
 	 */
-	(void) zfs_link_create(dl, zp, tx, ZNEW);
-
-	if (flags & FIGNORECASE)
-		txtype |= TX_CI;
-	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
+	error = zfs_link_create(dl, zp, tx, ZNEW);
+	if (error != 0) {
+		zfs_znode_delete(zp, tx);
+		remove_inode_hash(ZTOI(zp));
+	} else {
+		if (flags & FIGNORECASE)
+			txtype |= TX_CI;
+		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
 
-	zfs_inode_update(dzp);
-	zfs_inode_update(zp);
+		zfs_inode_update(dzp);
+		zfs_inode_update(zp);
+	}
 
 	zfs_acl_ids_free(&acl_ids);
 
@@ -4135,10 +4169,14 @@ top:
 
 	zfs_dirent_unlock(dl);
 
-	*ipp = ZTOI(zp);
+	if (error == 0) {
+		*ipp = ZTOI(zp);
 
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
+		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+			zil_commit(zilog, 0);
+	} else {
+		iput(ZTOI(zp));
+	}
 
 	ZFS_EXIT(zfsvfs);
 	return (error);