64 files changed, 1575 insertions, 224 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 3311a6e6f..ee5e21f10 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -1880,15 +1880,15 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
 	dnode_t *dn;
 	void *bonus = NULL;
 	size_t bsize = 0;
-	char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
+	char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32];
 	char bonus_size[32];
 	char aux[50];
 	int error;
 
 	if (*print_header) {
-		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
-		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
-		    "%full", "type");
+		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %6s  %5s  %6s  %s\n",
+		    "Object", "lvl", "iblk", "dblk", "dsize", "dnsize",
+		    "lsize", "%full", "type");
 		*print_header = 0;
 	}
 
@@ -1910,6 +1910,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
 	zdb_nicenum(doi.doi_max_offset, lsize);
 	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
 	zdb_nicenum(doi.doi_bonus_size, bonus_size);
+	zdb_nicenum(doi.doi_dnodesize, dnsize);
 	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
 	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
 	    doi.doi_max_offset);
@@ -1926,13 +1927,13 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
 		    ZDB_COMPRESS_NAME(doi.doi_compress));
 	}
 
-	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
+	(void) printf("%10lld  %3u  %5s  %5s  %5s  %6s  %5s  %6s  %s%s\n",
 	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
-	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
+	    asize, dnsize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
 
 	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
-		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
-		    "", "", "", "", "", bonus_size, "bonus",
+		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %5s  %6s  %s\n",
+		    "", "", "", "", "", "", bonus_size, "bonus",
 		    ZDB_OT_NAME(doi.doi_bonus_type));
 	}
 
diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c
index 93b905793..1501e879d 100644
--- a/cmd/zdb/zdb_il.c
+++ b/cmd/zdb/zdb_il.c
@@ -80,8 +80,10 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
 	}
 
 	(void) printf("%s%s", prefix, ctime(&crtime));
-	(void) printf("%sdoid %llu, foid %llu, mode %llo\n", prefix,
-	    (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
+	(void) printf("%sdoid %llu, foid %llu, slots %llu, mode %llo\n", prefix,
+	    (u_longlong_t)lr->lr_doid,
+	    (u_longlong_t)LR_FOID_GET_OBJ(lr->lr_foid),
+	    (u_longlong_t)LR_FOID_GET_SLOTS(lr->lr_foid),
 	    (longlong_t)lr->lr_mode);
 	(void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n", prefix,
 	    (u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index e6d6e9803..ad15dea1d 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -221,6 +221,7 @@ typedef struct ztest_block_tag {
 	uint64_t	bt_magic;
 	uint64_t	bt_objset;
 	uint64_t	bt_object;
+	uint64_t	bt_dnodesize;
 	uint64_t	bt_offset;
 	uint64_t	bt_gen;
 	uint64_t	bt_txg;
@@ -258,6 +259,7 @@ typedef struct ztest_od {
 	dmu_object_type_t od_crtype;
 	uint64_t	od_blocksize;
 	uint64_t	od_crblocksize;
+	uint64_t	od_crdnodesize;
 	uint64_t	od_gen;
 	uint64_t	od_crgen;
 	char		od_name[MAXNAMELEN];
@@ -329,6 +331,7 @@ ztest_func_t ztest_split_pool;
 ztest_func_t ztest_reguid;
 ztest_func_t ztest_spa_upgrade;
 ztest_func_t ztest_fletcher;
+ztest_func_t ztest_verify_dnode_bt;
 
 uint64_t zopt_always = 0ULL * NANOSEC;		/* all the time */
 uint64_t zopt_incessant = 1ULL * NANOSEC / 10;	/* every 1/10 second */
@@ -375,6 +378,7 @@ ztest_info_t ztest_info[] = {
 	ZTI_INIT(ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime),
 	ZTI_INIT(ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime),
 	ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
+	ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
 };
 
 #define	ZTEST_FUNCS	(sizeof (ztest_info) / sizeof (ztest_info_t))
@@ -1028,6 +1032,36 @@ ztest_random_blocksize(void)
 }
 
 static int
+ztest_random_dnodesize(void)
+{
+	int slots;
+	int max_slots = spa_maxdnodesize(ztest_spa) >> DNODE_SHIFT;
+
+	if (max_slots == DNODE_MIN_SLOTS)
+		return (DNODE_MIN_SIZE);
+
+	/*
+	 * Weight the random distribution more heavily toward smaller
+	 * dnode sizes since that is more likely to reflect real-world
+	 * usage.
+	 */
+	ASSERT3U(max_slots, >, 4);
+	switch (ztest_random(10)) {
+	case 0:
+		slots = 5 + ztest_random(max_slots - 4);
+		break;
+	case 1 ... 4:
+		slots = 2 + ztest_random(3);
+		break;
+	default:
+		slots = 1;
+		break;
+	}
+
+	return (slots << DNODE_SHIFT);
+}
+
+static int
 ztest_random_ibshift(void)
 {
 	return (DN_MIN_INDBLKSHIFT +
@@ -1417,11 +1451,13 @@ ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
 
 static void
 ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
-    uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+    uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg,
+    uint64_t crtxg)
 {
 	bt->bt_magic = BT_MAGIC;
 	bt->bt_objset = dmu_objset_id(os);
 	bt->bt_object = object;
+	bt->bt_dnodesize = dnodesize;
 	bt->bt_offset = offset;
 	bt->bt_gen = gen;
 	bt->bt_txg = txg;
@@ -1430,11 +1466,13 @@ ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
 
 static void
 ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
-    uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+    uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg,
+    uint64_t crtxg)
 {
 	ASSERT3U(bt->bt_magic, ==, BT_MAGIC);
 	ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
 	ASSERT3U(bt->bt_object, ==, object);
+	ASSERT3U(bt->bt_dnodesize, ==, dnodesize);
 	ASSERT3U(bt->bt_offset, ==, offset);
 	ASSERT3U(bt->bt_gen, <=, gen);
 	ASSERT3U(bt->bt_txg, <=, txg);
@@ -1456,6 +1494,52 @@ ztest_bt_bonus(dmu_buf_t *db)
 }
 
 /*
+ * Generate a token to fill up unused bonus buffer space.  Try to make
+ * it unique to the object, generation, and offset to verify that data
+ * is not getting overwritten by data from other dnodes.
+ */
+#define	ZTEST_BONUS_FILL_TOKEN(obj, ds, gen, offset) \
+	(((ds) << 48) | ((gen) << 32) | ((obj) << 8) | (offset))
+
+/*
+ * Fill up the unused bonus buffer region before the block tag with a
+ * verifiable pattern. Filling the whole bonus area with non-zero data
+ * helps ensure that all dnode traversal code properly skips the
+ * interior regions of large dnodes.
+ */
+void
+ztest_fill_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
+    objset_t *os, uint64_t gen)
+{
+	uint64_t *bonusp;
+
+	ASSERT(IS_P2ALIGNED((char *)end - (char *)db->db_data, 8));
+
+	for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) {
+		uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os),
+		    gen, bonusp - (uint64_t *)db->db_data);
+		*bonusp = token;
+	}
+}
+
+/*
+ * Verify that the unused area of a bonus buffer is filled with the
+ * expected tokens.
+ */
+void
+ztest_verify_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
+    objset_t *os, uint64_t gen)
+{
+	uint64_t *bonusp;
+
+	for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) {
+		uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os),
+		    gen, bonusp - (uint64_t *)db->db_data);
+		VERIFY3U(*bonusp, ==, token);
+	}
+}
+
+/*
  * ZIL logging ops
  */
 
@@ -1463,7 +1547,7 @@ ztest_bt_bonus(dmu_buf_t *db)
 #define	lrz_blocksize	lr_uid
 #define	lrz_ibshift	lr_gid
 #define	lrz_bonustype	lr_rdev
-#define	lrz_bonuslen	lr_crtime[1]
+#define	lrz_dnodesize	lr_crtime[1]
 
 static void
 ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr)
@@ -1578,6 +1662,7 @@ ztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap)
 	dmu_tx_t *tx;
 	uint64_t txg;
 	int error = 0;
+	int bonuslen;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
@@ -1600,26 +1685,27 @@ ztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap)
 		return (ENOSPC);
 
 	ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid);
+	bonuslen = DN_BONUS_SIZE(lr->lrz_dnodesize);
 
 	if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
 		if (lr->lr_foid == 0) {
-			lr->lr_foid = zap_create(os,
+			lr->lr_foid = zap_create_dnsize(os,
 			    lr->lrz_type, lr->lrz_bonustype,
-			    lr->lrz_bonuslen, tx);
+			    bonuslen, lr->lrz_dnodesize, tx);
 		} else {
-			error = zap_create_claim(os, lr->lr_foid,
+			error = zap_create_claim_dnsize(os, lr->lr_foid,
 			    lr->lrz_type, lr->lrz_bonustype,
-			    lr->lrz_bonuslen, tx);
+			    bonuslen, lr->lrz_dnodesize, tx);
 		}
 	} else {
 		if (lr->lr_foid == 0) {
-			lr->lr_foid = dmu_object_alloc(os,
+			lr->lr_foid = dmu_object_alloc_dnsize(os,
 			    lr->lrz_type, 0, lr->lrz_bonustype,
-			    lr->lrz_bonuslen, tx);
+			    bonuslen, lr->lrz_dnodesize, tx);
 		} else {
-			error = dmu_object_claim(os, lr->lr_foid,
+			error = dmu_object_claim_dnsize(os, lr->lr_foid,
 			    lr->lrz_type, 0, lr->lrz_bonustype,
-			    lr->lrz_bonuslen, tx);
+			    bonuslen, lr->lrz_dnodesize, tx);
 		}
 	}
 
@@ -1639,7 +1725,9 @@ ztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap)
 	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
 	bbt = ztest_bt_bonus(db);
 	dmu_buf_will_dirty(db, tx);
-	ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg);
+	ztest_bt_generate(bbt, os, lr->lr_foid, lr->lrz_dnodesize, -1ULL,
+	    lr->lr_gen, txg, txg);
+	ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, lr->lr_gen);
 	dmu_buf_rele(db, FTAG);
 
 	VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
@@ -1785,7 +1873,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 			VERIFY(dmu_read(os, lr->lr_foid, offset,
 			    sizeof (rbt), &rbt, prefetch) == 0);
 			if (rbt.bt_magic == BT_MAGIC) {
-				ztest_bt_verify(&rbt, os, lr->lr_foid,
+				ztest_bt_verify(&rbt, os, lr->lr_foid, 0,
 				    offset, gen, txg, crtxg);
 			}
 		}
@@ -1797,7 +1885,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 		 * as it was when the write was generated.
 		 */
 		if (zd->zd_zilog->zl_replay) {
-			ztest_bt_verify(bt, os, lr->lr_foid, offset,
+			ztest_bt_verify(bt, os, lr->lr_foid, 0, offset,
 			    MAX(gen, bt->bt_gen), MAX(txg, lrtxg),
 			    bt->bt_crtxg);
 		}
@@ -1806,7 +1894,8 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 		 * Set the bt's gen/txg to the bonus buffer's gen/txg
 		 * so that all of the usual ASSERTs will work.
 		 */
-		ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg);
+		ztest_bt_generate(bt, os, lr->lr_foid, 0, offset, gen, txg,
+		    crtxg);
 	}
 
 	if (abuf == NULL) {
@@ -1874,7 +1963,7 @@ ztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap)
 	dmu_tx_t *tx;
 	dmu_buf_t *db;
 	ztest_block_tag_t *bbt;
-	uint64_t txg, lrtxg, crtxg;
+	uint64_t txg, lrtxg, crtxg, dnodesize;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
@@ -1897,6 +1986,7 @@ ztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap)
 	ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
 	crtxg = bbt->bt_crtxg;
 	lrtxg = lr->lr_common.lrc_txg;
+	dnodesize = bbt->bt_dnodesize;
 
 	if (zd->zd_zilog->zl_replay) {
 		ASSERT(lr->lr_size != 0);
@@ -1915,7 +2005,7 @@ ztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap)
 	/*
 	 * Verify that the current bonus buffer is not newer than our txg.
 	 */
-	ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode,
+	ztest_bt_verify(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode,
 	    MAX(txg, lrtxg), crtxg);
 
 	dmu_buf_will_dirty(db, tx);
@@ -1925,8 +2015,9 @@ ztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap)
 	VERIFY0(dmu_set_bonus(db, lr->lr_size, tx));
 	bbt = ztest_bt_bonus(db);
 
-	ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg);
-
+	ztest_bt_generate(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode,
+	    txg, crtxg);
+	ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, bbt->bt_gen);
 	dmu_buf_rele(db, FTAG);
 
 	(void) ztest_log_setattr(zd, tx, lr);
@@ -2171,7 +2262,7 @@ ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
 		lr->lrz_blocksize = od->od_crblocksize;
 		lr->lrz_ibshift = ztest_random_ibshift();
 		lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
-		lr->lrz_bonuslen = dmu_bonus_max();
+		lr->lrz_dnodesize = od->od_crdnodesize;
 		lr->lr_gen = od->od_crgen;
 		lr->lr_crtime[0] = time(NULL);
 
@@ -2351,7 +2442,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
 	switch (io_type) {
 
 	case ZTEST_IO_WRITE_TAG:
-		ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
+		ztest_bt_generate(&wbt, zd->zd_os, object, doi.doi_dnodesize,
+		    offset, 0, 0, 0);
 		(void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
 		break;
 
@@ -2414,13 +2506,15 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
  */
 static void
 ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
-    dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
+    dmu_object_type_t type, uint64_t blocksize, uint64_t dnodesize,
+    uint64_t gen)
 {
 	od->od_dir = ZTEST_DIROBJ;
 	od->od_object = 0;
 
 	od->od_crtype = type;
 	od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
+	od->od_crdnodesize = dnodesize ? dnodesize : ztest_random_dnodesize();
 	od->od_crgen = gen;
 
 	od->od_type = DMU_OT_NONE;
@@ -3740,7 +3834,8 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
 	batchsize = OD_ARRAY_SIZE;
 
 	for (b = 0; b < batchsize; b++)
-		ztest_od_init(od + b, id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0);
+		ztest_od_init(od + b, id, FTAG, b, DMU_OT_UINT64_OTHER,
+		    0, 0, 0);
 
 	/*
 	 * Destroy the previous batch of objects, create a new batch,
@@ -3809,8 +3904,9 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
 	/*
 	 * Read the directory info.  If it's the first time, set things up.
 	 */
-	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize);
-	ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);
+	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, chunksize);
+	ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
+	    chunksize);
 
 	if (ztest_object_init(zd, od, size, B_FALSE) != 0) {
 		umem_free(od, size);
@@ -4090,8 +4186,9 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
 	/*
 	 * Read the directory info.  If it's the first time, set things up.
 	 */
-	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
-	ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);
+	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
+	ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
+	    chunksize);
 
 
 	if (ztest_object_init(zd, od, size, B_FALSE) != 0) {
@@ -4299,7 +4396,7 @@ ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id)
 	 * to verify that parallel writes to an object -- even to the
 	 * same blocks within the object -- doesn't cause any trouble.
 	 */
-	ztest_od_init(od, ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
+	ztest_od_init(od, ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
 
 	if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0)
 		return;
@@ -4322,7 +4419,7 @@ ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id)
 
 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
 
-	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
+	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
 
 	if (ztest_object_init(zd, od, sizeof (ztest_od_t),
 	    !ztest_random(2)) != 0) {
@@ -4375,7 +4472,7 @@ ztest_zap(ztest_ds_t *zd, uint64_t id)
 	char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" };
 
 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
-	ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0);
+	ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0);
 
 	if (ztest_object_init(zd, od, sizeof (ztest_od_t),
 			!ztest_random(2)) != 0)
@@ -4512,7 +4609,7 @@ ztest_fzap(ztest_ds_t *zd, uint64_t id)
 	int i;
 
 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
-	ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0);
+	ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0);
 
 	if (ztest_object_init(zd, od, sizeof (ztest_od_t),
 				!ztest_random(2)) != 0)
@@ -4561,7 +4658,7 @@ ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
 	void *data;
 
 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
-	ztest_od_init(od, ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0);
+	ztest_od_init(od, ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0, 0);
 
 	if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) {
 		umem_free(od, sizeof (ztest_od_t));
@@ -4750,7 +4847,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
 	int i, error = 0;
 
 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
-	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
+	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
 
 	if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) {
 		umem_free(od, sizeof (ztest_od_t));
@@ -4871,6 +4968,41 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
 	umem_free(od, sizeof (ztest_od_t));
 }
 
+/*
+ * Visit each object in the dataset. Verify that its properties
+ * are consistent what was stored in the block tag when it was created,
+ * and that its unused bonus buffer space has not been overwritten.
+ */
+void
+ztest_verify_dnode_bt(ztest_ds_t *zd, uint64_t id)
+{
+	objset_t *os = zd->zd_os;
+	uint64_t obj;
+	int err = 0;
+
+	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
+		ztest_block_tag_t *bt = NULL;
+		dmu_object_info_t doi;
+		dmu_buf_t *db;
+
+		if (dmu_bonus_hold(os, obj, FTAG, &db) != 0)
+			continue;
+
+		dmu_object_info_from_db(db, &doi);
+		if (doi.doi_bonus_size >= sizeof (*bt))
+			bt = ztest_bt_bonus(db);
+
+		if (bt && bt->bt_magic == BT_MAGIC) {
+			ztest_bt_verify(bt, os, obj, doi.doi_dnodesize,
+			    bt->bt_offset, bt->bt_gen, bt->bt_txg,
+			    bt->bt_crtxg);
+			ztest_verify_unused_bonus(db, bt, obj, os, bt->bt_gen);
+		}
+
+		dmu_buf_rele(db, FTAG);
+	}
+}
+
 /* ARGSUSED */
 void
 ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
@@ -5317,7 +5449,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
 	blocksize = MIN(blocksize, 2048);	/* because we write so many */
 
 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
-	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
+	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
 
 	if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) {
 		umem_free(od, sizeof (ztest_od_t));
@@ -6194,7 +6326,7 @@ ztest_freeze(void)
 	    numloops++ < ztest_opts.zo_maxloops &&
 	    metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) {
 		ztest_od_t od;
-		ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
+		ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
 		VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE));
 		ztest_io(zd, od.od_object,
 		    ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
diff --git a/configure.ac b/configure.ac
index 99c25bc27..9264d6bc6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -230,6 +230,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/tests/functional/devices/Makefile
 	tests/zfs-tests/tests/functional/exec/Makefile
 	tests/zfs-tests/tests/functional/features/async_destroy/Makefile
+	tests/zfs-tests/tests/functional/features/large_dnode/Makefile
 	tests/zfs-tests/tests/functional/features/Makefile
 	tests/zfs-tests/tests/functional/grow_pool/Makefile
 	tests/zfs-tests/tests/functional/grow_replicas/Makefile
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 64d28334e..74001d8f3 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -334,10 +334,19 @@ typedef struct dmu_buf {
  */
 uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
+uint64_t dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot,
+    int blocksize, dmu_object_type_t bonus_type, int bonus_len,
+    int dnodesize, dmu_tx_t *tx);
 int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
+int dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
+    int blocksize, dmu_object_type_t bonus_type, int bonus_len,
+    int dnodesize, dmu_tx_t *tx);
 int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp);
+int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object,
+    dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype,
+    int bonuslen, int dnodesize, dmu_tx_t *txp);
 
 /*
  * Free an object from this objset.
@@ -756,6 +765,7 @@ typedef struct dmu_object_info {
 	uint8_t doi_compress;
 	uint8_t doi_nblkptr;
 	uint8_t doi_pad[4];
+	uint64_t doi_dnodesize;
 	uint64_t doi_physical_blocks_512;	/* data + metadata, 512b blks */
 	uint64_t doi_max_offset;
 	uint64_t doi_fill_count;		/* number of non-empty blocks */
@@ -797,6 +807,8 @@ void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
 void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
     u_longlong_t *nblk512);
 
+void dmu_object_dnsize_from_db(dmu_buf_t *db, int *dnsize);
+
 typedef struct dmu_objset_stats {
 	uint64_t dds_num_clones; /* number of clones of this */
 	uint64_t dds_creation_txg;
@@ -854,6 +866,7 @@ extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
 extern void dmu_objset_name(objset_t *os, char *buf);
 extern dmu_objset_type_t dmu_objset_type(objset_t *os);
 extern uint64_t dmu_objset_id(objset_t *os);
+extern uint64_t dmu_objset_dnodesize(objset_t *os);
 extern zfs_sync_type_t dmu_objset_syncprop(objset_t *os);
 extern zfs_logbias_op_t dmu_objset_logbias(objset_t *os);
 extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
index 0f03699f1..81bb89bfc 100644
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -88,6 +88,7 @@ struct objset {
 	list_node_t os_evicting_node;
 
 	/* can change, under dsl_dir's locks: */
+	uint64_t os_dnodesize; /* default dnode size for new objects */
 	enum zio_checksum os_checksum;
 	enum zio_compress os_compress;
 	uint8_t os_copies;
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index c5250d51a..cee4ea783 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -79,11 +79,18 @@ extern "C" {
 /*
  * Derived constants.
  */
-#define	DNODE_SIZE	(1 << DNODE_SHIFT)
-#define	DN_MAX_NBLKPTR	((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
-#define	DN_MAX_BONUSLEN	(DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
+#define	DNODE_MIN_SIZE		(1 << DNODE_SHIFT)
+#define	DNODE_MAX_SIZE		(1 << DNODE_BLOCK_SHIFT)
+#define	DNODE_BLOCK_SIZE	(1 << DNODE_BLOCK_SHIFT)
+#define	DNODE_MIN_SLOTS		(DNODE_MIN_SIZE >> DNODE_SHIFT)
+#define	DNODE_MAX_SLOTS		(DNODE_MAX_SIZE >> DNODE_SHIFT)
+#define	DN_BONUS_SIZE(dnsize)	((dnsize) - DNODE_CORE_SIZE - \
+	(1 << SPA_BLKPTRSHIFT))
+#define	DN_SLOTS_TO_BONUSLEN(slots)	DN_BONUS_SIZE((slots) << DNODE_SHIFT)
+#define	DN_OLD_MAX_BONUSLEN	(DN_BONUS_SIZE(DNODE_MIN_SIZE))
+#define	DN_MAX_NBLKPTR	((DNODE_MIN_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
 #define	DN_MAX_OBJECT	(1ULL << DN_MAX_OBJECT_SHIFT)
-#define	DN_ZERO_BONUSLEN	(DN_MAX_BONUSLEN + 1)
+#define	DN_ZERO_BONUSLEN	(DN_BONUS_SIZE(DNODE_MAX_SIZE) + 1)
 #define	DN_KILL_SPILLBLK (1)
 
 #define	DNODES_PER_BLOCK_SHIFT	(DNODE_BLOCK_SHIFT - DNODE_SHIFT)
@@ -131,7 +138,8 @@ typedef struct dnode_phys {
 	uint8_t dn_flags;		/* DNODE_FLAG_* */
 	uint16_t dn_datablkszsec;	/* data block size in 512b sectors */
 	uint16_t dn_bonuslen;		/* length of dn_bonus */
-	uint8_t dn_pad2[4];
+	uint8_t dn_extra_slots;		/* # of subsequent slots consumed */
+	uint8_t dn_pad2[3];
 
 	/* accounting is protected by dn_dirty_mtx */
 	uint64_t dn_maxblkid;		/* largest allocated block ID */
@@ -140,8 +148,11 @@ typedef struct dnode_phys {
 	uint64_t dn_pad3[4];
 
 	/*
-	 * The tail region is 448 bytes, and there are three ways to
-	 * look at it.
+	 * The tail region is 448 bytes for a 512 byte dnode, and
+	 * correspondingly larger for larger dnode sizes. The spill
+	 * block pointer, when present, is always at the end of the tail
+	 * region. There are three ways this space may be used, using
+	 * a 512 byte dnode for this diagram:
 	 *
 	 * 0       64      128     192     256     320     384     448 (offset)
 	 * +---------------+---------------+---------------+-------+
@@ -149,23 +160,27 @@ typedef struct dnode_phys {
 	 * +---------------+---------------+---------------+-------+
 	 * | dn_blkptr[0]  | dn_bonus[0..319]                      |
 	 * +---------------+-----------------------+---------------+
-	 * | dn_blkptr[0]  | /                     | dn_spill      |
+	 * | dn_blkptr[0]  | dn_bonus[0..191]      | dn_spill      |
 	 * +---------------+-----------------------+---------------+
 	 */
 	union {
-		blkptr_t dn_blkptr[1+DN_MAX_BONUSLEN/sizeof (blkptr_t)];
+		blkptr_t dn_blkptr[1+DN_OLD_MAX_BONUSLEN/sizeof (blkptr_t)];
 		struct {
 			blkptr_t __dn_ignore1;
-			uint8_t dn_bonus[DN_MAX_BONUSLEN];
+			uint8_t dn_bonus[DN_OLD_MAX_BONUSLEN];
 		};
 		struct {
 			blkptr_t __dn_ignore2;
-			uint8_t __dn_ignore3[DN_MAX_BONUSLEN-sizeof (blkptr_t)];
+			uint8_t __dn_ignore3[DN_OLD_MAX_BONUSLEN -
+			    sizeof (blkptr_t)];
 			blkptr_t dn_spill;
 		};
 	};
 } dnode_phys_t;
 
+#define	DN_SPILL_BLKPTR(dnp)	(blkptr_t *)((char *)(dnp) + \
+	(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))
+
 typedef struct dnode {
 	/*
 	 * Protects the structure of the dnode, including the number of levels
@@ -202,6 +217,7 @@ typedef struct dnode {
 	uint32_t dn_datablksz;		/* in bytes */
 	uint64_t dn_maxblkid;
 	uint8_t dn_next_type[TXG_SIZE];
+	uint8_t dn_num_slots;		/* metadnode slots consumed on disk */
 	uint8_t dn_next_nblkptr[TXG_SIZE];
 	uint8_t dn_next_nlevels[TXG_SIZE];
 	uint8_t dn_next_indblkshift[TXG_SIZE];
@@ -299,7 +315,7 @@ void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx);
 
 int dnode_hold(struct objset *dd, uint64_t object,
     void *ref, dnode_t **dnp);
-int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
+int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots,
     void *ref, dnode_t **dnp);
 boolean_t dnode_add_ref(dnode_t *dn, void *ref);
 void dnode_rele(dnode_t *dn, void *ref);
@@ -307,9 +323,9 @@ void dnode_rele_and_unlock(dnode_t *dn, void *tag);
 void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
 void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
 void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+    dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
 void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+    dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
 void dnode_free(dnode_t *dn, dmu_tx_t *tx);
 void dnode_byteswap(dnode_phys_t *dnp);
 void dnode_buf_byteswap(void *buf, size_t size);
diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h
index a596642e3..f3e64a772 100644
--- a/include/sys/dsl_dataset.h
+++ b/include/sys/dsl_dataset.h
@@ -92,6 +92,13 @@ struct dsl_pool;
 #define	DS_FIELD_LARGE_BLOCKS "org.open-zfs:large_blocks"
 
 /*
+ * This field is present (with value=0) if this dataset may contain large
+ * dnodes (>512B).  If it is present, then this dataset is counted in the
+ * refcount of the SPA_FEATURE_LARGE_DNODE feature.
+ */
+#define	DS_FIELD_LARGE_DNODE "org.zfsonlinux:large_dnode"
+
+/*
  * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
  * name lookups should be performed case-insensitively.
  */
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 9de50e9a2..0935dca77 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -137,6 +137,7 @@ typedef enum {
 	ZFS_PROP_DEDUP,
 	ZFS_PROP_MLSLABEL,
 	ZFS_PROP_SYNC,
+	ZFS_PROP_DNODESIZE,
 	ZFS_PROP_REFRATIO,
 	ZFS_PROP_WRITTEN,
 	ZFS_PROP_CLONES,
@@ -204,6 +205,7 @@ typedef enum {
 	ZPOOL_PROP_LEAKED,
 	ZPOOL_PROP_MAXBLOCKSIZE,
 	ZPOOL_PROP_TNAME,
+	ZPOOL_PROP_MAXDNODESIZE,
 	ZPOOL_NUM_PROPS
 } zpool_prop_t;
 
@@ -362,6 +364,16 @@ typedef enum {
 } zfs_xattr_type_t;
 
 typedef enum {
+	ZFS_DNSIZE_LEGACY = 0,
+	ZFS_DNSIZE_AUTO = 1,
+	ZFS_DNSIZE_1K = 1024,
+	ZFS_DNSIZE_2K = 2048,
+	ZFS_DNSIZE_4K = 4096,
+	ZFS_DNSIZE_8K = 8192,
+	ZFS_DNSIZE_16K = 16384
+} zfs_dnsize_type_t;
+
+typedef enum {
 	ZFS_REDUNDANT_METADATA_ALL,
 	ZFS_REDUNDANT_METADATA_MOST
 } zfs_redundant_metadata_type_t;
diff --git a/include/sys/sa_impl.h b/include/sys/sa_impl.h
index 6f2f1db6d..b68b7610b 100644
--- a/include/sys/sa_impl.h
+++ b/include/sys/sa_impl.h
@@ -235,7 +235,7 @@ struct sa_handle {
 #define	SA_BONUSTYPE_FROM_DB(db) \
 	(dmu_get_bonustype((dmu_buf_t *)db))
 
-#define	SA_BLKPTR_SPACE	(DN_MAX_BONUSLEN - sizeof (blkptr_t))
+#define	SA_BLKPTR_SPACE	(DN_OLD_MAX_BONUSLEN - sizeof (blkptr_t))
 
 #define	SA_LAYOUT_NUM(x, type) \
 	((!IS_SA_BONUSTYPE(type) ? 0 : (((IS_SA_BONUSTYPE(type)) && \
diff --git a/include/sys/spa.h b/include/sys/spa.h
index 7c09caf6c..17bf76de8 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -822,6 +822,7 @@ extern boolean_t spa_is_root(spa_t *spa);
 extern boolean_t spa_writeable(spa_t *spa);
 extern boolean_t spa_has_pending_synctask(spa_t *spa);
 extern int spa_maxblocksize(spa_t *spa);
+extern int spa_maxdnodesize(spa_t *spa);
 extern void zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp);
 
 extern int spa_mode(spa_t *spa);
diff --git a/include/sys/zap.h b/include/sys/zap.h
index ed60b86db..70cbaae97 100644
--- a/include/sys/zap.h
+++ b/include/sys/zap.h
@@ -129,16 +129,30 @@ typedef enum zap_flags {
  *     MT_FIRST/MT_BEST matching will find entries that match without
  *     regard to case (eg. looking for "foo" can find an entry "Foo").
  * Eventually, other flags will permit unicode normalization as well.
+ *
+ * dnodesize specifies the on-disk size of the dnode for the new zapobj.
+ * Valid values are multiples of 512 up to DNODE_MAX_SIZE.
  */
 uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+uint64_t zap_create_dnsize(objset_t *ds, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
 uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+uint64_t zap_create_norm_dnsize(objset_t *ds, int normflags,
+    dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
+    int dnodesize, dmu_tx_t *tx);
 uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+uint64_t zap_create_flags_dnsize(objset_t *os, int normflags,
+    zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift,
+    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
+    int dnodesize, dmu_tx_t *tx);
 uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
     uint64_t parent_obj, const char *name, dmu_tx_t *tx);
+uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot,
+    uint64_t parent_obj, const char *name, int dnodesize, dmu_tx_t *tx);
 
 /*
  * Initialize an already-allocated object.
@@ -152,9 +166,14 @@ void mzap_create_impl(objset_t *os, uint64_t obj, int normflags,
  */
 int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+int zap_create_claim_dnsize(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
 int zap_create_claim_norm(objset_t *ds, uint64_t obj,
     int normflags, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj,
+    int normflags, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
 
 /*
  * The zapobj passed in must be a valid ZAP object for all of the
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index ac70f690e..301f83232 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -98,6 +98,7 @@ typedef enum drr_headertype {
 #define	DMU_BACKUP_FEATURE_EMBED_DATA_LZ4	(1<<17)
 /* flag #18 is reserved for a Delphix feature */
 #define	DMU_BACKUP_FEATURE_LARGE_BLOCKS		(1<<19)
+#define	DMU_BACKUP_FEATURE_LARGE_DNODE		(1<<20)
 
 /*
  * Mask of all supported backup features
@@ -105,7 +106,7 @@ typedef enum drr_headertype {
 #define	DMU_BACKUP_FEATURE_MASK	(DMU_BACKUP_FEATURE_DEDUP | \
     DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
     DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 | \
-    DMU_BACKUP_FEATURE_LARGE_BLOCKS)
+    DMU_BACKUP_FEATURE_LARGE_BLOCKS | DMU_BACKUP_FEATURE_LARGE_DNODE)
 
 /* Are all features in the given flag word currently supported? */
 #define	DMU_STREAM_SUPPORTED(x)	(!((x) & ~DMU_BACKUP_FEATURE_MASK))
@@ -173,7 +174,8 @@ typedef struct dmu_replay_record {
 			uint32_t drr_bonuslen;
 			uint8_t drr_checksumtype;
 			uint8_t drr_compress;
-			uint8_t drr_pad[6];
+			uint8_t drr_dn_slots;
+			uint8_t drr_pad[5];
 			uint64_t drr_toguid;
 			/* bonus content follows */
 		} drr_object;
diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
index 43b591915..2c09feee1 100644
--- a/include/sys/zfs_znode.h
+++ b/include/sys/zfs_znode.h
@@ -196,6 +196,7 @@ typedef struct znode {
 	uint_t		z_blksz;	/* block size in bytes */
 	uint_t		z_seq;		/* modification sequence number */
 	uint64_t	z_mapcnt;	/* number of pages mapped to file */
+	uint64_t	z_dnodesize;	/* dnode size */
 	uint64_t	z_size;		/* file size (cached) */
 	uint64_t	z_links;	/* file links (cached) */
 	uint64_t	z_pflags;	/* pflags (cached) */
diff --git a/include/sys/zil.h b/include/sys/zil.h
index 65b14f1cd..1f04d1833 100644
--- a/include/sys/zil.h
+++ b/include/sys/zil.h
@@ -173,6 +173,19 @@ typedef enum zil_create {
 	(txtype) == TX_WRITE2)
 
 /*
+ * The number of dnode slots consumed by the object is stored in the 8
+ * unused upper bits of the object ID. We subtract 1 from the value
+ * stored on disk for compatibility with implementations that don't
+ * support large dnodes. The slot count for a single-slot dnode will
+ * contain 0 for those bits to preserve the log record format for
+ * "small" dnodes.
+ */
+#define	LR_FOID_GET_SLOTS(oid) (BF64_GET((oid), 56, 8) + 1)
+#define	LR_FOID_SET_SLOTS(oid, x) BF64_SET((oid), 56, 8, (x) - 1)
+#define	LR_FOID_GET_OBJ(oid) BF64_GET((oid), 0, DN_MAX_OBJECT_SHIFT)
+#define	LR_FOID_SET_OBJ(oid, x) BF64_SET((oid), 0, DN_MAX_OBJECT_SHIFT, (x))
+
+/*
  * Format of log records.
  * The fields are carefully defined to allow them to be aligned
  * and sized the same on sparc & intel architectures.
diff --git a/include/zfeature_common.h b/include/zfeature_common.h
index d481a28a8..41cfdf807 100644
--- a/include/zfeature_common.h
+++ b/include/zfeature_common.h
@@ -50,6 +50,7 @@ typedef enum spa_feature {
 	SPA_FEATURE_BOOKMARKS,
 	SPA_FEATURE_FS_SS_LIMIT,
 	SPA_FEATURE_LARGE_BLOCKS,
+	SPA_FEATURE_LARGE_DNODE,
 	SPA_FEATURES
 } spa_feature_t;
 
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index 87f79a532..07f2e75a6 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -1473,6 +1473,7 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
 
 	case ERANGE:
 		if (prop == ZFS_PROP_COMPRESSION ||
+		    prop == ZFS_PROP_DNODESIZE ||
 		    prop == ZFS_PROP_RECORDSIZE) {
 			(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "property setting is not allowed on "
diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5
index 6d74c9a78..fa04d6e81 100644
--- a/man/man5/zpool-features.5
+++ b/man/man5/zpool-features.5
@@ -432,5 +432,30 @@ set larger than 128KB, and will return to being \fBenabled\fR once all
 filesystems that have ever had their recordsize larger than 128KB are destroyed.
 .RE
 
+.sp
+.ne 2
+.na
+\fB\fBlarge_dnode\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID	org.zfsonlinux:large_dnode
+READ\-ONLY COMPATIBLE	no
+DEPENDENCIES	extensible_dataset
+.TE
+
+The \fBlarge_dnode\fR feature allows the size of dnodes in a dataset to be
+set larger than 512B.
+
+This feature becomes \fBactive\fR once a dataset contains an object with
+a dnode larger than 512B, which occurs as a result of setting the
+\fBdnodesize\fR dataset property to a value other than \fBlegacy\fR. The
+feature will return to being \fBenabled\fR once all filesystems that
+have ever contained a dnode larger than 512B are destroyed. Large dnodes
+allow more data to be stored in the bonus buffer, thus potentially
+improving performance by avoiding the use of spill blocks.
+.RE
+
 .SH "SEE ALSO"
 \fBzpool\fR(8)
diff --git a/man/man8/zfs.8 b/man/man8/zfs.8
index 4e3fa54a8..2aca58b76 100644
--- a/man/man8/zfs.8
+++ b/man/man8/zfs.8
@@ -910,6 +910,35 @@ The values \fBon\fR and \fBoff\fR are equivalent to the \fBdev\fR and \fBnodev\f
 .sp
 .ne 2
 .na
+\fB\fBdnodesize\fR=\fBlegacy\fR | \fBauto\fR | \fB1k\fR | \fB2k\fR | \fB4k\fR | \fB8k\fR | \fB16k\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specifies a compatibility mode or literal value for the size of dnodes
+in the file system. The default value is \fBlegacy\fR. Setting this
+property to a value other than \fBlegacy\fR requires the
+\fBlarge_dnode\fR pool feature to be enabled.
+.sp
+Consider setting \fBdnodesize\fR to \fBauto\fR if the dataset uses the
+\fBxattr=sa\fR property setting and the workload makes heavy use of
+extended attributes. This may be applicable to SELinux-enabled systems,
+Lustre servers, and Samba servers, for example. Literal values are
+supported for cases where the optimal size is known in advance and for
+performance testing.
+.sp
+Leave \fBdnodesize\fR set to \fBlegacy\fR if you need to receive
+a \fBzfs send\fR stream of this dataset on a pool that doesn't enable
+the \fBlarge_dnode\fR feature, or if you need to import this pool on a
+system that doesn't support the \fBlarge_dnode\fR feature.
+.sp
+This property can also be referred to by its shortened column name,
+\fBdnsize\fR.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
 \fB\fBexec\fR=\fBon\fR | \fBoff\fR\fR
 .ad
 .sp .6
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
index 76d564b43..1dbeab084 100644
--- a/module/zcommon/zfs_prop.c
+++ b/module/zcommon/zfs_prop.c
@@ -211,6 +211,17 @@ zfs_prop_init(void)
 		{ NULL }
 	};
 
+	static zprop_index_t dnsize_table[] = {
+		{ "legacy",	ZFS_DNSIZE_LEGACY },
+		{ "auto",	ZFS_DNSIZE_AUTO },
+		{ "1k",		ZFS_DNSIZE_1K },
+		{ "2k",		ZFS_DNSIZE_2K },
+		{ "4k",		ZFS_DNSIZE_4K },
+		{ "8k",		ZFS_DNSIZE_8K },
+		{ "16k",	ZFS_DNSIZE_16K },
+		{ NULL }
+	};
+
 	static zprop_index_t redundant_metadata_table[] = {
 		{ "all",	ZFS_REDUNDANT_METADATA_ALL },
 		{ "most",	ZFS_REDUNDANT_METADATA_MOST },
@@ -271,6 +282,9 @@ zfs_prop_init(void)
 	zprop_register_index(ZFS_PROP_XATTR, "xattr", ZFS_XATTR_DIR,
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
 	    "on | off | dir | sa", "XATTR", xattr_table);
+	zprop_register_index(ZFS_PROP_DNODESIZE, "dnodesize",
+	    ZFS_DNSIZE_LEGACY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
+	    "legacy | auto | 1k | 2k | 4k | 8k | 16k", "DNSIZE", dnsize_table);
 
 	/* inherit index (boolean) properties */
 	zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c
index 910c56dcc..4a5836e5b 100644
--- a/module/zcommon/zpool_prop.c
+++ b/module/zcommon/zpool_prop.c
@@ -135,6 +135,8 @@ zpool_prop_init(void)
 	    PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXBLOCKSIZE");
 	zprop_register_hidden(ZPOOL_PROP_TNAME, "tname", PROP_TYPE_STRING,
 	    PROP_ONETIME, ZFS_TYPE_POOL, "TNAME");
+	zprop_register_hidden(ZPOOL_PROP_MAXDNODESIZE, "maxdnodesize",
+	    PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXDNODESIZE");
 }
 
 /*
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 126748994..4bbbd0525 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -478,7 +478,6 @@ dbuf_verify(dmu_buf_impl_t *db)
 		ASSERT3U(db->db.db_offset, ==, DMU_BONUS_BLKID);
 	} else if (db->db_blkid == DMU_SPILL_BLKID) {
 		ASSERT(dn != NULL);
-		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
 		ASSERT0(db->db.db_offset);
 	} else {
 		ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
@@ -730,13 +729,18 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 	ASSERT(db->db_buf == NULL);
 
 	if (db->db_blkid == DMU_BONUS_BLKID) {
+		/*
+		 * The bonus length stored in the dnode may be less than
+		 * the maximum available space in the bonus buffer.
+		 */
 		int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
+		int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
 
 		ASSERT3U(bonuslen, <=, db->db.db_size);
-		db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
-		arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
-		if (bonuslen < DN_MAX_BONUSLEN)
-			bzero(db->db.db_data, DN_MAX_BONUSLEN);
+		db->db.db_data = zio_buf_alloc(max_bonuslen);
+		arc_space_consume(max_bonuslen, ARC_SPACE_OTHER);
+		if (bonuslen < max_bonuslen)
+			bzero(db->db.db_data, max_bonuslen);
 		if (bonuslen)
 			bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
 		DB_DNODE_EXIT(db);
@@ -962,9 +966,11 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
 	ASSERT(dr->dr_txg >= txg - 2);
 	if (db->db_blkid == DMU_BONUS_BLKID) {
 		/* Note that the data bufs here are zio_bufs */
-		dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN);
-		arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
-		bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN);
+		dnode_t *dn = DB_DNODE(db);
+		int bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
+		dr->dt.dl.dr_data = zio_buf_alloc(bonuslen);
+		arc_space_consume(bonuslen, ARC_SPACE_OTHER);
+		bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen);
 	} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
 		int size = db->db.db_size;
 		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
@@ -1858,8 +1864,10 @@ dbuf_clear(dmu_buf_impl_t *db)
 	if (db->db_state == DB_CACHED) {
 		ASSERT(db->db.db_data != NULL);
 		if (db->db_blkid == DMU_BONUS_BLKID) {
-			zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN);
-			arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
+			int slots = DB_DNODE(db)->dn_num_slots;
+			int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
+			zio_buf_free(db->db.db_data, bonuslen);
+			arc_space_return(bonuslen, ARC_SPACE_OTHER);
 		}
 		db->db.db_data = NULL;
 		db->db_state = DB_UNCACHED;
@@ -1929,7 +1937,7 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
 		mutex_enter(&dn->dn_mtx);
 		if (dn->dn_have_spill &&
 		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
-			*bpp = &dn->dn_phys->dn_spill;
+			*bpp = DN_SPILL_BLKPTR(dn->dn_phys);
 		else
 			*bpp = NULL;
 		dbuf_add_ref(dn->dn_dbuf, NULL);
@@ -2018,7 +2026,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
 
 	if (blkid == DMU_BONUS_BLKID) {
 		ASSERT3P(parent, ==, dn->dn_dbuf);
-		db->db.db_size = DN_MAX_BONUSLEN -
+		db->db.db_size = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) -
 		    (dn->dn_nblkptr-1) * sizeof (blkptr_t);
 		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
 		db->db.db_offset = DMU_BONUS_BLKID;
@@ -2810,7 +2818,7 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
 		return;
 
 	if (db->db_blkid == DMU_SPILL_BLKID) {
-		db->db_blkptr = &dn->dn_phys->dn_spill;
+		db->db_blkptr = DN_SPILL_BLKPTR(dn->dn_phys);
 		BP_ZERO(db->db_blkptr);
 		return;
 	}
@@ -2950,13 +2958,16 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 
 		ASSERT(*datap != NULL);
 		ASSERT0(db->db_level);
-		ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
+		ASSERT3U(dn->dn_phys->dn_bonuslen, <=,
+		    DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1));
 		bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
 		DB_DNODE_EXIT(db);
 
 		if (*datap != db->db.db_data) {
-			zio_buf_free(*datap, DN_MAX_BONUSLEN);
-			arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
+			int slots = DB_DNODE(db)->dn_num_slots;
+			int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
+			zio_buf_free(*datap, bonuslen);
+			arc_space_return(bonuslen, ARC_SPACE_OTHER);
 		}
 		db->db_data_pending = NULL;
 		drp = &db->db_last_dirty;
@@ -3107,7 +3118,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
 	if (db->db_blkid == DMU_SPILL_BLKID) {
 		ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
 		ASSERT(!(BP_IS_HOLE(bp)) &&
-		    db->db_blkptr == &dn->dn_phys->dn_spill);
+		    db->db_blkptr == DN_SPILL_BLKPTR(dn->dn_phys));
 	}
 #endif
 
@@ -3119,11 +3130,16 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
 		mutex_exit(&dn->dn_mtx);
 
 		if (dn->dn_type == DMU_OT_DNODE) {
-			dnode_phys_t *dnp = db->db.db_data;
-			for (i = db->db.db_size >> DNODE_SHIFT; i > 0;
-			    i--, dnp++) {
-				if (dnp->dn_type != DMU_OT_NONE)
+			i = 0;
+			while (i < db->db.db_size) {
+				dnode_phys_t *dnp = db->db.db_data + i;
+
+				i += DNODE_MIN_SIZE;
+				if (dnp->dn_type != DMU_OT_NONE) {
 					fill++;
+					i += dnp->dn_extra_slots *
+					    DNODE_MIN_SIZE;
+				}
 			}
 		} else {
 			if (BP_IS_HOLE(bp)) {
@@ -3270,7 +3286,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
 		dn = DB_DNODE(db);
 		ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
 		ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
-		    db->db_blkptr == &dn->dn_phys->dn_spill);
+		    db->db_blkptr == DN_SPILL_BLKPTR(dn->dn_phys));
 		DB_DNODE_EXIT(db);
 	}
 #endif
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index a423264c6..e1dfb41ff 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -180,7 +180,7 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
 int
 dmu_bonus_max(void)
 {
-	return (DN_MAX_BONUSLEN);
+	return (DN_OLD_MAX_BONUSLEN);
 }
 
 int
@@ -1853,6 +1853,7 @@ __dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
 	doi->doi_type = dn->dn_type;
 	doi->doi_bonus_type = dn->dn_bonustype;
 	doi->doi_bonus_size = dn->dn_bonuslen;
+	doi->doi_dnodesize = dn->dn_num_slots << DNODE_SHIFT;
 	doi->doi_indirection = dn->dn_nlevels;
 	doi->doi_checksum = dn->dn_checksum;
 	doi->doi_compress = dn->dn_compress;
@@ -1924,9 +1925,21 @@ dmu_object_size_from_db(dmu_buf_t *db_fake, uint32_t *blksize,
 	dn = DB_DNODE(db);
 
 	*blksize = dn->dn_datablksz;
-	/* add 1 for dnode space */
+	/* add in number of slots used for the dnode itself */
 	*nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >>
-	    SPA_MINBLOCKSHIFT) + 1;
+	    SPA_MINBLOCKSHIFT) + dn->dn_num_slots;
+	DB_DNODE_EXIT(db);
+}
+
+void
+dmu_object_dnsize_from_db(dmu_buf_t *db_fake, int *dnsize)
+{
+	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+	dnode_t *dn;
+
+	DB_DNODE_ENTER(db);
+	dn = DB_DNODE(db);
+	*dnsize = dn->dn_num_slots << DNODE_SHIFT;
 	DB_DNODE_EXIT(db);
 }
 
@@ -2020,6 +2033,7 @@ EXPORT_SYMBOL(dmu_object_info);
 EXPORT_SYMBOL(dmu_object_info_from_dnode);
 EXPORT_SYMBOL(dmu_object_info_from_db);
 EXPORT_SYMBOL(dmu_object_size_from_db);
+EXPORT_SYMBOL(dmu_object_dnsize_from_db);
 EXPORT_SYMBOL(dmu_object_set_blocksize);
 EXPORT_SYMBOL(dmu_object_set_checksum);
 EXPORT_SYMBOL(dmu_object_set_compress);
diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
index a5a53418b..e54043fc3 100644
--- a/module/zfs/dmu_object.c
+++ b/module/zfs/dmu_object.c
@@ -30,28 +30,55 @@
 #include <sys/dnode.h>
 #include <sys/zap.h>
 #include <sys/zfeature.h>
+#include <sys/dsl_dataset.h>
 
 uint64_t
 dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
+	return dmu_object_alloc_dnsize(os, ot, blocksize, bonustype, bonuslen,
+	    0, tx);
+}
+
+uint64_t
+dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
 	uint64_t object;
 	uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
 	    (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
 	dnode_t *dn = NULL;
+	int dn_slots = dnodesize >> DNODE_SHIFT;
+	boolean_t restarted = B_FALSE;
+
+	if (dn_slots == 0) {
+		dn_slots = DNODE_MIN_SLOTS;
+	} else {
+		ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
+		ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
+	}
 
 	mutex_enter(&os->os_obj_lock);
 	for (;;) {
 		object = os->os_obj_next;
 		/*
 		 * Each time we polish off a L1 bp worth of dnodes (2^12
-		 * objects), move to another L1 bp that's still reasonably
-		 * sparse (at most 1/4 full). Look from the beginning at most
-		 * once per txg, but after that keep looking from here.
+		 * objects), move to another L1 bp that's still
+		 * reasonably sparse (at most 1/4 full). Look from the
+		 * beginning at most once per txg. If we still can't
+		 * allocate from that L1 block, search for an empty L0
+		 * block, which will quickly skip to the end of the
+		 * metadnode if the no nearby L0 blocks are empty. This
+		 * fallback avoids a pathology where full dnode blocks
+		 * containing large dnodes appear sparse because they
+		 * have a low blk_fill, leading to many failed
+		 * allocation attempts. In the long term a better
+		 * mechanism to search for sparse metadnode regions,
+		 * such as spacemaps, could be implemented.
+		 *
 		 * os_scan_dnodes is set during txg sync if enough objects
 		 * have been freed since the previous rescan to justify
-		 * backfilling again. If we can't find a suitable block, just
-		 * keep going from here.
+		 * backfilling again.
 		 *
 		 * Note that dmu_traverse depends on the behavior that we use
 		 * multiple blocks of the dnode object before going back to
@@ -59,9 +86,10 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
 		 * that property or find another solution to the issues
 		 * described in traverse_visitbp.
 		 */
-
 		if (P2PHASE(object, L1_dnode_count) == 0) {
 			uint64_t offset;
+			uint64_t blkfill;
+			int minlvl;
 			int error;
 			if (os->os_rescan_dnodes) {
 				offset = 0;
@@ -69,13 +97,15 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
 			} else {
 				offset = object << DNODE_SHIFT;
 			}
+			blkfill = restarted ? 1 : DNODES_PER_BLOCK >> 2;
+			minlvl = restarted ? 1 : 2;
+			restarted = B_TRUE;
 			error = dnode_next_offset(DMU_META_DNODE(os),
-			    DNODE_FIND_HOLE,
-			    &offset, 2, DNODES_PER_BLOCK >> 2, 0);
+			    DNODE_FIND_HOLE, &offset, minlvl, blkfill, 0);
 			if (error == 0)
 				object = offset >> DNODE_SHIFT;
 		}
-		os->os_obj_next = ++object;
+		os->os_obj_next = object + dn_slots;
 
 		/*
 		 * XXX We should check for an i/o error here and return
@@ -83,16 +113,22 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
 		 * dmu_tx_assign(), but there is currently no mechanism
 		 * to do so.
 		 */
-		(void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
+		(void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, dn_slots,
 		    FTAG, &dn);
 		if (dn)
 			break;
 
 		if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
-			os->os_obj_next = object - 1;
+			os->os_obj_next = object;
+		else
+			/*
+			 * Skip to next known valid starting point for a dnode.
+			 */
+			os->os_obj_next = P2ROUNDUP(object + 1,
+			    DNODES_PER_BLOCK);
 	}
 
-	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
+	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, dn_slots, tx);
 	dnode_rele(dn, FTAG);
 
 	mutex_exit(&os->os_obj_lock);
@@ -105,16 +141,33 @@ int
 dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
+	return (dmu_object_claim_dnsize(os, object, ot, blocksize, bonustype,
+	    bonuslen, 0, tx));
+}
+
+int
+dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
+    int blocksize, dmu_object_type_t bonustype, int bonuslen,
+    int dnodesize, dmu_tx_t *tx)
+{
 	dnode_t *dn;
+	int dn_slots = dnodesize >> DNODE_SHIFT;
 	int err;
 
+	if (dn_slots == 0)
+		dn_slots = DNODE_MIN_SLOTS;
+	ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
+	ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
+
 	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
 		return (SET_ERROR(EBADF));
 
-	err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, dn_slots,
+	    FTAG, &dn);
 	if (err)
 		return (err);
-	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
+
+	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, dn_slots, tx);
 	dnode_rele(dn, FTAG);
 
 	dmu_tx_add_new_object(tx, os, object);
@@ -125,23 +178,34 @@ int
 dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
+	return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
+	    bonuslen, 0, tx));
+}
+
+int
+dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
+    int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
+    dmu_tx_t *tx)
+{
 	dnode_t *dn;
+	int dn_slots = dnodesize >> DNODE_SHIFT;
 	int err;
 
 	if (object == DMU_META_DNODE_OBJECT)
 		return (SET_ERROR(EBADF));
 
-	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
 	    FTAG, &dn);
 	if (err)
 		return (err);
 
-	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
+	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx);
 
 	dnode_rele(dn, FTAG);
 	return (err);
 }
 
+
 int
 dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
 {
@@ -150,7 +214,7 @@ dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
 
 	ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
 
-	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
 	    FTAG, &dn);
 	if (err)
 		return (err);
@@ -171,9 +235,30 @@ dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
 int
 dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
 {
-	uint64_t offset = (*objectp + 1) << DNODE_SHIFT;
+	uint64_t offset;
+	dmu_object_info_t doi;
+	struct dsl_dataset *ds = os->os_dsl_dataset;
+	int dnodesize;
 	int error;
 
+	/*
+	 * Avoid expensive dnode hold if this dataset doesn't use large dnodes.
+	 */
+	if (ds && ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE]) {
+		error = dmu_object_info(os, *objectp, &doi);
+		if (error && !(error == EINVAL && *objectp == 0))
+			return (SET_ERROR(error));
+		else
+			dnodesize = doi.doi_dnodesize;
+	} else {
+		dnodesize = DNODE_MIN_SIZE;
+	}
+
+	if (*objectp == 0)
+		offset = 1 << DNODE_SHIFT;
+	else
+		offset = (*objectp << DNODE_SHIFT) + dnodesize;
+
 	error = dnode_next_offset(DMU_META_DNODE(os),
 	    (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);
 
@@ -235,8 +320,11 @@ dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx)
 
 #if defined(_KERNEL) && defined(HAVE_SPL)
 EXPORT_SYMBOL(dmu_object_alloc);
+EXPORT_SYMBOL(dmu_object_alloc_dnsize);
 EXPORT_SYMBOL(dmu_object_claim);
+EXPORT_SYMBOL(dmu_object_claim_dnsize);
 EXPORT_SYMBOL(dmu_object_reclaim);
+EXPORT_SYMBOL(dmu_object_reclaim_dnsize);
 EXPORT_SYMBOL(dmu_object_free);
 EXPORT_SYMBOL(dmu_object_next);
 EXPORT_SYMBOL(dmu_object_zapify);
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 03b30dd3b..cdc897726 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -138,6 +138,12 @@ dmu_objset_id(objset_t *os)
 	return (ds ? ds->ds_object : 0);
 }
 
+uint64_t
+dmu_objset_dnodesize(objset_t *os)
+{
+	return (os->os_dnodesize);
+}
+
 zfs_sync_type_t
 dmu_objset_syncprop(objset_t *os)
 {
@@ -268,6 +274,34 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval)
 }
 
 static void
+dnodesize_changed_cb(void *arg, uint64_t newval)
+{
+	objset_t *os = arg;
+
+	switch (newval) {
+	case ZFS_DNSIZE_LEGACY:
+		os->os_dnodesize = DNODE_MIN_SIZE;
+		break;
+	case ZFS_DNSIZE_AUTO:
+		/*
+		 * Choose a dnode size that will work well for most
+		 * workloads if the user specified "auto". Future code
+		 * improvements could dynamically select a dnode size
+		 * based on observed workload patterns.
+		 */
+		os->os_dnodesize = DNODE_MIN_SIZE * 2;
+		break;
+	case ZFS_DNSIZE_1K:
+	case ZFS_DNSIZE_2K:
+	case ZFS_DNSIZE_4K:
+	case ZFS_DNSIZE_8K:
+	case ZFS_DNSIZE_16K:
+		os->os_dnodesize = newval;
+		break;
+	}
+}
+
+static void
 logbias_changed_cb(void *arg, uint64_t newval)
 {
 	objset_t *os = arg;
@@ -421,6 +455,11 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 				    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
 				    recordsize_changed_cb, os);
 			}
+			if (err == 0) {
+				err = dsl_prop_register(ds,
+				    zfs_prop_to_name(ZFS_PROP_DNODESIZE),
+				    dnodesize_changed_cb, os);
+			}
 		}
 		if (err != 0) {
 			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
@@ -439,6 +478,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 		os->os_sync = ZFS_SYNC_STANDARD;
 		os->os_primary_cache = ZFS_CACHE_ALL;
 		os->os_secondary_cache = ZFS_CACHE_ALL;
+		os->os_dnodesize = DNODE_MIN_SIZE;
 	}
 
 	if (ds == NULL || !ds->ds_is_snapshot)
@@ -768,8 +808,8 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 
 	mdn = DMU_META_DNODE(os);
 
-	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
-	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
+	dnode_allocate(mdn, DMU_OT_DNODE, DNODE_BLOCK_SIZE, DN_MAX_INDBLKSHIFT,
+	    DMU_OT_NONE, 0, DNODE_MIN_SLOTS, tx);
 
 	/*
 	 * We don't want to have to increase the meta-dnode's nlevels
@@ -1202,7 +1242,7 @@ do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
     uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
 {
 	if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
-		int64_t delta = DNODE_SIZE + used;
+		int64_t delta = DNODE_MIN_SIZE + used;
 		if (subtract)
 			delta = -delta;
 		VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
@@ -2023,6 +2063,7 @@ EXPORT_SYMBOL(dmu_objset_find);
 EXPORT_SYMBOL(dmu_objset_byteswap);
 EXPORT_SYMBOL(dmu_objset_evict_dbufs);
 EXPORT_SYMBOL(dmu_objset_snap_cmtime);
+EXPORT_SYMBOL(dmu_objset_dnodesize);
 
 EXPORT_SYMBOL(dmu_objset_sync);
 EXPORT_SYMBOL(dmu_objset_is_dirty);
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 896a84b50..901386a5a 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -445,6 +445,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
 	drro->drr_bonustype = dnp->dn_bonustype;
 	drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
 	drro->drr_bonuslen = dnp->dn_bonuslen;
+	drro->drr_dn_slots = dnp->dn_extra_slots + 1;
 	drro->drr_checksumtype = dnp->dn_checksum;
 	drro->drr_compress = dnp->dn_compress;
 	drro->drr_toguid = dsp->dsa_toguid;
@@ -570,7 +571,6 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
 	spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
 	dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
 	int err = 0;
-	dnode_phys_t *blk;
 	uint64_t dnobj;
 
 	ASSERT3U(zb->zb_level, >=, 0);
@@ -590,7 +590,8 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
 	} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
 		return (0);
 	} else if (type == DMU_OT_DNODE) {
-		int blksz = BP_GET_LSIZE(bp);
+		dnode_phys_t *blk;
+		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 		arc_flags_t aflags = ARC_FLAG_WAIT;
 		arc_buf_t *abuf;
 		int i;
@@ -603,8 +604,8 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
 			return (SET_ERROR(EIO));
 
 		blk = abuf->b_data;
-		dnobj = zb->zb_blkid * (blksz >> DNODE_SHIFT);
-		for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
+		dnobj = zb->zb_blkid * epb;
+		for (i = 0; i < epb; i += blk[i].dn_extra_slots + 1) {
 			err = dump_dnode(dsa, dnobj + i, blk + i);
 			if (err != 0)
 				break;
@@ -736,6 +737,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 
 	if (large_block_ok && to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS])
 		featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
+	if (to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE])
+		featureflags |= DMU_BACKUP_FEATURE_LARGE_DNODE;
 	if (embedok &&
 	    spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
 		featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
@@ -1252,6 +1255,15 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
 		return (SET_ERROR(ENOTSUP));
 
+	/*
+	 * The receiving code doesn't know how to translate large dnodes
+	 * to smaller ones, so the pool must have the LARGE_DNODE
+	 * feature enabled if the stream has LARGE_DNODE.
+	 */
+	if ((featureflags & DMU_BACKUP_FEATURE_LARGE_DNODE) &&
+	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE))
+		return (SET_ERROR(ENOTSUP));
+
 	error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
 	if (error == 0) {
 		/* target fs already exists; recv into temp clone */
@@ -1658,7 +1670,8 @@ deduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size)
 		return (1);
 	} else {
 		return (1 +
-		    ((DN_MAX_BONUSLEN - bonus_size) >> SPA_BLKPTRSHIFT));
+		    ((DN_OLD_MAX_BONUSLEN -
+		    MIN(DN_OLD_MAX_BONUSLEN, bonus_size)) >> SPA_BLKPTRSHIFT));
 	}
 }
 
@@ -1679,7 +1692,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	    P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
 	    drro->drr_blksz < SPA_MINBLOCKSIZE ||
 	    drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(rwa->os)) ||
-	    drro->drr_bonuslen > DN_MAX_BONUSLEN) {
+	    drro->drr_bonuslen >
+	    DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(rwa->os)))) {
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -1719,9 +1733,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 
 	if (object == DMU_NEW_OBJECT) {
 		/* currently free, want to be allocated */
-		err = dmu_object_claim(rwa->os, drro->drr_object,
+		err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
-		    drro->drr_bonustype, drro->drr_bonuslen, tx);
+		    drro->drr_bonustype, drro->drr_bonuslen,
+		    drro->drr_dn_slots << DNODE_SHIFT, tx);
 	} else if (drro->drr_type != doi.doi_type ||
 	    drro->drr_blksz != doi.doi_data_block_size ||
 	    drro->drr_bonustype != doi.doi_bonus_type ||
@@ -1771,18 +1786,25 @@ receive_freeobjects(struct receive_writer_arg *rwa,
 	if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
 		return (SET_ERROR(EINVAL));
 
-	for (obj = drrfo->drr_firstobj;
+	for (obj = drrfo->drr_firstobj == 0 ? 1 : drrfo->drr_firstobj;
 	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
 	    (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) {
+		dmu_object_info_t doi;
 		int err;
 
-		if (dmu_object_info(rwa->os, obj, NULL) != 0)
+		err = dmu_object_info(rwa->os, obj, &doi);
+		if (err == ENOENT) {
+			obj++;
 			continue;
+		} else if (err != 0) {
+			return (err);
+		}
 
 		err = dmu_free_long_object(rwa->os, obj);
 		if (err != 0)
 			return (err);
 	}
+
 	return (0);
 }
 
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index bba9efe14..44ba74181 100644
--- a/module/zfs/dmu_traverse.c
+++ b/module/zfs/dmu_traverse.c
@@ -331,13 +331,13 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
 			goto post;
 		cdnp = buf->b_data;
 
-		for (i = 0; i < epb; i++) {
+		for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) {
 			prefetch_dnode_metadata(td, &cdnp[i], zb->zb_objset,
 			    zb->zb_blkid * epb + i);
 		}
 
 		/* recursively visitbp() blocks below this */
-		for (i = 0; i < epb; i++) {
+		for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) {
 			err = traverse_dnode(td, &cdnp[i], zb->zb_objset,
 			    zb->zb_blkid * epb + i);
 			if (err != 0)
@@ -439,7 +439,7 @@ prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
 
 	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
 		SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
-		traverse_prefetch_metadata(td, &dnp->dn_spill, &czb);
+		traverse_prefetch_metadata(td, DN_SPILL_BLKPTR(dnp), &czb);
 	}
 }
 
@@ -470,7 +470,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
 
 	if (err == 0 && (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
 		SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
-		err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
+		err = traverse_visitbp(td, dnp, DN_SPILL_BLKPTR(dnp), &czb);
 	}
 
 	if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index 74e323dbd..ed29bfbc6 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -1586,7 +1586,7 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
 	} else {
 		blkptr_t *bp;
 
-		bp = &dn->dn_phys->dn_spill;
+		bp = DN_SPILL_BLKPTR(dn->dn_phys);
 		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
 		    bp, bp->blk_birth))
 			txh->txh_space_tooverwrite += SPA_OLD_MAXBLOCKSIZE;
@@ -1618,7 +1618,7 @@ dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize)
 
 	dmu_tx_sa_registration_hold(sa, tx);
 
-	if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill)
+	if (attrsize <= DN_OLD_MAX_BONUSLEN && !sa->sa_force_spill)
 		return;
 
 	(void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT,
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 38bcecd46..975bd5fb8 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -248,6 +248,7 @@ dnode_verify(dnode_t *dn)
 	}
 	if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) {
 		int i;
+		int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
 		ASSERT3U(dn->dn_indblkshift, <=, SPA_MAXBLOCKSHIFT);
 		if (dn->dn_datablkshift) {
 			ASSERT3U(dn->dn_datablkshift, >=, SPA_MINBLOCKSHIFT);
@@ -258,12 +259,12 @@ dnode_verify(dnode_t *dn)
 		ASSERT(DMU_OT_IS_VALID(dn->dn_type));
 		ASSERT3U(dn->dn_nblkptr, >=, 1);
 		ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
-		ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
+		ASSERT3U(dn->dn_bonuslen, <=, max_bonuslen);
 		ASSERT3U(dn->dn_datablksz, ==,
 		    dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
 		ASSERT3U(ISP2(dn->dn_datablksz), ==, dn->dn_datablkshift != 0);
 		ASSERT3U((dn->dn_nblkptr - 1) * sizeof (blkptr_t) +
-		    dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
+		    dn->dn_bonuslen, <=, max_bonuslen);
 		for (i = 0; i < TXG_SIZE; i++) {
 			ASSERT3U(dn->dn_next_nlevels[i], <=, dn->dn_nlevels);
 		}
@@ -294,6 +295,7 @@ dnode_byteswap(dnode_phys_t *dnp)
 
 	dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec);
 	dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen);
+	dnp->dn_extra_slots = BSWAP_8(dnp->dn_extra_slots);
 	dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid);
 	dnp->dn_used = BSWAP_64(dnp->dn_used);
 
@@ -320,7 +322,8 @@ dnode_byteswap(dnode_phys_t *dnp)
 		 * dnode buffer).
 		 */
 		int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
-		size_t len = DN_MAX_BONUSLEN - off;
+		int slots = dnp->dn_extra_slots + 1;
+		size_t len = DN_SLOTS_TO_BONUSLEN(slots) - off;
 		dmu_object_byteswap_t byteswap;
 		ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
 		byteswap = DMU_OT_BYTESWAP(dnp->dn_bonustype);
@@ -329,23 +332,24 @@ dnode_byteswap(dnode_phys_t *dnp)
 
 	/* Swap SPILL block if we have one */
 	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
-		byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t));
-
+		byteswap_uint64_array(DN_SPILL_BLKPTR(dnp), sizeof (blkptr_t));
 }
 
 void
 dnode_buf_byteswap(void *vbuf, size_t size)
 {
-	dnode_phys_t *buf = vbuf;
-	int i;
+	int i = 0;
 
 	ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT));
 	ASSERT((size & (sizeof (dnode_phys_t)-1)) == 0);
 
-	size >>= DNODE_SHIFT;
-	for (i = 0; i < size; i++) {
-		dnode_byteswap(buf);
-		buf++;
+	while (i < size) {
+		dnode_phys_t *dnp = vbuf + i;
+		dnode_byteswap(dnp);
+
+		i += DNODE_MIN_SIZE;
+		if (dnp->dn_type != DMU_OT_NONE)
+			i += dnp->dn_extra_slots * DNODE_MIN_SIZE;
 	}
 }
 
@@ -356,7 +360,7 @@ dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
 
 	dnode_setdirty(dn, tx);
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
-	ASSERT3U(newsize, <=, DN_MAX_BONUSLEN -
+	ASSERT3U(newsize, <=, DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) -
 	    (dn->dn_nblkptr-1) * sizeof (blkptr_t));
 	dn->dn_bonuslen = newsize;
 	if (newsize == 0)
@@ -434,6 +438,7 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
 	dn->dn_compress = dnp->dn_compress;
 	dn->dn_bonustype = dnp->dn_bonustype;
 	dn->dn_bonuslen = dnp->dn_bonuslen;
+	dn->dn_num_slots = dnp->dn_extra_slots + 1;
 	dn->dn_maxblkid = dnp->dn_maxblkid;
 	dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
 	dn->dn_id_flags = 0;
@@ -534,10 +539,13 @@ dnode_destroy(dnode_t *dn)
 
 void
 dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+    dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
 {
 	int i;
 
+	ASSERT3U(dn_slots, >, 0);
+	ASSERT3U(dn_slots << DNODE_SHIFT, <=,
+	    spa_maxdnodesize(dmu_objset_spa(dn->dn_objset)));
 	ASSERT3U(blocksize, <=,
 	    spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
 	if (blocksize == 0)
@@ -550,8 +558,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
 
 	ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT);
 
-	dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d\n", dn->dn_objset,
-	    dn->dn_object, tx->tx_txg, blocksize, ibs);
+	dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d dn_slots=%d\n",
+	    dn->dn_objset, dn->dn_object, tx->tx_txg, blocksize, ibs, dn_slots);
 
 	ASSERT(dn->dn_type == DMU_OT_NONE);
 	ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
@@ -562,7 +570,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
 	    (bonustype == DMU_OT_SA && bonuslen == 0) ||
 	    (bonustype != DMU_OT_NONE && bonuslen != 0));
 	ASSERT(DMU_OT_IS_VALID(bonustype));
-	ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
+	ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));
 	ASSERT(dn->dn_type == DMU_OT_NONE);
 	ASSERT0(dn->dn_maxblkid);
 	ASSERT0(dn->dn_allocated_txg);
@@ -588,11 +596,15 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
 	dnode_setdblksz(dn, blocksize);
 	dn->dn_indblkshift = ibs;
 	dn->dn_nlevels = 1;
+	dn->dn_num_slots = dn_slots;
 	if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
 		dn->dn_nblkptr = 1;
-	else
-		dn->dn_nblkptr = 1 +
-		    ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
+	else {
+		dn->dn_nblkptr = MIN(DN_MAX_NBLKPTR,
+		    1 + ((DN_SLOTS_TO_BONUSLEN(dn_slots) - bonuslen) >>
+		    SPA_BLKPTRSHIFT));
+	}
+
 	dn->dn_bonustype = bonustype;
 	dn->dn_bonuslen = bonuslen;
 	dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
@@ -617,7 +629,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
 
 void
 dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+    dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
 {
 	int nblkptr;
 
@@ -631,7 +643,10 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
 	    (bonustype != DMU_OT_NONE && bonuslen != 0) ||
 	    (bonustype == DMU_OT_SA && bonuslen == 0));
 	ASSERT(DMU_OT_IS_VALID(bonustype));
-	ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
+	ASSERT3U(bonuslen, <=,
+		DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
+
+	dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS;
 
 	/* clean up any unreferenced dbufs */
 	dnode_evict_dbufs(dn);
@@ -654,7 +669,9 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
 	if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
 		nblkptr = 1;
 	else
-		nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
+		nblkptr = MIN(DN_MAX_NBLKPTR,
+		    1 + ((DN_SLOTS_TO_BONUSLEN(dn_slots) - bonuslen) >>
+		    SPA_BLKPTRSHIFT));
 	if (dn->dn_bonustype != bonustype)
 		dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype;
 	if (dn->dn_nblkptr != nblkptr)
@@ -672,6 +689,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
 	mutex_enter(&dn->dn_mtx);
 	dn->dn_bonustype = bonustype;
 	dn->dn_bonuslen = bonuslen;
+	dn->dn_num_slots = dn_slots;
 	dn->dn_nblkptr = nblkptr;
 	dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
 	dn->dn_compress = ZIO_COMPRESS_INHERIT;
@@ -680,7 +698,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
 	/* fix up the bonus db_size */
 	if (dn->dn_bonus) {
 		dn->dn_bonus->db.db_size =
-		    DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t);
+		    DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) -
+		    (dn->dn_nblkptr-1) * sizeof (blkptr_t);
 		ASSERT(dn->dn_bonuslen <= dn->dn_bonus->db.db_size);
 	}
 
@@ -1053,24 +1072,150 @@ dnode_buf_pageout(void *dbu)
 }
 
 /*
+ * Return true if the given index is interior to a dnode already
+ * allocated in the block. That is, the index is neither free nor
+ * allocated, but is consumed by a large dnode.
+ *
+ * The dnode_phys_t buffer may not be in sync with the in-core dnode
+ * structure, so we try to check the dnode structure first and fall back
+ * to the dnode_phys_t buffer it doesn't exist.
+ */
+static boolean_t
+dnode_is_consumed(dmu_buf_impl_t *db, int idx)
+{
+	dnode_handle_t *dnh;
+	dmu_object_type_t ot;
+	dnode_children_t *children_dnodes;
+	dnode_phys_t *dn_block;
+	int skip;
+	int i;
+
+	children_dnodes = dmu_buf_get_user(&db->db);
+	dn_block = (dnode_phys_t *)db->db.db_data;
+
+	for (i = 0; i < idx; i += skip) {
+		dnh = &children_dnodes->dnc_children[i];
+
+		zrl_add(&dnh->dnh_zrlock);
+		if (dnh->dnh_dnode != NULL) {
+			ot = dnh->dnh_dnode->dn_type;
+			skip = dnh->dnh_dnode->dn_num_slots;
+		} else {
+			ot = dn_block[i].dn_type;
+			skip = dn_block[i].dn_extra_slots + 1;
+		}
+		zrl_remove(&dnh->dnh_zrlock);
+
+		if (ot == DMU_OT_NONE)
+			skip = 1;
+	}
+
+	return (i > idx);
+}
+
+/*
+ * Return true if the given index in the dnode block is a valid
+ * allocated dnode. That is, the index is not consumed by a large
+ * dnode and is not free.
+ *
+ * The dnode_phys_t buffer may not be in sync with the in-core dnode
+ * structure, so we try to check the dnode structure first and fall back
+ * to the dnode_phys_t buffer it doesn't exist.
+ */
+static boolean_t
+dnode_is_allocated(dmu_buf_impl_t *db, int idx)
+{
+	dnode_handle_t *dnh;
+	dmu_object_type_t ot;
+	dnode_children_t *children_dnodes;
+	dnode_phys_t *dn_block;
+
+	if (dnode_is_consumed(db, idx))
+		return (B_FALSE);
+
+	children_dnodes = dmu_buf_get_user(&db->db);
+	dn_block = (dnode_phys_t *)db->db.db_data;
+
+	dnh = &children_dnodes->dnc_children[idx];
+
+	zrl_add(&dnh->dnh_zrlock);
+	if (dnh->dnh_dnode != NULL)
+		ot = dnh->dnh_dnode->dn_type;
+	else
+		ot = dn_block[idx].dn_type;
+	zrl_remove(&dnh->dnh_zrlock);
+
+	return (ot != DMU_OT_NONE);
+}
+
+/*
+ * Return true if the given range of indices in the dnode block are
+ * free. That is, the starting index is not consumed by a large dnode
+ * and none of the indices are allocated.
+ *
+ * The dnode_phys_t buffer may not be in sync with the in-core dnode
+ * structure, so we try to check the dnode structure first and fall back
+ * to the dnode_phys_t buffer it doesn't exist.
+ */
+static boolean_t
+dnode_is_free(dmu_buf_impl_t *db, int idx, int slots)
+{
+	dnode_handle_t *dnh;
+	dmu_object_type_t ot;
+	dnode_children_t *children_dnodes;
+	dnode_phys_t *dn_block;
+	int i;
+
+	if (idx + slots > DNODES_PER_BLOCK)
+		return (B_FALSE);
+
+	children_dnodes = dmu_buf_get_user(&db->db);
+	dn_block = (dnode_phys_t *)db->db.db_data;
+
+	if (dnode_is_consumed(db, idx))
+		return (B_FALSE);
+
+	for (i = idx; i < idx + slots; i++) {
+		dnh = &children_dnodes->dnc_children[i];
+
+		zrl_add(&dnh->dnh_zrlock);
+		if (dnh->dnh_dnode != NULL)
+			ot = dnh->dnh_dnode->dn_type;
+		else
+			ot = dn_block[i].dn_type;
+		zrl_remove(&dnh->dnh_zrlock);
+
+		if (ot != DMU_OT_NONE)
+			return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
  * errors:
  * EINVAL - invalid object number.
+ * ENOSPC - hole too small to fulfill "slots" request
  * EIO - i/o error.
  * succeeds even for free dnodes.
  */
 int
-dnode_hold_impl(objset_t *os, uint64_t object, int flag,
+dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
     void *tag, dnode_t **dnp)
 {
-	int epb, idx, err;
+	int epb, idx, err, i;
 	int drop_struct_lock = FALSE;
 	int type;
 	uint64_t blk;
 	dnode_t *mdn, *dn;
 	dmu_buf_impl_t *db;
 	dnode_children_t *children_dnodes;
+	dnode_phys_t *dn_block_begin;
 	dnode_handle_t *dnh;
 
+	ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0));
+	ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0));
+
 	/*
 	 * If you are holding the spa config lock as writer, you shouldn't
 	 * be asking the DMU to do *anything* unless it's the root pool
@@ -1126,12 +1271,9 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
 	ASSERT3U(db->db.db_size, >=, 1<<DNODE_SHIFT);
 	epb = db->db.db_size >> DNODE_SHIFT;
 
-	idx = object & (epb-1);
-
 	ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE);
 	children_dnodes = dmu_buf_get_user(&db->db);
 	if (children_dnodes == NULL) {
-		int i;
 		dnode_children_t *winner;
 		children_dnodes = kmem_zalloc(sizeof (dnode_children_t) +
 		    epb * sizeof (dnode_handle_t), KM_SLEEP);
@@ -1156,21 +1298,28 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
 	}
 	ASSERT(children_dnodes->dnc_count == epb);
 
+	idx = object & (epb - 1);
+	dn_block_begin = (dnode_phys_t *)db->db.db_data;
+
+	if ((flag & DNODE_MUST_BE_FREE) && !dnode_is_free(db, idx, slots)) {
+		dbuf_rele(db, FTAG);
+		return (ENOSPC);
+	} else if ((flag & DNODE_MUST_BE_ALLOCATED) &&
+	    !dnode_is_allocated(db, idx)) {
+		dbuf_rele(db, FTAG);
+		return (ENOENT);
+	}
+
 	dnh = &children_dnodes->dnc_children[idx];
 	zrl_add(&dnh->dnh_zrlock);
 	dn = dnh->dnh_dnode;
-	if (dn == NULL) {
-		dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
-
-		dn = dnode_create(os, phys, db, object, dnh);
-	}
+	if (dn == NULL)
+		dn = dnode_create(os, dn_block_begin + idx, db, object, dnh);
 
 	mutex_enter(&dn->dn_mtx);
 	type = dn->dn_type;
 	if (dn->dn_free_txg ||
-	    ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
-	    ((flag & DNODE_MUST_BE_FREE) &&
-	    (type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) {
+	    ((flag & DNODE_MUST_BE_FREE) && !refcount_is_zero(&dn->dn_holds))) {
 		mutex_exit(&dn->dn_mtx);
 		zrl_remove(&dnh->dnh_zrlock);
 		dbuf_rele(db, FTAG);
@@ -1198,7 +1347,8 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
 int
 dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
 {
-	return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
+	return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0, tag,
+	    dnp));
 }
 
 /*
@@ -1908,17 +2058,21 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
 		error = SET_ERROR(ESRCH);
 	} else if (lvl == 0) {
 		dnode_phys_t *dnp = data;
-		span = DNODE_SHIFT;
+
 		ASSERT(dn->dn_type == DMU_OT_DNODE);
+		ASSERT(!(flags & DNODE_FIND_BACKWARDS));
 
-		for (i = (*offset >> span) & (blkfill - 1);
-		    i >= 0 && i < blkfill; i += inc) {
+		for (i = (*offset >> DNODE_SHIFT) & (blkfill - 1);
+		    i < blkfill; i += dnp[i].dn_extra_slots + 1) {
 			if ((dnp[i].dn_type == DMU_OT_NONE) == hole)
 				break;
-			*offset += (1ULL << span) * inc;
 		}
-		if (i < 0 || i == blkfill)
+
+		if (i == blkfill)
 			error = SET_ERROR(ESRCH);
+
+		*offset = (*offset & ~(DNODE_BLOCK_SIZE - 1)) +
+		    (i << DNODE_SHIFT);
 	} else {
 		blkptr_t *bp = data;
 		uint64_t start = *offset;
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index bea7be186..54066e2e3 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -524,7 +524,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 	ASSERT(dn->dn_free_txg > 0);
 	if (dn->dn_allocated_txg != dn->dn_free_txg)
 		dmu_buf_will_dirty(&dn->dn_dbuf->db, tx);
-	bzero(dn->dn_phys, sizeof (dnode_phys_t));
+	bzero(dn->dn_phys, sizeof (dnode_phys_t) * dn->dn_num_slots);
 
 	mutex_enter(&dn->dn_mtx);
 	dn->dn_type = DMU_OT_NONE;
@@ -559,7 +559,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 	ASSERT(dmu_tx_is_syncing(tx));
 	ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
 	ASSERT(dnp->dn_type != DMU_OT_NONE ||
-	    bcmp(dnp, &zerodn, DNODE_SIZE) == 0);
+	    bcmp(dnp, &zerodn, DNODE_MIN_SIZE) == 0);
 	DNODE_VERIFY(dn);
 
 	ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
@@ -591,6 +591,9 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 		dnp->dn_bonustype = dn->dn_bonustype;
 		dnp->dn_bonuslen = dn->dn_bonuslen;
 	}
+
+	dnp->dn_extra_slots = dn->dn_num_slots - 1;
+
 	ASSERT(dnp->dn_nlevels > 1 ||
 	    BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
 	    BP_IS_EMBEDDED(&dnp->dn_blkptr[0]) ||
@@ -623,7 +626,8 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 			dnp->dn_bonuslen = 0;
 		else
 			dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff];
-		ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN);
+		ASSERT(dnp->dn_bonuslen <=
+		    DN_SLOTS_TO_BONUSLEN(dnp->dn_extra_slots + 1));
 		dn->dn_next_bonuslen[txgoff] = 0;
 	}
 
@@ -662,7 +666,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 	mutex_exit(&dn->dn_mtx);
 
 	if (kill_spill) {
-		free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
+		free_blocks(dn, DN_SPILL_BLKPTR(dn->dn_phys), 1, tx);
 		mutex_enter(&dn->dn_mtx);
 		dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
 		mutex_exit(&dn->dn_mtx);
@@ -687,6 +691,14 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 		return;
 	}
 
+	if (dn->dn_num_slots > DNODE_MIN_SLOTS) {
+		dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+		mutex_enter(&ds->ds_lock);
+		ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_DNODE] =
+		    B_TRUE;
+		mutex_exit(&ds->ds_lock);
+	}
+
 	if (dn->dn_next_nlevels[txgoff]) {
 		dnode_increase_indirection(dn, tx);
 		dn->dn_next_nlevels[txgoff] = 0;
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index b5e272fb9..72163521e 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -709,14 +709,18 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
 			scn->scn_phys.scn_errors++;
 			return (err);
 		}
-		for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) {
+		for (i = 0, cdnp = buf->b_data; i < epb;
+		    i += cdnp->dn_extra_slots + 1,
+		    cdnp += cdnp->dn_extra_slots + 1) {
 			for (j = 0; j < cdnp->dn_nblkptr; j++) {
 				blkptr_t *cbp = &cdnp->dn_blkptr[j];
 				dsl_scan_prefetch(scn, buf, cbp,
 				    zb->zb_objset, zb->zb_blkid * epb + i, j);
 			}
 		}
-		for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) {
+		for (i = 0, cdnp = buf->b_data; i < epb;
+		    i += cdnp->dn_extra_slots + 1,
+		    cdnp += cdnp->dn_extra_slots + 1) {
 			dsl_scan_visitdnode(scn, ds, ostype,
 			    cdnp, zb->zb_blkid * epb + i, tx);
 		}
@@ -779,7 +783,7 @@ dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds,
 		zbookmark_phys_t czb;
 		SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object,
 		    0, DMU_SPILL_BLKID);
-		dsl_scan_visitbp(&dnp->dn_spill,
+		dsl_scan_visitbp(DN_SPILL_BLKPTR(dnp),
 		    &czb, dnp, ds, scn, ostype, tx);
 	}
 }
diff --git a/module/zfs/sa.c b/module/zfs/sa.c
index d6ac5fcc7..adc301512 100644
--- a/module/zfs/sa.c
+++ b/module/zfs/sa.c
@@ -33,6 +33,7 @@
 #include <sys/dmu.h>
 #include <sys/dmu_impl.h>
 #include <sys/dmu_objset.h>
+#include <sys/dmu_tx.h>
 #include <sys/dbuf.h>
 #include <sys/dnode.h>
 #include <sys/zap.h>
@@ -553,12 +554,11 @@ sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen)
  */
 static int
 sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
-    dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total,
-    boolean_t *will_spill)
+    dmu_buf_t *db, sa_buf_type_t buftype, int full_space, int *index,
+    int *total, boolean_t *will_spill)
 {
 	int var_size_count = 0;
 	int i;
-	int full_space;
 	int hdrsize;
 	int extra_hdrsize;
 
@@ -577,7 +577,6 @@ sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
 	hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 :
 	    sizeof (sa_hdr_phys_t);
 
-	full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size;
 	ASSERT(IS_P2ALIGNED(full_space, 8));
 
 	for (i = 0; i != attr_count; i++) {
@@ -668,6 +667,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
 	void *data_start;
 	sa_attr_type_t *attrs, *attrs_start;
 	int i, lot_count;
+	int dnodesize;
 	int spill_idx;
 	int hdrsize;
 	int spillhdrsize = 0;
@@ -676,20 +676,23 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
 	sa_lot_t *lot;
 	int len_idx;
 	int spill_used;
+	int bonuslen;
 	boolean_t spilling;
 
 	dmu_buf_will_dirty(hdl->sa_bonus, tx);
 	bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);
+	dmu_object_dnsize_from_db(hdl->sa_bonus, &dnodesize);
+	bonuslen = DN_BONUS_SIZE(dnodesize);
 
 	/* first determine bonus header size and sum of all attributes */
 	hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
-	    SA_BONUS, &spill_idx, &used, &spilling);
+	    SA_BONUS, bonuslen, &spill_idx, &used, &spilling);
 
 	if (used > SPA_OLD_MAXBLOCKSIZE)
 		return (SET_ERROR(EFBIG));
 
-	VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
-	    MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
+	VERIFY0(dmu_set_bonus(hdl->sa_bonus, spilling ?
+	    MIN(bonuslen - sizeof (blkptr_t), used + hdrsize) :
 	    used + hdrsize, tx));
 
 	ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
@@ -706,8 +709,8 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
 		dmu_buf_will_dirty(hdl->sa_spill, tx);
 
 		spillhdrsize = sa_find_sizes(sa, &attr_desc[spill_idx],
-		    attr_count - spill_idx, hdl->sa_spill, SA_SPILL, &i,
-		    &spill_used, &dummy);
+		    attr_count - spill_idx, hdl->sa_spill, SA_SPILL,
+		    hdl->sa_spill->db_size, &i, &spill_used, &dummy);
 
 		if (spill_used > SPA_OLD_MAXBLOCKSIZE)
 			return (SET_ERROR(EFBIG));
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index c23fd7a3a..d1aefe585 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -281,6 +281,14 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
 		    SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE);
 	}
 
+	if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) {
+		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
+		    DNODE_MAX_SIZE, ZPROP_SRC_NONE);
+	} else {
+		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
+		    DNODE_MIN_SIZE, ZPROP_SRC_NONE);
+	}
+
 	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
 		if (dp->scd_path == NULL) {
 			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
@@ -512,7 +520,8 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
 				/*
 				 * Must be ZPL, and its property settings
 				 * must be supported by GRUB (compression
-				 * is not gzip, and large blocks are not used).
+				 * is not gzip, and large blocks or large
+				 * dnodes are not used).
 				 */
 
 				if (dmu_objset_type(os) != DMU_OST_ZFS) {
@@ -529,6 +538,12 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
 				    &propval)) == 0 &&
 				    propval > SPA_OLD_MAXBLOCKSIZE) {
 					error = SET_ERROR(ENOTSUP);
+				} else if ((error =
+				    dsl_prop_get_int_ds(dmu_objset_ds(os),
+				    zfs_prop_to_name(ZFS_PROP_DNODESIZE),
+				    &propval)) == 0 &&
+				    propval != ZFS_DNSIZE_LEGACY) {
+					error = SET_ERROR(ENOTSUP);
 				} else {
 					objnum = dmu_objset_id(os);
 				}
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index e3e7e36fe..d1303b5c2 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -2000,6 +2000,15 @@ spa_maxblocksize(spa_t *spa)
 		return (SPA_OLD_MAXBLOCKSIZE);
 }
 
+int
+spa_maxdnodesize(spa_t *spa)
+{
+	if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE))
+		return (DNODE_MAX_SIZE);
+	else
+		return (DNODE_MIN_SIZE);
+}
+
 #if defined(_KERNEL) && defined(HAVE_SPL)
 /* Namespace manipulation */
 EXPORT_SYMBOL(spa_lookup);
@@ -2056,6 +2065,7 @@ EXPORT_SYMBOL(spa_bootfs);
 EXPORT_SYMBOL(spa_delegation);
 EXPORT_SYMBOL(spa_meta_objset);
 EXPORT_SYMBOL(spa_maxblocksize);
+EXPORT_SYMBOL(spa_maxdnodesize);
 
 /* Miscellaneous support routines */
 EXPORT_SYMBOL(spa_rename);
diff --git a/module/zfs/zap.c b/module/zfs/zap.c
index 454b4be62..9e4f05049 100644
--- a/module/zfs/zap.c
+++ b/module/zfs/zap.c
@@ -968,9 +968,17 @@ uint64_t
 zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
     const char *name, dmu_tx_t *tx)
 {
+	return (zap_create_link_dnsize(os, ot, parent_obj, name, 0, tx));
+}
+
+uint64_t
+zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
+    const char *name, int dnodesize, dmu_tx_t *tx)
+{
 	uint64_t new_obj;
 
-	VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0);
+	VERIFY((new_obj = zap_create_dnsize(os, ot, DMU_OT_NONE, 0,
+	    dnodesize, tx)) > 0);
 	VERIFY0(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj,
 	    tx));
 
diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c
index 3faf27ce3..f3153cc18 100644
--- a/module/zfs/zap_micro.c
+++ b/module/zfs/zap_micro.c
@@ -630,8 +630,16 @@ int
 zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	return (zap_create_claim_norm(os, obj,
-	    0, ot, bonustype, bonuslen, tx));
+	return (zap_create_claim_dnsize(os, obj, ot, bonustype, bonuslen,
+	    0, tx));
+}
+
+int
+zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+	return (zap_create_claim_norm_dnsize(os, obj,
+	    0, ot, bonustype, bonuslen, dnodesize, tx));
 }
 
 int
@@ -639,9 +647,19 @@ zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
     dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
+	return (zap_create_claim_norm_dnsize(os, obj, normflags, ot, bonustype,
+	    bonuslen, 0, tx));
+}
+
+int
+zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags,
+    dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
+    int dnodesize, dmu_tx_t *tx)
+{
 	int err;
 
-	err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
+	err = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen,
+	    dnodesize, tx);
 	if (err != 0)
 		return (err);
 	mzap_create_impl(os, obj, normflags, 0, tx);
@@ -656,10 +674,27 @@ zap_create(objset_t *os, dmu_object_type_t ot,
 }
 
 uint64_t
+zap_create_dnsize(objset_t *os, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+	return (zap_create_norm_dnsize(os, 0, ot, bonustype, bonuslen,
+	    dnodesize, tx));
+}
+
+uint64_t
 zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
+	return (zap_create_norm_dnsize(os, normflags, ot, bonustype, bonuslen,
+	    0, tx));
+}
+
+uint64_t
+zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+	uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen,
+	    dnodesize, tx);
 
 	mzap_create_impl(os, obj, normflags, 0, tx);
 	return (obj);
@@ -670,7 +705,17 @@ zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
+	return (zap_create_flags_dnsize(os, normflags, flags, ot,
+	    leaf_blockshift, indirect_blockshift, bonustype, bonuslen, 0, tx));
+}
+
+uint64_t
+zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags,
+    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+	uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen,
+	    dnodesize, tx);
 
 	ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT &&
 	    leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT &&
@@ -1458,10 +1503,14 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
 
 #if defined(_KERNEL) && defined(HAVE_SPL)
 EXPORT_SYMBOL(zap_create);
+EXPORT_SYMBOL(zap_create_dnsize);
 EXPORT_SYMBOL(zap_create_norm);
+EXPORT_SYMBOL(zap_create_norm_dnsize);
 EXPORT_SYMBOL(zap_create_flags);
+EXPORT_SYMBOL(zap_create_flags_dnsize);
 EXPORT_SYMBOL(zap_create_claim);
 EXPORT_SYMBOL(zap_create_claim_norm);
+EXPORT_SYMBOL(zap_create_claim_norm_dnsize);
 EXPORT_SYMBOL(zap_destroy);
 EXPORT_SYMBOL(zap_lookup);
 EXPORT_SYMBOL(zap_lookup_norm);
diff --git a/module/zfs/zfeature_common.c b/module/zfs/zfeature_common.c
index f57e5489c..3264f6235 100644
--- a/module/zfs/zfeature_common.c
+++ b/module/zfs/zfeature_common.c
@@ -242,4 +242,15 @@ zpool_feature_init(void)
 	    "Support for blocks larger than 128KB.",
 	    ZFEATURE_FLAG_PER_DATASET, large_blocks_deps);
 	}
+
+	{
+	static const spa_feature_t large_dnode_deps[] = {
+		SPA_FEATURE_EXTENSIBLE_DATASET,
+		SPA_FEATURE_NONE
+	};
+	zfeature_register(SPA_FEATURE_LARGE_DNODE,
+	    "org.zfsonlinux:large_dnode", "large_dnode",
+	    "Variable on-disk size of dnodes.",
+	    ZFEATURE_FLAG_PER_DATASET, large_dnode_deps);
+	}
 }
diff --git a/module/zfs/zfs_acl.c b/module/zfs/zfs_acl.c
index 961083d4a..f820cdfd6 100644
--- a/module/zfs/zfs_acl.c
+++ b/module/zfs/zfs_acl.c
@@ -1394,7 +1394,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
 				    otype == DMU_OT_ACL ?
 				    DMU_OT_SYSACL : DMU_OT_NONE,
 				    otype == DMU_OT_ACL ?
-				    DN_MAX_BONUSLEN : 0, tx);
+				    DN_OLD_MAX_BONUSLEN : 0, tx);
 			} else {
 				(void) dmu_object_set_blocksize(zsb->z_os,
 				    aoid, aclp->z_acl_bytes, 0, tx);
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index c63af167a..30338ac14 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -3785,7 +3785,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
 
 			/*
 			 * If this is a bootable dataset then
-			 * the we don't allow large (>128K) blocks,
+			 * we don't allow large (>128K) blocks,
 			 * because GRUB doesn't support them.
 			 */
 			if (zfs_is_bootfs(dsname) &&
@@ -3813,6 +3813,34 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
 		}
 		break;
 
+	case ZFS_PROP_DNODESIZE:
+		/* Dnode sizes above 512 need the feature to be enabled */
+		if (nvpair_value_uint64(pair, &intval) == 0 &&
+		    intval != ZFS_DNSIZE_LEGACY) {
+			spa_t *spa;
+
+			/*
+			 * If this is a bootable dataset then
+			 * we don't allow large (>512B) dnodes,
+			 * because GRUB doesn't support them.
+			 */
+			if (zfs_is_bootfs(dsname) &&
+				intval != ZFS_DNSIZE_LEGACY) {
+				return (SET_ERROR(EDOM));
+			}
+
+			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+				return (err);
+
+			if (!spa_feature_is_enabled(spa,
+			    SPA_FEATURE_LARGE_DNODE)) {
+				spa_close(spa, FTAG);
+				return (SET_ERROR(ENOTSUP));
+			}
+			spa_close(spa, FTAG);
+		}
+		break;
+
 	case ZFS_PROP_SHARESMB:
 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
 			return (SET_ERROR(ENOTSUP));
diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c
index 38d8de0eb..4d89cb04b 100644
--- a/module/zfs/zfs_log.c
+++ b/module/zfs/zfs_log.c
@@ -279,6 +279,8 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
 	lr = (lr_create_t *)&itx->itx_lr;
 	lr->lr_doid = dzp->z_id;
 	lr->lr_foid = zp->z_id;
+	/* Store dnode slot count in 8 bits above object id. */
+	LR_FOID_SET_SLOTS(lr->lr_foid, zp->z_dnodesize >> DNODE_SHIFT);
 	lr->lr_mode = zp->z_mode;
 	if (!IS_EPHEMERAL(zp->z_uid)) {
 		lr->lr_uid = (uint64_t)zp->z_uid;
diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c
index b97a60ed8..54c175437 100644
--- a/module/zfs/zfs_replay.c
+++ b/module/zfs/zfs_replay.c
@@ -279,6 +279,8 @@ zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
 	void *fuidstart;
 	size_t xvatlen = 0;
 	uint64_t txtype;
+	uint64_t objid;
+	uint64_t dnodesize;
 	int error;
 
 	txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
@@ -304,19 +306,24 @@ zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
 	if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
 		return (error);
 
+	objid = LR_FOID_GET_OBJ(lr->lr_foid);
+	dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT;
+
 	xva_init(&xva);
 	zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
-	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
+	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid);
 
 	/*
 	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
 	 * eventually end up in zfs_mknode(), which assigns the object's
-	 * creation time and generation number.  The generic zfs_create()
-	 * doesn't have either concept, so we smuggle the values inside
-	 * the vattr's otherwise unused va_ctime and va_nblocks fields.
+	 * creation time, generation number, and dnode size. The generic
+	 * zfs_create() has no concept of these attributes, so we smuggle
+	 * the values inside the vattr's otherwise unused va_ctime,
+	 * va_nblocks, and va_fsid fields.
 	 */
 	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
 	xva.xva_vattr.va_nblocks = lr->lr_gen;
+	xva.xva_vattr.va_fsid = dnodesize;
 
 	error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL);
 	if (error != ENOENT)
@@ -418,6 +425,8 @@ zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
 	void *start;
 	size_t xvatlen;
 	uint64_t txtype;
+	uint64_t objid;
+	uint64_t dnodesize;
 	int error;
 
 	txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
@@ -431,21 +440,26 @@ zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
 	if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
 		return (error);
 
+	objid = LR_FOID_GET_OBJ(lr->lr_foid);
+	dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT;
+
 	xva_init(&xva);
 	zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
-	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
+	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid);
 
 	/*
 	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
 	 * eventually end up in zfs_mknode(), which assigns the object's
-	 * creation time and generation number.  The generic zfs_create()
-	 * doesn't have either concept, so we smuggle the values inside
-	 * the vattr's otherwise unused va_ctime and va_nblocks fields.
+	 * creation time, generation number, and dnode slot count. The
+	 * generic zfs_create() has no concept of these attributes, so
+	 * we smuggle the values inside * the vattr's otherwise unused
+	 * va_ctime, va_nblocks, and va_nlink fields.
 	 */
 	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
 	xva.xva_vattr.va_nblocks = lr->lr_gen;
+	xva.xva_vattr.va_fsid = dnodesize;
 
-	error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL);
+	error = dmu_object_info(zsb->z_os, objid, NULL);
 	if (error != ENOENT)
 		goto out;
 
diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c
index f4841435b..f3eac51f8 100644
--- a/module/zfs/zfs_sa.c
+++ b/module/zfs/zfs_sa.c
@@ -97,8 +97,7 @@ zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx)
 	dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
 
 	if (ZFS_OLD_ZNODE_PHYS_SIZE + len <= dmu_bonus_max()) {
-		VERIFY(dmu_set_bonus(db,
-		    len + ZFS_OLD_ZNODE_PHYS_SIZE, tx) == 0);
+		VERIFY0(dmu_set_bonus(db, len + ZFS_OLD_ZNODE_PHYS_SIZE, tx));
 		if (len) {
 			bcopy(link, (caddr_t)db->db_data +
 			    ZFS_OLD_ZNODE_PHYS_SIZE, len);
@@ -107,8 +106,8 @@ zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx)
 		dmu_buf_t *dbp;
 
 		zfs_grow_blocksize(zp, len, tx);
-		VERIFY(0 == dmu_buf_hold(ZTOZSB(zp)->z_os,
-		    zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH));
+		VERIFY0(dmu_buf_hold(ZTOZSB(zp)->z_os, zp->z_id, 0, FTAG, &dbp,
+		    DMU_READ_NO_PREFETCH));
 
 		dmu_buf_will_dirty(dbp, tx);
 
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 19cb414a9..310d4827b 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -62,6 +62,7 @@
 
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
+#include <sys/dmu_tx.h>
 #include <sys/refcount.h>
 #include <sys/stat.h>
 #include <sys/zap.h>
@@ -728,6 +729,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 	timestruc_t	now;
 	uint64_t	gen, obj;
 	int		bonuslen;
+	int		dnodesize;
 	sa_handle_t	*sa_hdl;
 	dmu_object_type_t obj_type;
 	sa_bulk_attr_t	*sa_attrs;
@@ -739,15 +741,21 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 		obj = vap->va_nodeid;
 		now = vap->va_ctime;		/* see zfs_replay_create() */
 		gen = vap->va_nblocks;		/* ditto */
+		dnodesize = vap->va_fsid;	/* ditto */
 	} else {
 		obj = 0;
 		gethrestime(&now);
 		gen = dmu_tx_get_txg(tx);
+		dnodesize = dmu_objset_dnodesize(zsb->z_os);
 	}
 
+	if (dnodesize == 0)
+		dnodesize = DNODE_MIN_SIZE;
+
 	obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
+
 	bonuslen = (obj_type == DMU_OT_SA) ?
-	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;
+	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
 
 	/*
 	 * Create a new DMU object.
@@ -760,23 +768,23 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 	 */
 	if (S_ISDIR(vap->va_mode)) {
 		if (zsb->z_replay) {
-			VERIFY0(zap_create_claim_norm(zsb->z_os, obj,
+			VERIFY0(zap_create_claim_norm_dnsize(zsb->z_os, obj,
 			    zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
-			    obj_type, bonuslen, tx));
+			    obj_type, bonuslen, dnodesize, tx));
 		} else {
-			obj = zap_create_norm(zsb->z_os,
+			obj = zap_create_norm_dnsize(zsb->z_os,
 			    zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
-			    obj_type, bonuslen, tx);
+			    obj_type, bonuslen, dnodesize, tx);
 		}
 	} else {
 		if (zsb->z_replay) {
-			VERIFY0(dmu_object_claim(zsb->z_os, obj,
+			VERIFY0(dmu_object_claim_dnsize(zsb->z_os, obj,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
-			    obj_type, bonuslen, tx));
+			    obj_type, bonuslen, dnodesize, tx));
 		} else {
-			obj = dmu_object_alloc(zsb->z_os,
+			obj = dmu_object_alloc_dnsize(zsb->z_os,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
-			    obj_type, bonuslen, tx);
+			    obj_type, bonuslen, dnodesize, tx);
 		}
 	}
 
@@ -948,6 +956,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 
 	(*zpp)->z_pflags = pflags;
 	(*zpp)->z_mode = mode;
+	(*zpp)->z_dnodesize = dnodesize;
 
 	if (obj_type == DMU_OT_ZNODE ||
 	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
@@ -1767,6 +1776,14 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	ASSERT(error == 0);
 
 	/*
+	 * Give dmu_object_alloc() a hint about where to start
+	 * allocating new objects. Otherwise, since the metadnode's
+	 * dnode_phys_t structure isn't initialized yet, dmu_object_next()
+	 * would fail and we'd have to skip to the next dnode block.
+	 */
+	os->os_obj_next = moid + 1;
+
+	/*
 	 * Set starting attributes.
 	 */
 	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 289b23c7f..988ffec29 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -1372,7 +1372,8 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
 		itxg->itxg_sod += itx->itx_sod;
 	} else {
 		avl_tree_t *t = &itxs->i_async_tree;
-		uint64_t foid = ((lr_ooo_t *)&itx->itx_lr)->lr_foid;
+		uint64_t foid =
+		    LR_FOID_GET_OBJ(((lr_ooo_t *)&itx->itx_lr)->lr_foid);
 		itx_async_node_t *ian;
 		avl_index_t where;
 
@@ -1918,7 +1919,8 @@ zil_close(zilog_t *zilog)
 	mutex_exit(&zilog->zl_lock);
 	if (txg)
 		txg_wait_synced(zilog->zl_dmu_pool, txg);
-	ASSERT(!zilog_is_dirty(zilog));
+	if (txg < spa_freeze_txg(zilog->zl_spa))
+		ASSERT(!zilog_is_dirty(zilog));
 
 	taskq_destroy(zilog->zl_clean_taskq);
 	zilog->zl_clean_taskq = NULL;
@@ -2122,7 +2124,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
 	 */
 	if (TX_OOO(txtype)) {
 		error = dmu_object_info(zilog->zl_os,
-		    ((lr_ooo_t *)lr)->lr_foid, NULL);
+		    LR_FOID_GET_OBJ(((lr_ooo_t *)lr)->lr_foid), NULL);
 		if (error == ENOENT || error == EEXIST)
 			return (0);
 	}
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 1f52d2815..003c513dd 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -411,6 +411,11 @@ tests = ['exec_001_pos']
 [tests/functional/features/async_destroy]
 tests = ['async_destroy_001_pos']
 
+[tests/functional/features/large_dnode]
+tests = ['large_dnode_001_pos', 'large_dnode_002_pos', 'large_dnode_003_pos',
+         'large_dnode_004_neg', 'large_dnode_005_pos', 'large_dnode_006_pos',
+         'large_dnode_007_neg']
+
 # DISABLED: needs investigation
 #[tests/functional/grow_pool]
 #tests = ['grow_pool_001_pos']
diff --git a/tests/zfs-tests/include/default.cfg.in b/tests/zfs-tests/include/default.cfg.in
index 9474c489e..13317ea87 100644
--- a/tests/zfs-tests/include/default.cfg.in
+++ b/tests/zfs-tests/include/default.cfg.in
@@ -80,7 +80,7 @@ export READMMAP=${READMMAP:-${helperdir}/readmmap}
 export RENAME_DIR=${RENAME_DIR:-${helperdir}/rename_dir}
 export RM_LNKCNT_ZERO_FILE=${RM_LNKCNT_ZERO_FILE:-${helperdir}/rm_lnkcnt_zero_file}
 export THREADSAPPEND=${THREADSAPPEND:-${helperdir}/threadsappend}
-export XATTRTEST=${XATTRTEST:-${helpdir}/xattrtest}
+export XATTRTEST=${XATTRTEST:-${helperdir}/xattrtest}
 
 # ensure we're running in the C locale, since
 # localised messages may result in test failures
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
index d1d89f1af..f7a1d9cb1 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
@@ -35,7 +35,7 @@ typeset -a properties=("size" "capacity" "altroot" "health" "guid" "version"
     "free" "allocated" "readonly" "comment" "expandsize" "freeing" "failmode"
     "listsnapshots" "autoexpand" "fragmentation" "leaked" "ashift"
     "feature@async_destroy" "feature@empty_bpobj" "feature@lz4_compress"
-    "feature@large_blocks" "feature@filesystem_limits"
+    "feature@large_blocks" "feature@large_dnode" "feature@filesystem_limits"
     "feature@spacemap_histogram" "feature@enabled_txg" "feature@hole_birth"
     "feature@extensible_dataset" "feature@bookmarks" "feature@embedded_data")
 else
diff --git a/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib b/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib
index c90329ce1..796cda929 100644
--- a/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib
+++ b/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib
@@ -250,6 +250,10 @@ function check_fs_perm
 			verify_fs_canmount $user $perm $fs
 			ret=$?
 			;;
+		dnodesize)
+			verify_fs_dnodesize $user $perm $fs
+			ret=$?
+			;;
 		recordsize)
 			verify_fs_recordsize $user $perm $fs
 			ret=$?
@@ -1112,6 +1116,21 @@ function verify_fs_recordsize
 	return 0
 }
 
+function verify_fs_dnodesize
+{
+	typeset user=$1
+	typeset perm=$2
+	typeset fs=$3
+	value="2k"
+
+	user_run $user $ZFS set dnodesize=$value $fs
+	if [[ $value != $(get_prop dnodesize $fs) ]]; then
+		return 1
+	fi
+
+	return 0
+}
+
 function verify_fs_quota
 {
 	typeset user=$1
diff --git a/tests/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh b/tests/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh
index 28b2b1dec..2e3be0cf5 100755
--- a/tests/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh
+++ b/tests/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh
@@ -70,6 +70,7 @@ set -A perms	create		true		false	\
 		allow		true		true	\
 		quota		true		false	\
 		reservation	true		true	\
+		dnodesize	true		false	\
 		recordsize	true		false	\
 		checksum	true		true	\
 		compression	true		true	\
@@ -95,6 +96,7 @@ set -A perms	create		true		false	\
 		allow		true		true	\
 		quota		true		false	\
 		reservation	true		true	\
+		dnodesize	true		false	\
 		recordsize	true		false	\
 		mountpoint	true		false	\
 		checksum	true		true	\
diff --git a/tests/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh b/tests/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh
index a64561b69..723fb9292 100755
--- a/tests/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh
+++ b/tests/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh
@@ -59,13 +59,13 @@ if is_linux; then
 set -A perms	create snapshot mount send allow quota reservation \
 		recordsize mountpoint checksum compression canmount atime \
 		devices exec volsize setuid readonly snapdir userprop \
-		rollback clone rename promote \
+		rollback clone rename promote dnodesize \
 		zoned xattr receive destroy
 else
 set -A perms	create snapshot mount send allow quota reservation \
 		recordsize mountpoint checksum compression canmount atime \
 		devices exec volsize setuid readonly snapdir userprop \
-		aclmode aclinherit rollback clone rename promote \
+		aclmode aclinherit rollback clone rename promote dnodesize \
 		zoned xattr receive destroy sharenfs share
 fi
 
diff --git a/tests/zfs-tests/tests/functional/features/Makefile.am b/tests/zfs-tests/tests/functional/features/Makefile.am
index 4c6142a2f..3657461e6 100644
--- a/tests/zfs-tests/tests/functional/features/Makefile.am
+++ b/tests/zfs-tests/tests/functional/features/Makefile.am
@@ -1 +1,3 @@
-SUBDIRS = async_destroy
+SUBDIRS = \
+	async_destroy \
+	large_dnode
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/Makefile.am b/tests/zfs-tests/tests/functional/features/large_dnode/Makefile.am
new file mode 100644
index 000000000..6afda5362
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/Makefile.am
@@ -0,0 +1,11 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/features/large_dnode
+dist_pkgdata_SCRIPTS = \
+	cleanup.ksh \
+	setup.ksh \
+	large_dnode_001_pos.ksh \
+	large_dnode_002_pos.ksh \
+	large_dnode_003_pos.ksh \
+	large_dnode_004_neg.ksh \
+	large_dnode_005_pos.ksh \
+	large_dnode_006_pos.ksh \
+	large_dnode_007_neg.ksh
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/cleanup.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/cleanup.ksh
new file mode 100755
index 000000000..61caf3910
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/cleanup.ksh
@@ -0,0 +1,25 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_001_pos.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_001_pos.ksh
new file mode 100755
index 000000000..a05acb97e
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_001_pos.ksh
@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify that the dnode sizes of newly created files are consistent
+# with the dnodesize dataset property.
+#
+# STRATEGY:
+# 1. Create a file system
+# 2. Set dnodesize to a legal literal value
+# 3. Create a file
+# 4. Repeat 2-3 for all legal literal values of dnodesize values
+# 5. Unmount the file system
+# 6. Use zdb to check expected dnode sizes
+#
+
+TEST_FS=$TESTPOOL/large_dnode
+
+verify_runnable "both"
+
+function cleanup
+{
+	datasetexists $TEST_FS && log_must $ZFS destroy $TEST_FS
+}
+
+log_onexit cleanup
+log_assert "dnode sizes are consistent with dnodesize dataset property"
+
+log_must $ZFS create $TEST_FS
+
+set -A dnsizes "512" "1k" "2k" "4k" "8k" "16k"
+set -A inodes
+
+for ((i=0; i < ${#dnsizes[*]}; i++)) ; do
+	size=${dnsizes[$i]}
+	if [[ $size == "512" ]] ; then
+		size="legacy"
+	fi
+	file=/$TEST_FS/file.$size
+	log_must $ZFS set dnsize=$size $TEST_FS
+	touch $file
+	inodes[$i]=$(ls -li $file | awk '{print $1}')
+done
+
+log_must $ZFS umount $TEST_FS
+
+for ((i=0; i < ${#dnsizes[*]}; i++)) ; do
+	dnsize=$($ZDB -dddd $TEST_FS ${inodes[$i]} |
+	    awk '/ZFS plain file/ {print $6}' | tr K k)
+	if [[ "$dnsize" != "${dnsizes[$i]}" ]]; then
+		log_fail "dnode size is $dnsize (expected ${dnsizes[$i]})"
+	fi
+done
+
+log_pass "dnode sizes are consistent with dnodesize dataset property"
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_002_pos.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_002_pos.ksh
new file mode 100755
index 000000000..788e33a13
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_002_pos.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify that extended attributes can use extra bonus space of a large
+# dnode without kicking in a spill block.
+#
+# STRATEGY:
+# 1. Create a file system with xattr=sa
+# 2. Set dnodesize to a legal literal value
+# 3. Create a file
+# 4  Store an xattr that fits within the dnode size
+# 4. Repeat 2-3 for all legal literal values of dnodesize values
+# 5. Unmount the file system
+# 6. Use zdb to check for missing SPILL_BLKPTR flag
+#
+
+TEST_FS=$TESTPOOL/large_dnode
+
+verify_runnable "both"
+
+function cleanup
+{
+	datasetexists $TEST_FS && log_must $ZFS destroy $TEST_FS
+}
+
+log_onexit cleanup
+log_assert "extended attributes use extra bonus space of a large dnode"
+
+log_must $ZFS create -o xattr=sa $TEST_FS
+
+# Store dnode size minus 512 in an xattr
+set -A xattr_sizes "512" "1536" "3584" "7680" "15872"
+set -A prop_values "1k"  "2k"   "4k"   "8k"   "16k"
+set -A inodes
+
+for ((i=0; i < ${#prop_values[*]}; i++)) ; do
+	prop_val=${prop_values[$i]}
+	file=/$TEST_FS/file.$prop_val
+	log_must $ZFS set dnsize=$prop_val $TEST_FS
+	touch $file
+	xattr_size=${xattr_sizes[$i]}
+	xattr_name=user.foo
+	xattr_val=$(dd if=/dev/urandom bs=1 count=$xattr_size |
+	    openssl enc -a -A)
+	log_must setfattr -n $xattr_name -v 0s$xattr_val $file
+	inodes[$i]=$(ls -li $file | awk '{print $1}')
+done
+
+log_must $ZFS umount $TEST_FS
+
+for ((i=0; i < ${#inodes[*]}; i++)) ; do
+	log_mustnot eval "$ZDB -dddd $TEST_FS ${inodes[$i]} | grep SPILL_BLKPTR"
+done
+
+log_pass "extended attributes use extra bonus space of a large dnode"
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_003_pos.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_003_pos.ksh
new file mode 100755
index 000000000..3f5779ded
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_003_pos.ksh
@@ -0,0 +1,65 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "both"
+
+function cleanup
+{
+        if datasetexists $LDNPOOL ; then
+                log_must $ZPOOL destroy -f $LDNPOOL
+        fi
+}
+
+log_onexit cleanup
+
+log_assert "feature correctly switches between enabled and active"
+
+LDNPOOL=ldnpool
+LDNFS=$LDNPOOL/large_dnode
+log_must $MKFILE 64M  $TESTDIR/$LDNPOOL
+log_must $ZPOOL create $LDNPOOL $TESTDIR/$LDNPOOL
+
+
+state=$($ZPOOL list -Ho feature@large_dnode $LDNPOOL)
+if [[ "$state" != "enabled" ]]; then
+        log_fail "large_dnode has state $state (expected enabled)"
+fi
+
+log_must $ZFS create -o dnodesize=1k $LDNFS
+log_must touch /$LDNFS/foo
+log_must $ZFS unmount $LDNFS
+
+state=$($ZPOOL list -Ho feature@large_dnode $LDNPOOL)
+if [[ "$state" != "active" ]]; then
+        log_fail "large_dnode has state $state (expected active)"
+fi
+
+log_must $ZFS destroy $LDNFS
+
+state=$($ZPOOL list -Ho feature@large_dnode $LDNPOOL)
+if [[ "$state" != "enabled" ]]; then
+        log_fail "large_dnode has state $state (expected enabled)"
+fi
+
+log_pass
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_004_neg.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_004_neg.ksh
new file mode 100755
index 000000000..26ed66994
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_004_neg.ksh
@@ -0,0 +1,59 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "both"
+
+TEST_FS=$TESTPOOL/large_dnode
+TEST_SNAP=$TESTPOOL/large_dnode@ldnsnap
+TEST_STREAM=$TESTDIR/ldnsnap
+
+function cleanup
+{
+	if datasetexists $TEST_FS ; then
+		log_must $ZFS destroy -r $TEST_FS
+	fi
+
+	if datasetexists $LGCYPOOL ; then
+		log_must $ZPOOL destroy -f $LGCYPOOL
+	fi
+
+	rm -f $TEST_STREAM
+}
+
+log_onexit cleanup
+log_assert "zfs send stream with large dnodes not accepted by legacy pool"
+
+log_must $ZFS create -o dnodesize=1k $TEST_FS
+log_must touch /$TEST_FS/foo
+log_must $ZFS umount $TEST_FS
+log_must $ZFS snap $TEST_SNAP
+log_must eval "$ZFS send $TEST_SNAP > $TEST_STREAM"
+
+LGCYPOOL=ldnpool
+LGCYFS=$LGCYPOOL/legacy
+log_must $MKFILE 64M  $TESTDIR/$LGCYPOOL
+log_must $ZPOOL create -d $LGCYPOOL $TESTDIR/$LGCYPOOL
+log_mustnot eval "$ZFS recv $LGCYFS < $TEST_STREAM"
+
+log_pass
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_005_pos.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_005_pos.ksh
new file mode 100755
index 000000000..e03d1274f
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_005_pos.ksh
@@ -0,0 +1,64 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "both"
+
+TEST_SEND_FS=$TESTPOOL/send_large_dnode
+TEST_RECV_FS=$TESTPOOL/recv_large_dnode
+TEST_SNAP=$TEST_SEND_FS@ldnsnap
+TEST_STREAM=$TESTDIR/ldnsnap
+TEST_FILE=foo
+
+
+function cleanup
+{
+	if datasetexists $TEST_SEND_FS ; then
+		log_must $ZFS destroy -r $TEST_SEND_FS
+	fi
+
+	if datasetexists $TEST_RECV_FS ; then
+		log_must $ZFS destroy -r $TEST_RECV_FS
+	fi
+
+	rm -f $TEST_STREAM
+}
+
+log_onexit cleanup
+
+log_assert "zfs send stream with large dnodes accepted by new pool"
+
+log_must $ZFS create -o dnodesize=1k $TEST_SEND_FS
+log_must touch /$TEST_SEND_FS/$TEST_FILE
+log_must $ZFS umount $TEST_SEND_FS
+log_must $ZFS snap $TEST_SNAP
+log_must $ZFS send $TEST_SNAP > $TEST_STREAM
+
+log_must eval "$ZFS recv $TEST_RECV_FS < $TEST_STREAM"
+inode=$(ls -li /$TEST_RECV_FS/$TEST_FILE | awk '{print $1}')
+dnsize=$($ZDB -dddd $TEST_RECV_FS $inode | awk '/ZFS plain file/ {print $6}')
+if [[ "$dnsize" != "1K" ]]; then
+	log_fail "dnode size is $dnsize (expected 1K)"
+fi
+
+log_pass
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_006_pos.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_006_pos.ksh
new file mode 100755
index 000000000..198b69bf7
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_006_pos.ksh
@@ -0,0 +1,58 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Run xattrtest on a dataset with large dnodes and xattr=sa
+# to stress xattr usage of the extra bonus space and verify
+# contents
+#
+
+TEST_FS=$TESTPOOL/large_dnode
+
+verify_runnable "both"
+
+function cleanup
+{
+	datasetexists $TEST_FS && log_must $ZFS destroy $TEST_FS
+}
+
+log_onexit cleanup
+log_assert "xattrtest runs cleanly on dataset with large dnodes"
+
+log_must $ZFS create $TEST_FS
+
+set -A xattr_sizes "512" "1536" "3584" "7680" "15872"
+set -A prop_values "1k"  "2k"   "4k"   "8k"   "16k"
+
+for ((i=0; i < ${#prop_values[*]}; i++)) ; do
+	prop_val=${prop_values[$i]}
+	dir=/$TEST_FS/$prop_val
+	xattr_size=${xattr_sizes[$i]}
+	log_must $ZFS set dnsize=$prop_val $TEST_FS
+	log_must mkdir $dir
+	log_must $XATTRTEST -R -y -s $xattr_size -f 1024 -p $dir
+done
+
+log_pass
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_007_neg.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_007_neg.ksh
new file mode 100755
index 000000000..d6cc17e0b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_007_neg.ksh
@@ -0,0 +1,56 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify that the dnodesize dataset property won't accept a value
+# other than "legacy" if the large_dnode feature is not enabled.
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+        if datasetexists $LGCYPOOL ; then
+                log_must $ZPOOL destroy -f $LGCYPOOL
+        fi
+}
+
+log_onexit cleanup
+
+log_assert "values other than dnodesize=legacy rejected by legacy pool"
+
+set -A prop_vals "auto" "1k" "2k" "4k" "8k" "16k"
+
+LGCYPOOL=lgcypool
+LGCYFS=$LGCYPOOL/legacy
+log_must $MKFILE 64M  $TESTDIR/$LGCYPOOL
+log_must $ZPOOL create -d $LGCYPOOL $TESTDIR/$LGCYPOOL
+log_must $ZFS create $LGCYFS
+
+for val in ${prop_vals[@]} ; do
+	log_mustnot $ZFS set dnodesize=$val $LGCYFS
+done
+
+log_pass
diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/setup.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/setup.ksh
new file mode 100755
index 000000000..d9b1a6ee8
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/features/large_dnode/setup.ksh
@@ -0,0 +1,27 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+
+default_setup $DISK
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
index 46f96fa61..91779cc78 100644
--- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
+++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
@@ -201,8 +201,8 @@ function cmp_ds_prop
 
 	for item in "type" "origin" "volblocksize" "aclinherit" "aclmode" \
 	    "atime" "canmount" "checksum" "compression" "copies" "devices" \
-	    "exec" "quota" "readonly" "recordsize" "reservation" "setuid" \
-	    "sharenfs" "snapdir" "version" "volsize" "xattr" "zoned" \
+	    "dnodesize" "exec" "quota" "readonly" "recordsize" "reservation" \
+	    "setuid" "sharenfs" "snapdir" "version" "volsize" "xattr" "zoned" \
 	    "mountpoint";
 	do
 		$ZFS get -H -o property,value,source $item $dtst1 >> \
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
index 4a590fab4..91cdd6e34 100755
--- a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
@@ -142,6 +142,7 @@ for fs in "$POOL" "$POOL/pclone" "$POOL/$FS" "$POOL/$FS/fs1" \
 	rand_set_prop $fs exec "on" "off"
 	rand_set_prop $fs quota "512M" "1024M"
 	rand_set_prop $fs recordsize "512" "2K" "8K" "32K" "128K"
+	rand_set_prop $fs dnodesize "legacy" "auto" "1k" "2k" "4k" "8k" "16k"
 	rand_set_prop $fs setuid "on" "off"
 	rand_set_prop $fs snapdir "hidden" "visible"
 	rand_set_prop $fs xattr "on" "off"
diff --git a/zfs-script-config.sh.in b/zfs-script-config.sh.in
index 2470c554a..e22cfd595 100644
--- a/zfs-script-config.sh.in
+++ b/zfs-script-config.sh.in
@@ -59,7 +59,7 @@ export READMMAP=${TESTSDIR}/zfs-tests/cmd/readmmap/readmmap
 export RENAME_DIR=${TESTSDIR}/zfs-tests/cmd/rename_dir/rename_dir
 export RM_LNKCNT_ZERO_FILE=${TESTSDIR}/zfs-tests/cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file
 export THREADSAPPEND=${TESTSDIR}/zfs-tests/cmd/threadsappend/threadsappend
-export XATTRTEST=${TESTDIR}/zfs-tests/cmd/xattrtest
+export XATTRTEST=${TESTSDIR}/zfs-tests/cmd/xattrtest/xattrtest
 
 export INTREE=1
 export LDMOD=/sbin/insmod