diff options
author | Don Brady <[email protected]> | 2018-09-05 19:33:36 -0600 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-09-05 18:33:36 -0700 |
commit | cc99f275a28c43fe450a66a7544f73c4935f7361 (patch) | |
tree | f867e1d2cbb550a047c0f87986831252c41a2fd9 /include | |
parent | cfa37548ebc880580782b245f2d233ed540e7a01 (diff) |
Pool allocation classes
Allocation Classes add the ability to have allocation classes in a
pool that are dedicated to serving specific block categories, such
as DDT data, metadata, and small file blocks. A pool can opt-in to
this feature by adding a 'special' or 'dedup' top-level VDEV.
Reviewed-by: Pavel Zakharov <[email protected]>
Reviewed-by: Richard Laager <[email protected]>
Reviewed-by: Alek Pinchuk <[email protected]>
Reviewed-by: Håkan Johansson <[email protected]>
Reviewed-by: Andreas Dilger <[email protected]>
Reviewed-by: DHE <[email protected]>
Reviewed-by: Richard Elling <[email protected]>
Reviewed-by: Gregor Kopka <[email protected]>
Reviewed-by: Kash Pande <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matthew Ahrens <[email protected]>
Signed-off-by: Don Brady <[email protected]>
Closes #5182
Diffstat (limited to 'include')
-rw-r--r-- | include/sys/dmu.h | 15 | ||||
-rw-r--r-- | include/sys/dmu_objset.h | 7 | ||||
-rw-r--r-- | include/sys/fs/zfs.h | 11 | ||||
-rw-r--r-- | include/sys/metaslab.h | 9 | ||||
-rw-r--r-- | include/sys/spa.h | 9 | ||||
-rw-r--r-- | include/sys/spa_impl.h | 5 | ||||
-rw-r--r-- | include/sys/vdev.h | 3 | ||||
-rw-r--r-- | include/sys/vdev_impl.h | 10 | ||||
-rw-r--r-- | include/sys/zio.h | 2 | ||||
-rw-r--r-- | include/zfeature_common.h | 2 |
10 files changed, 65 insertions, 8 deletions
diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 67f4be1c2..bc7046fdc 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -20,12 +20,13 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2014 HybridCluster. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. + * Copyright (c) 2017, Intel Corporation. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -136,6 +137,16 @@ typedef enum dmu_object_byteswap { ((ot) & DMU_OT_METADATA) : \ DMU_OT_IS_METADATA_IMPL(ot)) +#define DMU_OT_IS_DDT(ot) \ + ((ot) == DMU_OT_DDT_ZAP) + +#define DMU_OT_IS_ZIL(ot) \ + ((ot) == DMU_OT_INTENT_LOG) + +/* Note: ztest uses DMU_OT_UINT64_OTHER as a proxy for file blocks */ +#define DMU_OT_IS_FILE(ot) \ + ((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER) + #define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \ ((ot) & DMU_OT_ENCRYPTED) : \ DMU_OT_IS_ENCRYPTED_IMPL(ot)) @@ -226,7 +237,7 @@ typedef enum dmu_object_type { * values. * * The DMU_OTN_* types do not have entries in the dmu_ot table, - * use the DMU_OT_IS_METDATA() and DMU_OT_BYTESWAP() macros instead + * use the DMU_OT_IS_METADATA() and DMU_OT_BYTESWAP() macros instead * of indexing into dmu_ot directly (this works for both DMU_OT_* types * and DMU_OTN_* types). */ diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index fd5afc0bc..c0650bcde 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. 
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ @@ -137,6 +137,11 @@ struct objset { uint64_t os_normalization; uint64_t os_utf8only; uint64_t os_casesensitivity; + /* + * The largest zpl file block allowed in special class. + * cached here instead of zfsvfs for easier access. + */ + int os_zpl_special_smallblock; /* * Pointer is constant; the blkptr it points to is protected by diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index c8c7cebae..6bbf84346 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -26,6 +26,7 @@ * Copyright (c) 2013, 2017 Joyent, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, Intel Corporation. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -181,6 +182,7 @@ typedef enum { ZFS_PROP_KEY_GUID, ZFS_PROP_KEYSTATUS, ZFS_PROP_REMAPTXG, /* not exposed to the user */ + ZFS_PROP_SPECIAL_SMALL_BLOCKS, ZFS_NUM_PROPS } zfs_prop_t; @@ -713,6 +715,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */ #define ZPOOL_CONFIG_MMP_HOSTNAME "mmp_hostname" /* not stored on disk */ #define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */ +#define ZPOOL_CONFIG_ALLOCATION_BIAS "alloc_bias" /* not stored on disk */ /* * The persistent vdev state is stored as separate values rather than a single @@ -759,6 +762,14 @@ typedef struct zpool_load_policy { #define VDEV_TOP_ZAP_POOL_CHECKPOINT_SM \ "com.delphix:pool_checkpoint_sm" +#define VDEV_TOP_ZAP_ALLOCATION_BIAS \ + "org.zfsonlinux:allocation_bias" + +/* vdev metaslab allocation bias */ +#define VDEV_ALLOC_BIAS_LOG "log" +#define VDEV_ALLOC_BIAS_SPECIAL "special" +#define VDEV_ALLOC_BIAS_DEDUP "dedup" + /* * This is needed in userland to report the minimum necessary device size. 
*/ diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 545bcafa5..fca233a38 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2017, Intel Corporation. */ #ifndef _SYS_METASLAB_H @@ -57,13 +58,17 @@ void metaslab_sync_done(metaslab_t *, uint64_t); void metaslab_sync_reassess(metaslab_group_t *); uint64_t metaslab_block_maxsize(metaslab_t *); +/* + * metaslab alloc flags + */ #define METASLAB_HINTBP_FAVOR 0x0 #define METASLAB_HINTBP_AVOID 0x1 #define METASLAB_GANG_HEADER 0x2 #define METASLAB_GANG_CHILD 0x4 #define METASLAB_ASYNC_ALLOC 0x8 #define METASLAB_DONT_THROTTLE 0x10 -#define METASLAB_FASTWRITE 0x20 +#define METASLAB_MUST_RESERVE 0x20 +#define METASLAB_FASTWRITE 0x40 int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t, blkptr_t *, int, uint64_t, blkptr_t *, int, zio_alloc_list_t *, zio_t *, @@ -96,8 +101,6 @@ boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, int, zio_t *, int); void metaslab_class_throttle_unreserve(metaslab_class_t *, int, int, zio_t *); -void metaslab_class_space_update(metaslab_class_t *, int64_t, int64_t, - int64_t, int64_t); uint64_t metaslab_class_get_alloc(metaslab_class_t *); uint64_t metaslab_class_get_space(metaslab_class_t *); uint64_t metaslab_class_get_dspace(metaslab_class_t *); diff --git a/include/sys/spa.h b/include/sys/spa.h index 82fe2c18c..b86c65557 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -20,13 +20,14 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. 
All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Joyent, Inc. * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, Intel Corporation. */ #ifndef _SYS_SPA_H @@ -976,6 +977,11 @@ extern uint64_t spa_version(spa_t *spa); extern boolean_t spa_deflate(spa_t *spa); extern metaslab_class_t *spa_normal_class(spa_t *spa); extern metaslab_class_t *spa_log_class(spa_t *spa); +extern metaslab_class_t *spa_special_class(spa_t *spa); +extern metaslab_class_t *spa_dedup_class(spa_t *spa); +extern metaslab_class_t *spa_preferred_class(spa_t *spa, uint64_t size, + dmu_object_type_t objtype, uint_t level, uint_t special_smallblk); + extern void spa_evicting_os_register(spa_t *, objset_t *os); extern void spa_evicting_os_deregister(spa_t *, objset_t *os); extern void spa_evicting_os_wait(spa_t *spa); @@ -1040,6 +1046,7 @@ extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing); extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa); extern boolean_t spa_multihost(spa_t *spa); extern unsigned long spa_get_hostid(void); +extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *); extern int spa_mode(spa_t *spa); extern uint64_t zfs_strtonum(const char *str, char **nptr); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 1b8e48180..676e8b8a2 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -20,12 +20,13 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, Intel Corporation. 
*/ #ifndef _SYS_SPA_IMPL_H @@ -220,6 +221,8 @@ struct spa { boolean_t spa_is_initializing; /* true while opening pool */ metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_log_class; /* intent log data class */ + metaslab_class_t *spa_special_class; /* special allocation class */ + metaslab_class_t *spa_dedup_class; /* dedup allocation class */ uint64_t spa_first_txg; /* first txg after spa_open() */ uint64_t spa_final_txg; /* txg of export/destroy */ uint64_t spa_freeze_txg; /* freeze pool at this txg */ diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 6d31d61b5..b37b60bdd 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -22,6 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2017, Intel Corporation. */ #ifndef _SYS_VDEV_H @@ -108,6 +109,8 @@ extern boolean_t vdev_children_are_offline(vdev_t *vd); extern void vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta, int64_t space_delta); +extern int64_t vdev_deflated_space(vdev_t *vd, int64_t space); + extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize); extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 701328ea6..e055161e8 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2017, Intel Corporation. 
*/ #ifndef _SYS_VDEV_IMPL_H @@ -140,6 +141,14 @@ struct vdev_queue { kmutex_t vq_lock; }; +typedef enum vdev_alloc_bias { + VDEV_BIAS_NONE, + VDEV_BIAS_LOG, /* dedicated to ZIL data (SLOG) */ + VDEV_BIAS_SPECIAL, /* dedicated to ddt, metadata, and small blks */ + VDEV_BIAS_DEDUP /* dedicated to dedup metadata */ +} vdev_alloc_bias_t; + + /* * On-disk indirect vdev state. * @@ -236,6 +245,7 @@ struct vdev { boolean_t vdev_ishole; /* is a hole in the namespace */ kmutex_t vdev_queue_lock; /* protects vdev_queue_depth */ uint64_t vdev_top_zap; + vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */ /* pool checkpoint related */ space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */ diff --git a/include/sys/zio.h b/include/sys/zio.h index d32a42833..322006649 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -353,6 +353,7 @@ typedef struct zio_prop { uint8_t zp_salt[ZIO_DATA_SALT_LEN]; uint8_t zp_iv[ZIO_DATA_IV_LEN]; uint8_t zp_mac[ZIO_DATA_MAC_LEN]; + uint32_t zp_zpl_smallblk; } zio_prop_t; typedef struct zio_cksum_report zio_cksum_report_t; @@ -473,6 +474,7 @@ struct zio { vdev_t *io_vd; void *io_vsd; const zio_vsd_ops_t *io_vsd_ops; + metaslab_class_t *io_metaslab_class; /* dva throttle class */ uint64_t io_offset; hrtime_t io_timestamp; /* submitted at */ diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 8d31309e8..84bc7f816 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -23,6 +23,7 @@ * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2017, Intel Corporation. */ #ifndef _ZFEATURE_COMMON_H @@ -63,6 +64,7 @@ typedef enum spa_feature { SPA_FEATURE_OBSOLETE_COUNTS, SPA_FEATURE_POOL_CHECKPOINT, SPA_FEATURE_SPACEMAP_V2, + SPA_FEATURE_ALLOCATION_CLASSES, SPA_FEATURES } spa_feature_t; |