diff options
Diffstat (limited to 'lib/libzfs/libzfs_graph.c')
-rw-r--r-- | lib/libzfs/libzfs_graph.c | 662 |
1 files changed, 662 insertions, 0 deletions
diff --git a/lib/libzfs/libzfs_graph.c b/lib/libzfs/libzfs_graph.c new file mode 100644 index 000000000..e7cbf2386 --- /dev/null +++ b/lib/libzfs/libzfs_graph.c @@ -0,0 +1,662 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Iterate over all children of the current object. This includes the normal + * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to + * walk all datasets in the pool, and construct a directed graph of the form: + * + * home + * | + * +----+----+ + * | | + * v v ws + * bar baz | + * | | + * v v + * @yesterday ----> foo + * + * In order to construct this graph, we have to walk every dataset in the pool, + * because the clone parent is stored as a property of the child, not the + * parent. The parent only keeps track of the number of clones. + * + * In the normal case (without clones) this would be rather expensive. To avoid + * unnecessary computation, we first try a walk of the subtree hierarchy + * starting from the initial node. At each dataset, we construct a node in the + * graph and an edge leading from its parent. If we don't see any snapshots + * with a non-zero clone count, then we are finished. + * + * If we do find a cloned snapshot, then we finish the walk of the current + * subtree, but indicate that we need to do a complete walk. We then perform a + * global walk of all datasets, avoiding the subtree we already processed. + * + * At the end of this, we'll end up with a directed graph of all relevant (and + * possible some irrelevant) datasets in the system. We need to both find our + * limiting subgraph and determine a safe ordering in which to destroy the + * datasets. We do a topological ordering of our graph starting at our target + * dataset, and then walk the results in reverse. + * + * It's possible for the graph to have cycles if, for example, the user renames + * a clone to be the parent of its origin snapshot. The user can request to + * generate an error in this case, or ignore the cycle and continue. + * + * When removing datasets, we want to destroy the snapshots in chronological + * order (because this is the most efficient method). In order to accomplish + * this, we store the creation transaction group with each vertex and keep each + * vertex's edges sorted according to this value. The topological sort will + * automatically walk the snapshots in the correct order. + */ + +#include <assert.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> + +#include <libzfs.h> + +#include "libzfs_impl.h" +#include "zfs_namecheck.h" + +#define MIN_EDGECOUNT 4 + +/* + * Vertex structure. Indexed by dataset name, this structure maintains a list + * of edges to other vertices. + */ +struct zfs_edge; +typedef struct zfs_vertex { + char zv_dataset[ZFS_MAXNAMELEN]; + struct zfs_vertex *zv_next; + int zv_visited; + uint64_t zv_txg; + struct zfs_edge **zv_edges; + int zv_edgecount; + int zv_edgealloc; +} zfs_vertex_t; + +enum { + VISIT_SEEN = 1, + VISIT_SORT_PRE, + VISIT_SORT_POST +}; + +/* + * Edge structure. Simply maintains a pointer to the destination vertex. There + * is no need to store the source vertex, since we only use edges in the context + * of the source vertex. + */ +typedef struct zfs_edge { + zfs_vertex_t *ze_dest; + struct zfs_edge *ze_next; +} zfs_edge_t; + +#define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */ + +/* + * Graph structure. Vertices are maintained in a hash indexed by dataset name. + */ +typedef struct zfs_graph { + zfs_vertex_t **zg_hash; + size_t zg_size; + size_t zg_nvertex; + const char *zg_root; + int zg_clone_count; +} zfs_graph_t; + +/* + * Allocate a new edge pointing to the target vertex. + */ +static zfs_edge_t * +zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest) +{ + zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t)); + + if (zep == NULL) + return (NULL); + + zep->ze_dest = dest; + + return (zep); +} + +/* + * Destroy an edge. + */ +static void +zfs_edge_destroy(zfs_edge_t *zep) +{ + free(zep); +} + +/* + * Allocate a new vertex with the given name. + */ +static zfs_vertex_t * +zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset) +{ + zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t)); + + if (zvp == NULL) + return (NULL); + + assert(strlen(dataset) < ZFS_MAXNAMELEN); + + (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset)); + + if ((zvp->zv_edges = zfs_alloc(hdl, + MIN_EDGECOUNT * sizeof (void *))) == NULL) { + free(zvp); + return (NULL); + } + + zvp->zv_edgealloc = MIN_EDGECOUNT; + + return (zvp); +} + +/* + * Destroy a vertex. Frees up any associated edges. + */ +static void +zfs_vertex_destroy(zfs_vertex_t *zvp) +{ + int i; + + for (i = 0; i < zvp->zv_edgecount; i++) + zfs_edge_destroy(zvp->zv_edges[i]); + + free(zvp->zv_edges); + free(zvp); +} + +/* + * Given a vertex, add an edge to the destination vertex. + */ +static int +zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp, + zfs_vertex_t *dest) +{ + zfs_edge_t *zep = zfs_edge_create(hdl, dest); + + if (zep == NULL) + return (-1); + + if (zvp->zv_edgecount == zvp->zv_edgealloc) { + void *ptr; + + if ((ptr = zfs_realloc(hdl, zvp->zv_edges, + zvp->zv_edgealloc * sizeof (void *), + zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL) + return (-1); + + zvp->zv_edges = ptr; + zvp->zv_edgealloc *= 2; + } + + zvp->zv_edges[zvp->zv_edgecount++] = zep; + + return (0); +} + +static int +zfs_edge_compare(const void *a, const void *b) +{ + const zfs_edge_t *ea = *((zfs_edge_t **)a); + const zfs_edge_t *eb = *((zfs_edge_t **)b); + + if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg) + return (-1); + if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg) + return (1); + return (0); +} + +/* + * Sort the given vertex edges according to the creation txg of each vertex. + */ +static void +zfs_vertex_sort_edges(zfs_vertex_t *zvp) +{ + if (zvp->zv_edgecount == 0) + return; + + qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *), + zfs_edge_compare); +} + +/* + * Construct a new graph object. We allow the size to be specified as a + * parameter so in the future we can size the hash according to the number of + * datasets in the pool. + */ +static zfs_graph_t * +zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size) +{ + zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t)); + + if (zgp == NULL) + return (NULL); + + zgp->zg_size = size; + if ((zgp->zg_hash = zfs_alloc(hdl, + size * sizeof (zfs_vertex_t *))) == NULL) { + free(zgp); + return (NULL); + } + + zgp->zg_root = dataset; + zgp->zg_clone_count = 0; + + return (zgp); +} + +/* + * Destroy a graph object. We have to iterate over all the hash chains, + * destroying each vertex in the process. + */ +static void +zfs_graph_destroy(zfs_graph_t *zgp) +{ + int i; + zfs_vertex_t *current, *next; + + for (i = 0; i < zgp->zg_size; i++) { + current = zgp->zg_hash[i]; + while (current != NULL) { + next = current->zv_next; + zfs_vertex_destroy(current); + current = next; + } + } + + free(zgp->zg_hash); + free(zgp); +} + +/* + * Graph hash function. Classic bernstein k=33 hash function, taken from + * usr/src/cmd/sgs/tools/common/strhash.c + */ +static size_t +zfs_graph_hash(zfs_graph_t *zgp, const char *str) +{ + size_t hash = 5381; + int c; + + while ((c = *str++) != 0) + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + + return (hash % zgp->zg_size); +} + +/* + * Given a dataset name, finds the associated vertex, creating it if necessary. + */ +static zfs_vertex_t * +zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset, + uint64_t txg) +{ + size_t idx = zfs_graph_hash(zgp, dataset); + zfs_vertex_t *zvp; + + for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) { + if (strcmp(zvp->zv_dataset, dataset) == 0) { + if (zvp->zv_txg == 0) + zvp->zv_txg = txg; + return (zvp); + } + } + + if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL) + return (NULL); + + zvp->zv_next = zgp->zg_hash[idx]; + zvp->zv_txg = txg; + zgp->zg_hash[idx] = zvp; + zgp->zg_nvertex++; + + return (zvp); +} + +/* + * Given two dataset names, create an edge between them. For the source vertex, + * mark 'zv_visited' to indicate that we have seen this vertex, and not simply + * created it as a destination of another edge. If 'dest' is NULL, then this + * is an individual vertex (i.e. the starting vertex), so don't add an edge. + */ +static int +zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source, + const char *dest, uint64_t txg) +{ + zfs_vertex_t *svp, *dvp; + + if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL) + return (-1); + svp->zv_visited = VISIT_SEEN; + if (dest != NULL) { + dvp = zfs_graph_lookup(hdl, zgp, dest, txg); + if (dvp == NULL) + return (-1); + if (zfs_vertex_add_edge(hdl, svp, dvp) != 0) + return (-1); + } + + return (0); +} + +/* + * Iterate over all children of the given dataset, adding any vertices + * as necessary. Returns -1 if there was an error, or 0 otherwise. + * This is a simple recursive algorithm - the ZFS namespace typically + * is very flat. We manually invoke the necessary ioctl() calls to + * avoid the overhead and additional semantics of zfs_open(). + */ +static int +iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) +{ + zfs_cmd_t zc = { 0 }; + zfs_vertex_t *zvp; + + /* + * Look up the source vertex, and avoid it if we've seen it before. + */ + zvp = zfs_graph_lookup(hdl, zgp, dataset, 0); + if (zvp == NULL) + return (-1); + if (zvp->zv_visited == VISIT_SEEN) + return (0); + + /* + * Iterate over all children + */ + for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { + + /* + * Ignore private dataset names. + */ + if (dataset_name_hidden(zc.zc_name)) + continue; + + /* + * Get statistics for this dataset, to determine the type of the + * dataset and clone statistics. If this fails, the dataset has + * since been removed, and we're pretty much screwed anyway. + */ + zc.zc_objset_stats.dds_origin[0] = '\0'; + if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) + continue; + + if (zc.zc_objset_stats.dds_origin[0] != '\0') { + if (zfs_graph_add(hdl, zgp, + zc.zc_objset_stats.dds_origin, zc.zc_name, + zc.zc_objset_stats.dds_creation_txg) != 0) + return (-1); + /* + * Count origins only if they are contained in the graph + */ + if (isa_child_of(zc.zc_objset_stats.dds_origin, + zgp->zg_root)) + zgp->zg_clone_count--; + } + + /* + * Add an edge between the parent and the child. + */ + if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, + zc.zc_objset_stats.dds_creation_txg) != 0) + return (-1); + + /* + * Recursively visit child + */ + if (iterate_children(hdl, zgp, zc.zc_name)) + return (-1); + } + + /* + * Now iterate over all snapshots. + */ + bzero(&zc, sizeof (zc)); + + for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0; + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { + + /* + * Get statistics for this dataset, to determine the type of the + * dataset and clone statistics. If this fails, the dataset has + * since been removed, and we're pretty much screwed anyway. + */ + if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) + continue; + + /* + * Add an edge between the parent and the child. + */ + if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, + zc.zc_objset_stats.dds_creation_txg) != 0) + return (-1); + + zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones; + } + + zvp->zv_visited = VISIT_SEEN; + + return (0); +} + +/* + * Returns false if there are no snapshots with dependent clones in this + * subtree or if all of those clones are also in this subtree. Returns + * true if there is an error or there are external dependents. + */ +static boolean_t +external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) +{ + zfs_cmd_t zc = { 0 }; + + /* + * Check whether this dataset is a clone or has clones since + * iterate_children() only checks the children. + */ + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) + return (B_TRUE); + + if (zc.zc_objset_stats.dds_origin[0] != '\0') { + if (zfs_graph_add(hdl, zgp, + zc.zc_objset_stats.dds_origin, zc.zc_name, + zc.zc_objset_stats.dds_creation_txg) != 0) + return (B_TRUE); + if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset)) + zgp->zg_clone_count--; + } + + if ((zc.zc_objset_stats.dds_num_clones) || + iterate_children(hdl, zgp, dataset)) + return (B_TRUE); + + return (zgp->zg_clone_count != 0); +} + +/* + * Construct a complete graph of all necessary vertices. First, iterate over + * only our object's children. If no cloned snapshots are found, or all of + * the cloned snapshots are in this subtree then return a graph of the subtree. + * Otherwise, start at the root of the pool and iterate over all datasets. + */ +static zfs_graph_t * +construct_graph(libzfs_handle_t *hdl, const char *dataset) +{ + zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE); + int ret = 0; + + if (zgp == NULL) + return (zgp); + + if ((strchr(dataset, '/') == NULL) || + (external_dependents(hdl, zgp, dataset))) { + /* + * Determine pool name and try again. + */ + int len = strcspn(dataset, "/@") + 1; + char *pool = zfs_alloc(hdl, len); + + if (pool == NULL) { + zfs_graph_destroy(zgp); + return (NULL); + } + (void) strlcpy(pool, dataset, len); + + if (iterate_children(hdl, zgp, pool) == -1 || + zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) { + free(pool); + zfs_graph_destroy(zgp); + return (NULL); + } + free(pool); + } + + if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) { + zfs_graph_destroy(zgp); + return (NULL); + } + + return (zgp); +} + +/* + * Given a graph, do a recursive topological sort into the given array. This is + * really just a depth first search, so that the deepest nodes appear first. + * hijack the 'zv_visited' marker to avoid visiting the same vertex twice. + */ +static int +topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result, + size_t *idx, zfs_vertex_t *zgv) +{ + int i; + + if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) { + /* + * If we've already seen this vertex as part of our depth-first + * search, then we have a cyclic dependency, and we must return + * an error. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "recursive dependency at '%s'"), + zgv->zv_dataset); + return (zfs_error(hdl, EZFS_RECURSIVE, + dgettext(TEXT_DOMAIN, + "cannot determine dependent datasets"))); + } else if (zgv->zv_visited >= VISIT_SORT_PRE) { + /* + * If we've already processed this as part of the topological + * sort, then don't bother doing so again. + */ + return (0); + } + + zgv->zv_visited = VISIT_SORT_PRE; + + /* avoid doing a search if we don't have to */ + zfs_vertex_sort_edges(zgv); + for (i = 0; i < zgv->zv_edgecount; i++) { + if (topo_sort(hdl, allowrecursion, result, idx, + zgv->zv_edges[i]->ze_dest) != 0) + return (-1); + } + + /* we may have visited this in the course of the above */ + if (zgv->zv_visited == VISIT_SORT_POST) + return (0); + + if ((result[*idx] = zfs_alloc(hdl, + strlen(zgv->zv_dataset) + 1)) == NULL) + return (-1); + + (void) strcpy(result[*idx], zgv->zv_dataset); + *idx += 1; + zgv->zv_visited = VISIT_SORT_POST; + return (0); +} + +/* + * The only public interface for this file. Do the dirty work of constructing a + * child list for the given object. Construct the graph, do the toplogical + * sort, and then return the array of strings to the caller. + * + * The 'allowrecursion' parameter controls behavior when cycles are found. If + * it is set, the the cycle is ignored and the results returned as if the cycle + * did not exist. If it is not set, then the routine will generate an error if + * a cycle is found. + */ +int +get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion, + const char *dataset, char ***result, size_t *count) +{ + zfs_graph_t *zgp; + zfs_vertex_t *zvp; + + if ((zgp = construct_graph(hdl, dataset)) == NULL) + return (-1); + + if ((*result = zfs_alloc(hdl, + zgp->zg_nvertex * sizeof (char *))) == NULL) { + zfs_graph_destroy(zgp); + return (-1); + } + + if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) { + free(*result); + zfs_graph_destroy(zgp); + return (-1); + } + + *count = 0; + if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) { + free(*result); + zfs_graph_destroy(zgp); + return (-1); + } + + /* + * Get rid of the last entry, which is our starting vertex and not + * strictly a dependent. + */ + assert(*count > 0); + free((*result)[*count - 1]); + (*count)--; + + zfs_graph_destroy(zgp); + + return (0); +} |