summaryrefslogtreecommitdiffstats
path: root/lib/libzfs/libzfs_graph.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libzfs/libzfs_graph.c')
-rw-r--r--lib/libzfs/libzfs_graph.c653
1 files changed, 0 insertions, 653 deletions
diff --git a/lib/libzfs/libzfs_graph.c b/lib/libzfs/libzfs_graph.c
deleted file mode 100644
index 63d9138ef..000000000
--- a/lib/libzfs/libzfs_graph.c
+++ /dev/null
@@ -1,653 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * Iterate over all children of the current object. This includes the normal
- * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to
- * walk all datasets in the pool, and construct a directed graph of the form:
- *
- * home
- * |
- * +----+----+
- * | |
- * v v ws
- * bar baz |
- * | |
- * v v
- * @yesterday ----> foo
- *
- * In order to construct this graph, we have to walk every dataset in the pool,
- * because the clone parent is stored as a property of the child, not the
- * parent. The parent only keeps track of the number of clones.
- *
- * In the normal case (without clones) this would be rather expensive. To avoid
- * unnecessary computation, we first try a walk of the subtree hierarchy
- * starting from the initial node. At each dataset, we construct a node in the
- * graph and an edge leading from its parent. If we don't see any snapshots
- * with a non-zero clone count, then we are finished.
- *
- * If we do find a cloned snapshot, then we finish the walk of the current
- * subtree, but indicate that we need to do a complete walk. We then perform a
- * global walk of all datasets, avoiding the subtree we already processed.
- *
- * At the end of this, we'll end up with a directed graph of all relevant (and
- * possible some irrelevant) datasets in the system. We need to both find our
- * limiting subgraph and determine a safe ordering in which to destroy the
- * datasets. We do a topological ordering of our graph starting at our target
- * dataset, and then walk the results in reverse.
- *
- * It's possible for the graph to have cycles if, for example, the user renames
- * a clone to be the parent of its origin snapshot. The user can request to
- * generate an error in this case, or ignore the cycle and continue.
- *
- * When removing datasets, we want to destroy the snapshots in chronological
- * order (because this is the most efficient method). In order to accomplish
- * this, we store the creation transaction group with each vertex and keep each
- * vertex's edges sorted according to this value. The topological sort will
- * automatically walk the snapshots in the correct order.
- */
-
-#include <assert.h>
-#include <libintl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-#include <unistd.h>
-
-#include <libzfs.h>
-
-#include "libzfs_impl.h"
-#include "zfs_namecheck.h"
-
-#define MIN_EDGECOUNT 4
-
-/*
- * Vertex structure. Indexed by dataset name, this structure maintains a list
- * of edges to other vertices.
- */
-struct zfs_edge;
-typedef struct zfs_vertex {
- char zv_dataset[ZFS_MAXNAMELEN];
- struct zfs_vertex *zv_next;
- int zv_visited;
- uint64_t zv_txg;
- struct zfs_edge **zv_edges;
- int zv_edgecount;
- int zv_edgealloc;
-} zfs_vertex_t;
-
-enum {
- VISIT_SEEN = 1,
- VISIT_SORT_PRE,
- VISIT_SORT_POST
-};
-
-/*
- * Edge structure. Simply maintains a pointer to the destination vertex. There
- * is no need to store the source vertex, since we only use edges in the context
- * of the source vertex.
- */
-typedef struct zfs_edge {
- zfs_vertex_t *ze_dest;
- struct zfs_edge *ze_next;
-} zfs_edge_t;
-
-#define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */
-
-/*
- * Graph structure. Vertices are maintained in a hash indexed by dataset name.
- */
-typedef struct zfs_graph {
- zfs_vertex_t **zg_hash;
- size_t zg_size;
- size_t zg_nvertex;
- const char *zg_root;
- int zg_clone_count;
-} zfs_graph_t;
-
-/*
- * Allocate a new edge pointing to the target vertex.
- */
-static zfs_edge_t *
-zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
-{
- zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
-
- if (zep == NULL)
- return (NULL);
-
- zep->ze_dest = dest;
-
- return (zep);
-}
-
-/*
- * Destroy an edge.
- */
-static void
-zfs_edge_destroy(zfs_edge_t *zep)
-{
- free(zep);
-}
-
-/*
- * Allocate a new vertex with the given name.
- */
-static zfs_vertex_t *
-zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
-{
- zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
-
- if (zvp == NULL)
- return (NULL);
-
- assert(strlen(dataset) < ZFS_MAXNAMELEN);
-
- (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
-
- if ((zvp->zv_edges = zfs_alloc(hdl,
- MIN_EDGECOUNT * sizeof (void *))) == NULL) {
- free(zvp);
- return (NULL);
- }
-
- zvp->zv_edgealloc = MIN_EDGECOUNT;
-
- return (zvp);
-}
-
-/*
- * Destroy a vertex. Frees up any associated edges.
- */
-static void
-zfs_vertex_destroy(zfs_vertex_t *zvp)
-{
- int i;
-
- for (i = 0; i < zvp->zv_edgecount; i++)
- zfs_edge_destroy(zvp->zv_edges[i]);
-
- free(zvp->zv_edges);
- free(zvp);
-}
-
-/*
- * Given a vertex, add an edge to the destination vertex.
- */
-static int
-zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
- zfs_vertex_t *dest)
-{
- zfs_edge_t *zep = zfs_edge_create(hdl, dest);
-
- if (zep == NULL)
- return (-1);
-
- if (zvp->zv_edgecount == zvp->zv_edgealloc) {
- void *ptr;
-
- if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
- zvp->zv_edgealloc * sizeof (void *),
- zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL)
- return (-1);
-
- zvp->zv_edges = ptr;
- zvp->zv_edgealloc *= 2;
- }
-
- zvp->zv_edges[zvp->zv_edgecount++] = zep;
-
- return (0);
-}
-
-static int
-zfs_edge_compare(const void *a, const void *b)
-{
- const zfs_edge_t *ea = *((zfs_edge_t **)a);
- const zfs_edge_t *eb = *((zfs_edge_t **)b);
-
- if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
- return (-1);
- if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
- return (1);
- return (0);
-}
-
-/*
- * Sort the given vertex edges according to the creation txg of each vertex.
- */
-static void
-zfs_vertex_sort_edges(zfs_vertex_t *zvp)
-{
- if (zvp->zv_edgecount == 0)
- return;
-
- qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
- zfs_edge_compare);
-}
-
-/*
- * Construct a new graph object. We allow the size to be specified as a
- * parameter so in the future we can size the hash according to the number of
- * datasets in the pool.
- */
-static zfs_graph_t *
-zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size)
-{
- zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
-
- if (zgp == NULL)
- return (NULL);
-
- zgp->zg_size = size;
- if ((zgp->zg_hash = zfs_alloc(hdl,
- size * sizeof (zfs_vertex_t *))) == NULL) {
- free(zgp);
- return (NULL);
- }
-
- zgp->zg_root = dataset;
- zgp->zg_clone_count = 0;
-
- return (zgp);
-}
-
-/*
- * Destroy a graph object. We have to iterate over all the hash chains,
- * destroying each vertex in the process.
- */
-static void
-zfs_graph_destroy(zfs_graph_t *zgp)
-{
- int i;
- zfs_vertex_t *current, *next;
-
- for (i = 0; i < zgp->zg_size; i++) {
- current = zgp->zg_hash[i];
- while (current != NULL) {
- next = current->zv_next;
- zfs_vertex_destroy(current);
- current = next;
- }
- }
-
- free(zgp->zg_hash);
- free(zgp);
-}
-
-/*
- * Graph hash function. Classic bernstein k=33 hash function, taken from
- * usr/src/cmd/sgs/tools/common/strhash.c
- */
-static size_t
-zfs_graph_hash(zfs_graph_t *zgp, const char *str)
-{
- size_t hash = 5381;
- int c;
-
- while ((c = *str++) != 0)
- hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
-
- return (hash % zgp->zg_size);
-}
-
-/*
- * Given a dataset name, finds the associated vertex, creating it if necessary.
- */
-static zfs_vertex_t *
-zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
- uint64_t txg)
-{
- size_t idx = zfs_graph_hash(zgp, dataset);
- zfs_vertex_t *zvp;
-
- for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
- if (strcmp(zvp->zv_dataset, dataset) == 0) {
- if (zvp->zv_txg == 0)
- zvp->zv_txg = txg;
- return (zvp);
- }
- }
-
- if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
- return (NULL);
-
- zvp->zv_next = zgp->zg_hash[idx];
- zvp->zv_txg = txg;
- zgp->zg_hash[idx] = zvp;
- zgp->zg_nvertex++;
-
- return (zvp);
-}
-
-/*
- * Given two dataset names, create an edge between them. For the source vertex,
- * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
- * created it as a destination of another edge. If 'dest' is NULL, then this
- * is an individual vertex (i.e. the starting vertex), so don't add an edge.
- */
-static int
-zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
- const char *dest, uint64_t txg)
-{
- zfs_vertex_t *svp, *dvp;
-
- if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
- return (-1);
- svp->zv_visited = VISIT_SEEN;
- if (dest != NULL) {
- dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
- if (dvp == NULL)
- return (-1);
- if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
- return (-1);
- }
-
- return (0);
-}
-
-/*
- * Iterate over all children of the given dataset, adding any vertices
- * as necessary. Returns -1 if there was an error, or 0 otherwise.
- * This is a simple recursive algorithm - the ZFS namespace typically
- * is very flat. We manually invoke the necessary ioctl() calls to
- * avoid the overhead and additional semantics of zfs_open().
- */
-static int
-iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
-{
- zfs_cmd_t zc = {"\0"};
- zfs_vertex_t *zvp;
-
- /*
- * Look up the source vertex, and avoid it if we've seen it before.
- */
- zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
- if (zvp == NULL)
- return (-1);
- if (zvp->zv_visited == VISIT_SEEN)
- return (0);
-
- /*
- * Iterate over all children
- */
- for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
- /*
- * Get statistics for this dataset, to determine the type of the
- * dataset and clone statistics. If this fails, the dataset has
- * since been removed, and we're pretty much screwed anyway.
- */
- zc.zc_objset_stats.dds_origin[0] = '\0';
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
- continue;
-
- if (zc.zc_objset_stats.dds_origin[0] != '\0') {
- if (zfs_graph_add(hdl, zgp,
- zc.zc_objset_stats.dds_origin, zc.zc_name,
- zc.zc_objset_stats.dds_creation_txg) != 0)
- return (-1);
- /*
- * Count origins only if they are contained in the graph
- */
- if (isa_child_of(zc.zc_objset_stats.dds_origin,
- zgp->zg_root))
- zgp->zg_clone_count--;
- }
-
- /*
- * Add an edge between the parent and the child.
- */
- if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
- zc.zc_objset_stats.dds_creation_txg) != 0)
- return (-1);
-
- /*
- * Recursively visit child
- */
- if (iterate_children(hdl, zgp, zc.zc_name))
- return (-1);
- }
-
- /*
- * Now iterate over all snapshots.
- */
- bzero(&zc, sizeof (zc));
-
- for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
-
- /*
- * Get statistics for this dataset, to determine the type of the
- * dataset and clone statistics. If this fails, the dataset has
- * since been removed, and we're pretty much screwed anyway.
- */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
- continue;
-
- /*
- * Add an edge between the parent and the child.
- */
- if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
- zc.zc_objset_stats.dds_creation_txg) != 0)
- return (-1);
-
- zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones;
- }
-
- zvp->zv_visited = VISIT_SEEN;
-
- return (0);
-}
-
-/*
- * Returns false if there are no snapshots with dependent clones in this
- * subtree or if all of those clones are also in this subtree. Returns
- * true if there is an error or there are external dependents.
- */
-static boolean_t
-external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
-{
- zfs_cmd_t zc = {"\0"};
-
- /*
- * Check whether this dataset is a clone or has clones since
- * iterate_children() only checks the children.
- */
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
- return (B_TRUE);
-
- if (zc.zc_objset_stats.dds_origin[0] != '\0') {
- if (zfs_graph_add(hdl, zgp,
- zc.zc_objset_stats.dds_origin, zc.zc_name,
- zc.zc_objset_stats.dds_creation_txg) != 0)
- return (B_TRUE);
- if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset))
- zgp->zg_clone_count--;
- }
-
- if ((zc.zc_objset_stats.dds_num_clones) ||
- iterate_children(hdl, zgp, dataset))
- return (B_TRUE);
-
- return (zgp->zg_clone_count != 0);
-}
-
-/*
- * Construct a complete graph of all necessary vertices. First, iterate over
- * only our object's children. If no cloned snapshots are found, or all of
- * the cloned snapshots are in this subtree then return a graph of the subtree.
- * Otherwise, start at the root of the pool and iterate over all datasets.
- */
-static zfs_graph_t *
-construct_graph(libzfs_handle_t *hdl, const char *dataset)
-{
- zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE);
- int ret = 0;
-
- if (zgp == NULL)
- return (zgp);
-
- if ((strchr(dataset, '/') == NULL) ||
- (external_dependents(hdl, zgp, dataset))) {
- /*
- * Determine pool name and try again.
- */
- int len = strcspn(dataset, "/@") + 1;
- char *pool = zfs_alloc(hdl, len);
-
- if (pool == NULL) {
- zfs_graph_destroy(zgp);
- return (NULL);
- }
- (void) strlcpy(pool, dataset, len);
-
- if (iterate_children(hdl, zgp, pool) == -1 ||
- zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
- free(pool);
- zfs_graph_destroy(zgp);
- return (NULL);
- }
- free(pool);
- }
-
- if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
- zfs_graph_destroy(zgp);
- return (NULL);
- }
-
- return (zgp);
-}
-
-/*
- * Given a graph, do a recursive topological sort into the given array. This is
- * really just a depth first search, so that the deepest nodes appear first.
- * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
- */
-static int
-topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
- size_t *idx, zfs_vertex_t *zgv)
-{
- int i;
-
- if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
- /*
- * If we've already seen this vertex as part of our depth-first
- * search, then we have a cyclic dependency, and we must return
- * an error.
- */
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "recursive dependency at '%s'"),
- zgv->zv_dataset);
- return (zfs_error(hdl, EZFS_RECURSIVE,
- dgettext(TEXT_DOMAIN,
- "cannot determine dependent datasets")));
- } else if (zgv->zv_visited >= VISIT_SORT_PRE) {
- /*
- * If we've already processed this as part of the topological
- * sort, then don't bother doing so again.
- */
- return (0);
- }
-
- zgv->zv_visited = VISIT_SORT_PRE;
-
- /* avoid doing a search if we don't have to */
- zfs_vertex_sort_edges(zgv);
- for (i = 0; i < zgv->zv_edgecount; i++) {
- if (topo_sort(hdl, allowrecursion, result, idx,
- zgv->zv_edges[i]->ze_dest) != 0)
- return (-1);
- }
-
- /* we may have visited this in the course of the above */
- if (zgv->zv_visited == VISIT_SORT_POST)
- return (0);
-
- if ((result[*idx] = zfs_alloc(hdl,
- strlen(zgv->zv_dataset) + 1)) == NULL)
- return (-1);
-
- (void) strcpy(result[*idx], zgv->zv_dataset);
- *idx += 1;
- zgv->zv_visited = VISIT_SORT_POST;
- return (0);
-}
-
-/*
- * The only public interface for this file. Do the dirty work of constructing a
- * child list for the given object. Construct the graph, do the toplogical
- * sort, and then return the array of strings to the caller.
- *
- * The 'allowrecursion' parameter controls behavior when cycles are found. If
- * it is set, the the cycle is ignored and the results returned as if the cycle
- * did not exist. If it is not set, then the routine will generate an error if
- * a cycle is found.
- */
-int
-get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
- const char *dataset, char ***result, size_t *count)
-{
- zfs_graph_t *zgp;
- zfs_vertex_t *zvp;
-
- if ((zgp = construct_graph(hdl, dataset)) == NULL)
- return (-1);
-
- if ((*result = zfs_alloc(hdl,
- zgp->zg_nvertex * sizeof (char *))) == NULL) {
- zfs_graph_destroy(zgp);
- return (-1);
- }
-
- if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
- free(*result);
- zfs_graph_destroy(zgp);
- return (-1);
- }
-
- *count = 0;
- if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
- free(*result);
- zfs_graph_destroy(zgp);
- return (-1);
- }
-
- /*
- * Get rid of the last entry, which is our starting vertex and not
- * strictly a dependent.
- */
- assert(*count > 0);
- free((*result)[*count - 1]);
- (*count)--;
-
- zfs_graph_destroy(zgp);
-
- return (0);
-}