diff options
author | Matthew Macy <[email protected]> | 2020-04-14 11:36:28 -0700 |
---|---|---|
committer | GitHub <[email protected]> | 2020-04-14 11:36:28 -0700 |
commit | 9f0a21e6411aa0bac23fba0ddb220342a48c7cc7 (patch) | |
tree | 63e77f57396f27dbe5c69dc11e7aeb37f9008bfc /module/os/freebsd/spl | |
parent | 75c62019f3938e7bc81becb4fb2d5b5eb523e79a (diff) |
Add FreeBSD support to OpenZFS
Add the FreeBSD platform code to the OpenZFS repository. As of this
commit the source can be compiled and tested on FreeBSD 11 and 12.
Subsequent commits are now required to compile on FreeBSD and Linux.
Additionally, they must pass the ZFS Test Suite on FreeBSD which is
being run by the CI. As of this commit 1230 tests pass on FreeBSD
and there are no unexpected failures.
Reviewed-by: Sean Eric Fagan <[email protected]>
Reviewed-by: Jorgen Lundman <[email protected]>
Reviewed-by: Richard Laager <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Co-authored-by: Ryan Moeller <[email protected]>
Signed-off-by: Matt Macy <[email protected]>
Signed-off-by: Ryan Moeller <[email protected]>
Closes #898
Closes #8987
Diffstat (limited to 'module/os/freebsd/spl')
28 files changed, 7269 insertions, 0 deletions
diff --git a/module/os/freebsd/spl/acl_common.c b/module/os/freebsd/spl/acl_common.c new file mode 100644 index 000000000..8eea4695e --- /dev/null +++ b/module/os/freebsd/spl/acl_common.c @@ -0,0 +1,1731 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/avl.h> +#include <sys/misc.h> +#if defined(_KERNEL) +#include <sys/kmem.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <acl/acl_common.h> +#include <sys/debug.h> +#else +#include <errno.h> +#include <stdlib.h> +#include <stddef.h> +#include <strings.h> +#include <unistd.h> +#include <assert.h> +#include <grp.h> +#include <pwd.h> +#include <acl_common.h> +#define ASSERT assert +#endif + +#define ACE_POSIX_SUPPORTED_BITS (ACE_READ_DATA | \ + ACE_WRITE_DATA | ACE_APPEND_DATA | ACE_EXECUTE | \ + ACE_READ_ATTRIBUTES | ACE_READ_ACL | ACE_WRITE_ACL) + + +#define ACL_SYNCHRONIZE_SET_DENY 0x0000001 +#define ACL_SYNCHRONIZE_SET_ALLOW 0x0000002 +#define ACL_SYNCHRONIZE_ERR_DENY 0x0000004 +#define ACL_SYNCHRONIZE_ERR_ALLOW 0x0000008 + +#define ACL_WRITE_OWNER_SET_DENY 0x0000010 +#define ACL_WRITE_OWNER_SET_ALLOW 0x0000020 +#define ACL_WRITE_OWNER_ERR_DENY 0x0000040 +#define ACL_WRITE_OWNER_ERR_ALLOW 0x0000080 + +#define ACL_DELETE_SET_DENY 0x0000100 +#define ACL_DELETE_SET_ALLOW 0x0000200 +#define ACL_DELETE_ERR_DENY 0x0000400 +#define ACL_DELETE_ERR_ALLOW 0x0000800 + +#define ACL_WRITE_ATTRS_OWNER_SET_DENY 0x0001000 +#define ACL_WRITE_ATTRS_OWNER_SET_ALLOW 0x0002000 +#define ACL_WRITE_ATTRS_OWNER_ERR_DENY 0x0004000 +#define ACL_WRITE_ATTRS_OWNER_ERR_ALLOW 0x0008000 + +#define ACL_WRITE_ATTRS_WRITER_SET_DENY 0x0010000 +#define ACL_WRITE_ATTRS_WRITER_SET_ALLOW 0x0020000 +#define ACL_WRITE_ATTRS_WRITER_ERR_DENY 0x0040000 +#define ACL_WRITE_ATTRS_WRITER_ERR_ALLOW 0x0080000 + +#define ACL_WRITE_NAMED_WRITER_SET_DENY 0x0100000 +#define ACL_WRITE_NAMED_WRITER_SET_ALLOW 0x0200000 +#define ACL_WRITE_NAMED_WRITER_ERR_DENY 0x0400000 +#define ACL_WRITE_NAMED_WRITER_ERR_ALLOW 0x0800000 + +#define ACL_READ_NAMED_READER_SET_DENY 0x1000000 +#define ACL_READ_NAMED_READER_SET_ALLOW 0x2000000 +#define ACL_READ_NAMED_READER_ERR_DENY 0x4000000 +#define ACL_READ_NAMED_READER_ERR_ALLOW 0x8000000 + + +#define ACE_VALID_MASK_BITS (\ + ACE_READ_DATA | \ + ACE_LIST_DIRECTORY | \ + ACE_WRITE_DATA | \ + ACE_ADD_FILE | \ + ACE_APPEND_DATA | \ + ACE_ADD_SUBDIRECTORY | \ + ACE_READ_NAMED_ATTRS | \ + ACE_WRITE_NAMED_ATTRS | \ + ACE_EXECUTE | \ + ACE_DELETE_CHILD | \ + ACE_READ_ATTRIBUTES | \ + ACE_WRITE_ATTRIBUTES | \ + ACE_DELETE | \ + ACE_READ_ACL | \ + ACE_WRITE_ACL | \ + ACE_WRITE_OWNER | \ + ACE_SYNCHRONIZE) + +#define ACE_MASK_UNDEFINED 0x80000000 + +#define ACE_VALID_FLAG_BITS (ACE_FILE_INHERIT_ACE | \ + ACE_DIRECTORY_INHERIT_ACE | \ + ACE_NO_PROPAGATE_INHERIT_ACE | ACE_INHERIT_ONLY_ACE | \ + ACE_SUCCESSFUL_ACCESS_ACE_FLAG | ACE_FAILED_ACCESS_ACE_FLAG | \ + ACE_IDENTIFIER_GROUP | ACE_OWNER | ACE_GROUP | ACE_EVERYONE) + +/* + * ACL conversion helpers + */ + +typedef enum { + ace_unused, + ace_user_obj, + ace_user, + ace_group, /* includes GROUP and GROUP_OBJ */ + ace_other_obj +} ace_to_aent_state_t; + +typedef struct acevals { + uid_t key; + avl_node_t avl; + uint32_t mask; + uint32_t allowed; + uint32_t denied; + int aent_type; +} acevals_t; + +typedef struct ace_list { + acevals_t user_obj; + avl_tree_t user; + int numusers; + acevals_t group_obj; + avl_tree_t group; + int numgroups; + acevals_t other_obj; + uint32_t acl_mask; + int hasmask; + int dfacl_flag; + ace_to_aent_state_t state; + int seen; /* bitmask of all aclent_t a_type values seen */ +} ace_list_t; + +/* + * Generic shellsort, from K&R (1st ed, p 58.), somewhat modified. + * v = Ptr to array/vector of objs + * n = # objs in the array + * s = size of each obj (must be multiples of a word size) + * f = ptr to function to compare two objs + * returns (-1 = less than, 0 = equal, 1 = greater than + */ +void +ksort(caddr_t v, int n, int s, int (*f)()) +{ + int g, i, j, ii; + unsigned int *p1, *p2; + unsigned int tmp; + + /* No work to do */ + if (v == NULL || n <= 1) + return; + + /* Sanity check on arguments */ + ASSERT(((uintptr_t)v & 0x3) == 0 && (s & 0x3) == 0); + ASSERT(s > 0); + for (g = n / 2; g > 0; g /= 2) { + for (i = g; i < n; i++) { + for (j = i - g; j >= 0 && + (*f)(v + j * s, v + (j + g) * s) == 1; + j -= g) { + p1 = (void *)(v + j * s); + p2 = (void *)(v + (j + g) * s); + for (ii = 0; ii < s / 4; ii++) { + tmp = *p1; + *p1++ = *p2; + *p2++ = tmp; + } + } + } + } +} + +/* + * Compare two acls, all fields. Returns: + * -1 (less than) + * 0 (equal) + * +1 (greater than) + */ +int +cmp2acls(void *a, void *b) +{ + aclent_t *x = (aclent_t *)a; + aclent_t *y = (aclent_t *)b; + + /* Compare types */ + if (x->a_type < y->a_type) + return (-1); + if (x->a_type > y->a_type) + return (1); + /* Equal types; compare id's */ + if (x->a_id < y->a_id) + return (-1); + if (x->a_id > y->a_id) + return (1); + /* Equal ids; compare perms */ + if (x->a_perm < y->a_perm) + return (-1); + if (x->a_perm > y->a_perm) + return (1); + /* Totally equal */ + return (0); +} + +static int +cacl_malloc(void **ptr, size_t size) +{ + *ptr = kmem_zalloc(size, KM_SLEEP); + return (0); +} + + +#if !defined(_KERNEL) +acl_t * +acl_alloc(enum acl_type type) +{ + acl_t *aclp; + + if (cacl_malloc((void **)&aclp, sizeof (acl_t)) != 0) + return (NULL); + + aclp->acl_aclp = NULL; + aclp->acl_cnt = 0; + + switch (type) { + case ACE_T: + aclp->acl_type = ACE_T; + aclp->acl_entry_size = sizeof (ace_t); + break; + case ACLENT_T: + aclp->acl_type = ACLENT_T; + aclp->acl_entry_size = sizeof (aclent_t); + break; + default: + acl_free(aclp); + aclp = NULL; + } + return (aclp); +} + +/* + * Free acl_t structure + */ +void +acl_free(acl_t *aclp) +{ + int acl_size; + + if (aclp == NULL) + return; + + if (aclp->acl_aclp) { + acl_size = aclp->acl_cnt * aclp->acl_entry_size; + cacl_free(aclp->acl_aclp, acl_size); + } + + cacl_free(aclp, sizeof (acl_t)); +} + +static uint32_t +access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow) +{ + uint32_t access_mask = 0; + int acl_produce; + int synchronize_set = 0, write_owner_set = 0; + int delete_set = 0, write_attrs_set = 0; + int read_named_set = 0, write_named_set = 0; + + acl_produce = (ACL_SYNCHRONIZE_SET_ALLOW | + ACL_WRITE_ATTRS_OWNER_SET_ALLOW | + ACL_WRITE_ATTRS_WRITER_SET_DENY); + + if (isallow) { + synchronize_set = ACL_SYNCHRONIZE_SET_ALLOW; + write_owner_set = ACL_WRITE_OWNER_SET_ALLOW; + delete_set = ACL_DELETE_SET_ALLOW; + if (hasreadperm) + read_named_set = ACL_READ_NAMED_READER_SET_ALLOW; + if (haswriteperm) + write_named_set = ACL_WRITE_NAMED_WRITER_SET_ALLOW; + if (isowner) + write_attrs_set = ACL_WRITE_ATTRS_OWNER_SET_ALLOW; + else if (haswriteperm) + write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_ALLOW; + } else { + + synchronize_set = ACL_SYNCHRONIZE_SET_DENY; + write_owner_set = ACL_WRITE_OWNER_SET_DENY; + delete_set = ACL_DELETE_SET_DENY; + if (hasreadperm) + read_named_set = ACL_READ_NAMED_READER_SET_DENY; + if (haswriteperm) + write_named_set = ACL_WRITE_NAMED_WRITER_SET_DENY; + if (isowner) + write_attrs_set = ACL_WRITE_ATTRS_OWNER_SET_DENY; + else if (haswriteperm) + write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_DENY; + else + /* + * If the entity is not the owner and does not + * have write permissions ACE_WRITE_ATTRIBUTES will + * always go in the DENY ACE. + */ + access_mask |= ACE_WRITE_ATTRIBUTES; + } + + if (acl_produce & synchronize_set) + access_mask |= ACE_SYNCHRONIZE; + if (acl_produce & write_owner_set) + access_mask |= ACE_WRITE_OWNER; + if (acl_produce & delete_set) + access_mask |= ACE_DELETE; + if (acl_produce & write_attrs_set) + access_mask |= ACE_WRITE_ATTRIBUTES; + if (acl_produce & read_named_set) + access_mask |= ACE_READ_NAMED_ATTRS; + if (acl_produce & write_named_set) + access_mask |= ACE_WRITE_NAMED_ATTRS; + + return (access_mask); +} + +/* + * Given an mode_t, convert it into an access_mask as used + * by nfsace, assuming aclent_t -> nfsace semantics. + */ +static uint32_t +mode_to_ace_access(mode_t mode, boolean_t isdir, int isowner, int isallow) +{ + uint32_t access = 0; + int haswriteperm = 0; + int hasreadperm = 0; + + if (isallow) { + haswriteperm = (mode & S_IWOTH); + hasreadperm = (mode & S_IROTH); + } else { + haswriteperm = !(mode & S_IWOTH); + hasreadperm = !(mode & S_IROTH); + } + + /* + * The following call takes care of correctly setting the following + * mask bits in the access_mask: + * ACE_SYNCHRONIZE, ACE_WRITE_OWNER, ACE_DELETE, + * ACE_WRITE_ATTRIBUTES, ACE_WRITE_NAMED_ATTRS, ACE_READ_NAMED_ATTRS + */ + access = access_mask_set(haswriteperm, hasreadperm, isowner, isallow); + + if (isallow) { + access |= ACE_READ_ACL | ACE_READ_ATTRIBUTES; + if (isowner) + access |= ACE_WRITE_ACL; + } else { + if (! isowner) + access |= ACE_WRITE_ACL; + } + + /* read */ + if (mode & S_IROTH) { + access |= ACE_READ_DATA; + } + /* write */ + if (mode & S_IWOTH) { + access |= ACE_WRITE_DATA | + ACE_APPEND_DATA; + if (isdir) + access |= ACE_DELETE_CHILD; + } + /* exec */ + if (mode & S_IXOTH) { + access |= ACE_EXECUTE; + } + + return (access); +} + +/* + * Given an nfsace (presumably an ALLOW entry), make a + * corresponding DENY entry at the address given. + */ +static void +ace_make_deny(ace_t *allow, ace_t *deny, int isdir, int isowner) +{ + (void) memcpy(deny, allow, sizeof (ace_t)); + + deny->a_who = allow->a_who; + + deny->a_type = ACE_ACCESS_DENIED_ACE_TYPE; + deny->a_access_mask ^= ACE_POSIX_SUPPORTED_BITS; + if (isdir) + deny->a_access_mask ^= ACE_DELETE_CHILD; + + deny->a_access_mask &= ~(ACE_SYNCHRONIZE | ACE_WRITE_OWNER | + ACE_DELETE | ACE_WRITE_ATTRIBUTES | ACE_READ_NAMED_ATTRS | + ACE_WRITE_NAMED_ATTRS); + deny->a_access_mask |= access_mask_set((allow->a_access_mask & + ACE_WRITE_DATA), (allow->a_access_mask & ACE_READ_DATA), isowner, + B_FALSE); +} +/* + * Make an initial pass over an array of aclent_t's. Gather + * information such as an ACL_MASK (if any), number of users, + * number of groups, and whether the array needs to be sorted. + */ +static int +ln_aent_preprocess(aclent_t *aclent, int n, + int *hasmask, mode_t *mask, + int *numuser, int *numgroup, int *needsort) +{ + int error = 0; + int i; + int curtype = 0; + + *hasmask = 0; + *mask = 07; + *needsort = 0; + *numuser = 0; + *numgroup = 0; + + for (i = 0; i < n; i++) { + if (aclent[i].a_type < curtype) + *needsort = 1; + else if (aclent[i].a_type > curtype) + curtype = aclent[i].a_type; + if (aclent[i].a_type & USER) + (*numuser)++; + if (aclent[i].a_type & (GROUP | GROUP_OBJ)) + (*numgroup)++; + if (aclent[i].a_type & CLASS_OBJ) { + if (*hasmask) { + error = EINVAL; + goto out; + } else { + *hasmask = 1; + *mask = aclent[i].a_perm; + } + } + } + + if ((! *hasmask) && (*numuser + *numgroup > 1)) { + error = EINVAL; + goto out; + } + +out: + return (error); +} + +/* + * Convert an array of aclent_t into an array of nfsace entries, + * following POSIX draft -> nfsv4 conversion semantics as outlined in + * the IETF draft. + */ +static int +ln_aent_to_ace(aclent_t *aclent, int n, ace_t **acepp, int *rescount, int isdir) +{ + int error = 0; + mode_t mask; + int numuser, numgroup, needsort; + int resultsize = 0; + int i, groupi = 0, skip; + ace_t *acep, *result = NULL; + int hasmask; + + error = ln_aent_preprocess(aclent, n, &hasmask, &mask, + &numuser, &numgroup, &needsort); + if (error != 0) + goto out; + + /* allow + deny for each aclent */ + resultsize = n * 2; + if (hasmask) { + /* + * stick extra deny on the group_obj and on each + * user|group for the mask (the group_obj was added + * into the count for numgroup) + */ + resultsize += numuser + numgroup; + /* ... and don't count the mask itself */ + resultsize -= 2; + } + + /* sort the source if necessary */ + if (needsort) + ksort((caddr_t)aclent, n, sizeof (aclent_t), cmp2acls); + + if (cacl_malloc((void **)&result, resultsize * sizeof (ace_t)) != 0) + goto out; + + acep = result; + + for (i = 0; i < n; i++) { + /* + * don't process CLASS_OBJ (mask); mask was grabbed in + * ln_aent_preprocess() + */ + if (aclent[i].a_type & CLASS_OBJ) + continue; + + /* If we need an ACL_MASK emulator, prepend it now */ + if ((hasmask) && + (aclent[i].a_type & (USER | GROUP | GROUP_OBJ))) { + acep->a_type = ACE_ACCESS_DENIED_ACE_TYPE; + acep->a_flags = 0; + if (aclent[i].a_type & GROUP_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= + (ACE_IDENTIFIER_GROUP|ACE_GROUP); + } else if (aclent[i].a_type & USER) { + acep->a_who = aclent[i].a_id; + } else { + acep->a_who = aclent[i].a_id; + acep->a_flags |= ACE_IDENTIFIER_GROUP; + } + if (aclent[i].a_type & ACL_DEFAULT) { + acep->a_flags |= ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE; + } + /* + * Set the access mask for the prepended deny + * ace. To do this, we invert the mask (found + * in ln_aent_preprocess()) then convert it to an + * DENY ace access_mask. + */ + acep->a_access_mask = mode_to_ace_access((mask ^ 07), + isdir, 0, 0); + acep += 1; + } + + /* handle a_perm -> access_mask */ + acep->a_access_mask = mode_to_ace_access(aclent[i].a_perm, + isdir, aclent[i].a_type & USER_OBJ, 1); + + /* emulate a default aclent */ + if (aclent[i].a_type & ACL_DEFAULT) { + acep->a_flags |= ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE; + } + + /* + * handle a_perm and a_id + * + * this must be done last, since it involves the + * corresponding deny aces, which are handled + * differently for each different a_type. + */ + if (aclent[i].a_type & USER_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= ACE_OWNER; + ace_make_deny(acep, acep + 1, isdir, B_TRUE); + acep += 2; + } else if (aclent[i].a_type & USER) { + acep->a_who = aclent[i].a_id; + ace_make_deny(acep, acep + 1, isdir, B_FALSE); + acep += 2; + } else if (aclent[i].a_type & (GROUP_OBJ | GROUP)) { + if (aclent[i].a_type & GROUP_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= ACE_GROUP; + } else { + acep->a_who = aclent[i].a_id; + } + acep->a_flags |= ACE_IDENTIFIER_GROUP; + /* + * Set the corresponding deny for the group ace. + * + * The deny aces go after all of the groups, unlike + * everything else, where they immediately follow + * the allow ace. + * + * We calculate "skip", the number of slots to + * skip ahead for the deny ace, here. + * + * The pattern is: + * MD1 A1 MD2 A2 MD3 A3 D1 D2 D3 + * thus, skip is + * (2 * numgroup) - 1 - groupi + * (2 * numgroup) to account for MD + A + * - 1 to account for the fact that we're on the + * access (A), not the mask (MD) + * - groupi to account for the fact that we have + * passed up groupi number of MD's. + */ + skip = (2 * numgroup) - 1 - groupi; + ace_make_deny(acep, acep + skip, isdir, B_FALSE); + /* + * If we just did the last group, skip acep past + * all of the denies; else, just move ahead one. + */ + if (++groupi >= numgroup) + acep += numgroup + 1; + else + acep += 1; + } else if (aclent[i].a_type & OTHER_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= ACE_EVERYONE; + ace_make_deny(acep, acep + 1, isdir, B_FALSE); + acep += 2; + } else { + error = EINVAL; + goto out; + } + } + + *acepp = result; + *rescount = resultsize; + +out: + if (error != 0) { + if ((result != NULL) && (resultsize > 0)) { + cacl_free(result, resultsize * sizeof (ace_t)); + } + } + + return (error); +} + +static int +convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir, + ace_t **retacep, int *retacecnt) +{ + ace_t *acep; + ace_t *dfacep; + int acecnt = 0; + int dfacecnt = 0; + int dfaclstart = 0; + int dfaclcnt = 0; + aclent_t *aclp; + int i; + int error; + int acesz, dfacesz; + + ksort((caddr_t)aclentp, aclcnt, sizeof (aclent_t), cmp2acls); + + for (i = 0, aclp = aclentp; i < aclcnt; aclp++, i++) { + if (aclp->a_type & ACL_DEFAULT) + break; + } + + if (i < aclcnt) { + dfaclstart = i; + dfaclcnt = aclcnt - i; + } + + if (dfaclcnt && !isdir) { + return (EINVAL); + } + + error = ln_aent_to_ace(aclentp, i, &acep, &acecnt, isdir); + if (error) + return (error); + + if (dfaclcnt) { + error = ln_aent_to_ace(&aclentp[dfaclstart], dfaclcnt, + &dfacep, &dfacecnt, isdir); + if (error) { + if (acep) { + cacl_free(acep, acecnt * sizeof (ace_t)); + } + return (error); + } + } + + if (dfacecnt != 0) { + acesz = sizeof (ace_t) * acecnt; + dfacesz = sizeof (ace_t) * dfacecnt; + acep = cacl_realloc(acep, acesz, acesz + dfacesz); + if (acep == NULL) + return (ENOMEM); + if (dfaclcnt) { + (void) memcpy(acep + acecnt, dfacep, dfacesz); + } + } + if (dfaclcnt) + cacl_free(dfacep, dfacecnt * sizeof (ace_t)); + + *retacecnt = acecnt + dfacecnt; + *retacep = acep; + return (0); +} + +static int +ace_mask_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir) +{ + int error = 0; + o_mode_t mode = 0; + uint32_t bits, wantbits; + + /* read */ + if (mask & ACE_READ_DATA) + mode |= S_IROTH; + + /* write */ + wantbits = (ACE_WRITE_DATA | ACE_APPEND_DATA); + if (isdir) + wantbits |= ACE_DELETE_CHILD; + bits = mask & wantbits; + if (bits != 0) { + if (bits != wantbits) { + error = ENOTSUP; + goto out; + } + mode |= S_IWOTH; + } + + /* exec */ + if (mask & ACE_EXECUTE) { + mode |= S_IXOTH; + } + + *modep = mode; + +out: + return (error); +} + +static void +acevals_init(acevals_t *vals, uid_t key) +{ + bzero(vals, sizeof (*vals)); + vals->allowed = ACE_MASK_UNDEFINED; + vals->denied = ACE_MASK_UNDEFINED; + vals->mask = ACE_MASK_UNDEFINED; + vals->key = key; +} + +static void +ace_list_init(ace_list_t *al, int dfacl_flag) +{ + acevals_init(&al->user_obj, 0); + acevals_init(&al->group_obj, 0); + acevals_init(&al->other_obj, 0); + al->numusers = 0; + al->numgroups = 0; + al->acl_mask = 0; + al->hasmask = 0; + al->state = ace_unused; + al->seen = 0; + al->dfacl_flag = dfacl_flag; +} + +/* + * Find or create an acevals holder for a given id and avl tree. + * + * Note that only one thread will ever touch these avl trees, so + * there is no need for locking. + */ +static acevals_t * +acevals_find(ace_t *ace, avl_tree_t *avl, int *num) +{ + acevals_t key, *rc; + avl_index_t where; + + key.key = ace->a_who; + rc = avl_find(avl, &key, &where); + if (rc != NULL) + return (rc); + + /* this memory is freed by ln_ace_to_aent()->ace_list_free() */ + if (cacl_malloc((void **)&rc, sizeof (acevals_t)) != 0) + return (NULL); + + acevals_init(rc, ace->a_who); + avl_insert(avl, rc, where); + (*num)++; + + return (rc); +} + +static int +access_mask_check(ace_t *acep, int mask_bit, int isowner) +{ + int set_deny, err_deny; + int set_allow, err_allow; + int acl_consume; + int haswriteperm, hasreadperm; + + if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) { + haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 0 : 1; + hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 0 : 1; + } else { + haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 1 : 0; + hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 1 : 0; + } + + acl_consume = (ACL_SYNCHRONIZE_ERR_DENY | + ACL_DELETE_ERR_DENY | + ACL_WRITE_OWNER_ERR_DENY | + ACL_WRITE_OWNER_ERR_ALLOW | + ACL_WRITE_ATTRS_OWNER_SET_ALLOW | + ACL_WRITE_ATTRS_OWNER_ERR_DENY | + ACL_WRITE_ATTRS_WRITER_SET_DENY | + ACL_WRITE_ATTRS_WRITER_ERR_ALLOW | + ACL_WRITE_NAMED_WRITER_ERR_DENY | + ACL_READ_NAMED_READER_ERR_DENY); + + if (mask_bit == ACE_SYNCHRONIZE) { + set_deny = ACL_SYNCHRONIZE_SET_DENY; + err_deny = ACL_SYNCHRONIZE_ERR_DENY; + set_allow = ACL_SYNCHRONIZE_SET_ALLOW; + err_allow = ACL_SYNCHRONIZE_ERR_ALLOW; + } else if (mask_bit == ACE_WRITE_OWNER) { + set_deny = ACL_WRITE_OWNER_SET_DENY; + err_deny = ACL_WRITE_OWNER_ERR_DENY; + set_allow = ACL_WRITE_OWNER_SET_ALLOW; + err_allow = ACL_WRITE_OWNER_ERR_ALLOW; + } else if (mask_bit == ACE_DELETE) { + set_deny = ACL_DELETE_SET_DENY; + err_deny = ACL_DELETE_ERR_DENY; + set_allow = ACL_DELETE_SET_ALLOW; + err_allow = ACL_DELETE_ERR_ALLOW; + } else if (mask_bit == ACE_WRITE_ATTRIBUTES) { + if (isowner) { + set_deny = ACL_WRITE_ATTRS_OWNER_SET_DENY; + err_deny = ACL_WRITE_ATTRS_OWNER_ERR_DENY; + set_allow = ACL_WRITE_ATTRS_OWNER_SET_ALLOW; + err_allow = ACL_WRITE_ATTRS_OWNER_ERR_ALLOW; + } else if (haswriteperm) { + set_deny = ACL_WRITE_ATTRS_WRITER_SET_DENY; + err_deny = ACL_WRITE_ATTRS_WRITER_ERR_DENY; + set_allow = ACL_WRITE_ATTRS_WRITER_SET_ALLOW; + err_allow = ACL_WRITE_ATTRS_WRITER_ERR_ALLOW; + } else { + if ((acep->a_access_mask & mask_bit) && + (acep->a_type & ACE_ACCESS_ALLOWED_ACE_TYPE)) { + return (ENOTSUP); + } + return (0); + } + } else if (mask_bit == ACE_READ_NAMED_ATTRS) { + if (!hasreadperm) + return (0); + + set_deny = ACL_READ_NAMED_READER_SET_DENY; + err_deny = ACL_READ_NAMED_READER_ERR_DENY; + set_allow = ACL_READ_NAMED_READER_SET_ALLOW; + err_allow = ACL_READ_NAMED_READER_ERR_ALLOW; + } else if (mask_bit == ACE_WRITE_NAMED_ATTRS) { + if (!haswriteperm) + return (0); + + set_deny = ACL_WRITE_NAMED_WRITER_SET_DENY; + err_deny = ACL_WRITE_NAMED_WRITER_ERR_DENY; + set_allow = ACL_WRITE_NAMED_WRITER_SET_ALLOW; + err_allow = ACL_WRITE_NAMED_WRITER_ERR_ALLOW; + } else { + return (EINVAL); + } + + if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) { + if (acl_consume & set_deny) { + if (!(acep->a_access_mask & mask_bit)) { + return (ENOTSUP); + } + } else if (acl_consume & err_deny) { + if (acep->a_access_mask & mask_bit) { + return (ENOTSUP); + } + } + } else { + /* ACE_ACCESS_ALLOWED_ACE_TYPE */ + if (acl_consume & set_allow) { + if (!(acep->a_access_mask & mask_bit)) { + return (ENOTSUP); + } + } else if (acl_consume & err_allow) { + if (acep->a_access_mask & mask_bit) { + return (ENOTSUP); + } + } + } + return (0); +} + +static int +ace_to_aent_legal(ace_t *acep) +{ + int error = 0; + int isowner; + + /* only ALLOW or DENY */ + if ((acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE) && + (acep->a_type != ACE_ACCESS_DENIED_ACE_TYPE)) { + error = ENOTSUP; + goto out; + } + + /* check for invalid flags */ + if (acep->a_flags & ~(ACE_VALID_FLAG_BITS)) { + error = EINVAL; + goto out; + } + + /* some flags are illegal */ + if (acep->a_flags & (ACE_SUCCESSFUL_ACCESS_ACE_FLAG | + ACE_FAILED_ACCESS_ACE_FLAG | + ACE_NO_PROPAGATE_INHERIT_ACE)) { + error = ENOTSUP; + goto out; + } + + /* check for invalid masks */ + if (acep->a_access_mask & ~(ACE_VALID_MASK_BITS)) { + error = EINVAL; + goto out; + } + + if ((acep->a_flags & ACE_OWNER)) { + isowner = 1; + } else { + isowner = 0; + } + + error = access_mask_check(acep, ACE_SYNCHRONIZE, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_WRITE_OWNER, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_DELETE, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_WRITE_ATTRIBUTES, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_READ_NAMED_ATTRS, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_WRITE_NAMED_ATTRS, isowner); + if (error) + goto out; + + /* more detailed checking of masks */ + if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) { + if (! (acep->a_access_mask & ACE_READ_ATTRIBUTES)) { + error = ENOTSUP; + goto out; + } + if ((acep->a_access_mask & ACE_WRITE_DATA) && + (! (acep->a_access_mask & ACE_APPEND_DATA))) { + error = ENOTSUP; + goto out; + } + if ((! (acep->a_access_mask & ACE_WRITE_DATA)) && + (acep->a_access_mask & ACE_APPEND_DATA)) { + error = ENOTSUP; + goto out; + } + } + + /* ACL enforcement */ + if ((acep->a_access_mask & ACE_READ_ACL) && + (acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE)) { + error = ENOTSUP; + goto out; + } + if (acep->a_access_mask & ACE_WRITE_ACL) { + if ((acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) && + (isowner)) { + error = ENOTSUP; + goto out; + } + if ((acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) && + (! isowner)) { + error = ENOTSUP; + goto out; + } + } + +out: + return (error); +} + +static int +ace_allow_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir) +{ + /* ACE_READ_ACL and ACE_READ_ATTRIBUTES must both be set */ + if ((mask & (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) != + (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) { + return (ENOTSUP); + } + + return (ace_mask_to_mode(mask, modep, isdir)); +} + +static int +acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list, + uid_t owner, gid_t group, boolean_t isdir) +{ + int error; + uint32_t flips = ACE_POSIX_SUPPORTED_BITS; + + if (isdir) + flips |= ACE_DELETE_CHILD; + if (vals->allowed != (vals->denied ^ flips)) { + error = ENOTSUP; + goto out; + } + if ((list->hasmask) && (list->acl_mask != vals->mask) && + (vals->aent_type & (USER | GROUP | GROUP_OBJ))) { + error = ENOTSUP; + goto out; + } + error = ace_allow_to_mode(vals->allowed, &dest->a_perm, isdir); + if (error != 0) + goto out; + dest->a_type = vals->aent_type; + if (dest->a_type & (USER | GROUP)) { + dest->a_id = vals->key; + } else if (dest->a_type & USER_OBJ) { + dest->a_id = owner; + } else if (dest->a_type & GROUP_OBJ) { + dest->a_id = group; + } else if (dest->a_type & OTHER_OBJ) { + dest->a_id = 0; + } else { + error = EINVAL; + goto out; + } + +out: + return (error); +} + + +static int +ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt, + uid_t owner, gid_t group, boolean_t isdir) +{ + int error = 0; + aclent_t *aent, *result = NULL; + acevals_t *vals; + int resultcount; + + if ((list->seen & (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) != + (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) { + error = ENOTSUP; + goto out; + } + if ((! list->hasmask) && (list->numusers + list->numgroups > 0)) { + error = ENOTSUP; + goto out; + } + + resultcount = 3 + list->numusers + list->numgroups; + /* + * This must be the same condition as below, when we add the CLASS_OBJ + * (aka ACL mask) + */ + if ((list->hasmask) || (! list->dfacl_flag)) + resultcount += 1; + + if (cacl_malloc((void **)&result, + resultcount * sizeof (aclent_t)) != 0) { + error = ENOMEM; + goto out; + } + aent = result; + + /* USER_OBJ */ + if (!(list->user_obj.aent_type & USER_OBJ)) { + error = EINVAL; + goto out; + } + + error = acevals_to_aent(&list->user_obj, aent, list, owner, group, + isdir); + + if (error != 0) + goto out; + ++aent; + /* USER */ + vals = NULL; + for (vals = avl_first(&list->user); vals != NULL; + vals = AVL_NEXT(&list->user, vals)) { + if (!(vals->aent_type & USER)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(vals, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + } + /* GROUP_OBJ */ + if (!(list->group_obj.aent_type & GROUP_OBJ)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(&list->group_obj, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + /* GROUP */ + vals = NULL; + for (vals = avl_first(&list->group); vals != NULL; + vals = AVL_NEXT(&list->group, vals)) { + if (!(vals->aent_type & GROUP)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(vals, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + } + /* + * CLASS_OBJ (aka ACL_MASK) + * + * An ACL_MASK is not fabricated if the ACL is a default ACL. + * This is to follow UFS's behavior. + */ + if ((list->hasmask) || (! list->dfacl_flag)) { + if (list->hasmask) { + uint32_t flips = ACE_POSIX_SUPPORTED_BITS; + if (isdir) + flips |= ACE_DELETE_CHILD; + error = ace_mask_to_mode(list->acl_mask ^ flips, + &aent->a_perm, isdir); + if (error != 0) + goto out; + } else { + /* fabricate the ACL_MASK from the group permissions */ + error = ace_mask_to_mode(list->group_obj.allowed, + &aent->a_perm, isdir); + if (error != 0) + goto out; + } + aent->a_id = 0; + aent->a_type = CLASS_OBJ | list->dfacl_flag; + ++aent; + } + /* OTHER_OBJ */ + if (!(list->other_obj.aent_type & OTHER_OBJ)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(&list->other_obj, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + + *aclentp = result; + *aclcnt = resultcount; + +out: + if (error != 0) { + if (result != NULL) + cacl_free(result, resultcount * sizeof (aclent_t)); + } + + return (error); +} + + +/* + * free all data associated with an ace_list + */ +static void +ace_list_free(ace_list_t *al) +{ + acevals_t *node; + void *cookie; + + if (al == NULL) + return; + + cookie = NULL; + while ((node = avl_destroy_nodes(&al->user, &cookie)) != NULL) + cacl_free(node, sizeof (acevals_t)); + cookie = NULL; + while ((node = avl_destroy_nodes(&al->group, &cookie)) != NULL) + cacl_free(node, sizeof (acevals_t)); + + avl_destroy(&al->user); + avl_destroy(&al->group); + + /* free the container itself */ + cacl_free(al, sizeof (ace_list_t)); +} + +static int +acevals_compare(const void *va, const void *vb) +{ + const acevals_t *a = va, *b = vb; + + if (a->key == b->key) + return (0); + + if (a->key > b->key) + return (1); + + else + return (-1); +} + +/* + * Convert a list of ace_t entries to equivalent regular and default + * aclent_t lists. Return error (ENOTSUP) when conversion is not possible. + */ +static int +ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group, + aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt, + boolean_t isdir) +{ + int error = 0; + ace_t *acep; + uint32_t bits; + int i; + ace_list_t *normacl = NULL, *dfacl = NULL, *acl; + acevals_t *vals; + + *aclentp = NULL; + *aclcnt = 0; + *dfaclentp = NULL; + *dfaclcnt = 0; + + /* we need at least user_obj, group_obj, and other_obj */ + if (n < 6) { + error = ENOTSUP; + goto out; + } + if (ace == NULL) { + error = EINVAL; + goto out; + } + + error = cacl_malloc((void **)&normacl, sizeof (ace_list_t)); + if (error != 0) + goto out; + + avl_create(&normacl->user, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + avl_create(&normacl->group, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + + ace_list_init(normacl, 0); + + error = cacl_malloc((void **)&dfacl, sizeof (ace_list_t)); + if (error != 0) + goto out; + + avl_create(&dfacl->user, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + avl_create(&dfacl->group, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + ace_list_init(dfacl, ACL_DEFAULT); + + /* process every ace_t... */ + for (i = 0; i < n; i++) { + acep = &ace[i]; + + /* rule out certain cases quickly */ + error = ace_to_aent_legal(acep); + if (error != 0) + goto out; + + /* + * Turn off these bits in order to not have to worry about + * them when doing the checks for compliments. + */ + acep->a_access_mask &= ~(ACE_WRITE_OWNER | ACE_DELETE | + ACE_SYNCHRONIZE | ACE_WRITE_ATTRIBUTES | + ACE_READ_NAMED_ATTRS | ACE_WRITE_NAMED_ATTRS); + + /* see if this should be a regular or default acl */ + bits = acep->a_flags & + (ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE); + if (bits != 0) { + /* all or nothing on these inherit bits */ + if (bits != (ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE)) { + error = ENOTSUP; + goto out; + } + acl = dfacl; + } else { + acl = normacl; + } + + if ((acep->a_flags & ACE_OWNER)) { + if (acl->state > ace_user_obj) { + error = ENOTSUP; + goto out; + } + acl->state = ace_user_obj; + acl->seen |= USER_OBJ; + vals = &acl->user_obj; + vals->aent_type = USER_OBJ | acl->dfacl_flag; + } else if ((acep->a_flags & ACE_EVERYONE)) { + acl->state = ace_other_obj; + acl->seen |= OTHER_OBJ; + vals = &acl->other_obj; + vals->aent_type = OTHER_OBJ | acl->dfacl_flag; + } else if (acep->a_flags & ACE_IDENTIFIER_GROUP) { + if (acl->state > ace_group) { + error = ENOTSUP; + goto out; + } + if ((acep->a_flags & ACE_GROUP)) { + acl->seen |= GROUP_OBJ; + vals = &acl->group_obj; + vals->aent_type = GROUP_OBJ | acl->dfacl_flag; + } else { + acl->seen |= GROUP; + vals = acevals_find(acep, &acl->group, + &acl->numgroups); + if (vals == NULL) { + error = ENOMEM; + goto out; + } + vals->aent_type = GROUP | acl->dfacl_flag; + } + acl->state = ace_group; + } else { + if (acl->state > ace_user) { + error = ENOTSUP; + goto out; + } + acl->state = ace_user; + acl->seen |= USER; + vals = acevals_find(acep, &acl->user, + &acl->numusers); + if (vals == NULL) { + error = ENOMEM; + goto out; + } + vals->aent_type = USER | acl->dfacl_flag; + } + + if (!(acl->state > ace_unused)) { + error = EINVAL; + goto out; + } + + if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) { + /* no more than one allowed per aclent_t */ + if (vals->allowed != ACE_MASK_UNDEFINED) { + error = ENOTSUP; + goto out; + } + vals->allowed = acep->a_access_mask; + } else { + /* + * it's a DENY; if there was a previous DENY, it + * must have been an ACL_MASK. + */ + if (vals->denied != ACE_MASK_UNDEFINED) { + /* ACL_MASK is for USER and GROUP only */ + if ((acl->state != ace_user) && + (acl->state != ace_group)) { + error = ENOTSUP; + goto out; + } + + if (! acl->hasmask) { + acl->hasmask = 1; + acl->acl_mask = vals->denied; + /* check for mismatched ACL_MASK emulations */ + } else if (acl->acl_mask != vals->denied) { + error = ENOTSUP; + goto out; + } + vals->mask = vals->denied; + } + vals->denied = acep->a_access_mask; + } + } + + /* done collating; produce the aclent_t lists */ + if (normacl->state != ace_unused) { + error = ace_list_to_aent(normacl, aclentp, aclcnt, + owner, group, isdir); + if (error != 0) { + goto out; + } + } + if (dfacl->state != ace_unused) { + error = ace_list_to_aent(dfacl, dfaclentp, dfaclcnt, + owner, group, isdir); + if (error != 0) { + goto out; + } + } + +out: + if (normacl != NULL) + ace_list_free(normacl); + if (dfacl != NULL) + ace_list_free(dfacl); + + return (error); +} + +static int +convert_ace_to_aent(ace_t *acebufp, int acecnt, boolean_t isdir, + uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt) +{ + int error = 0; + aclent_t *aclentp, *dfaclentp; + int aclcnt, dfaclcnt; + int aclsz, dfaclsz; + + error = ln_ace_to_aent(acebufp, acecnt, owner, group, + &aclentp, &aclcnt, &dfaclentp, &dfaclcnt, isdir); + + if (error) + return (error); + + + if (dfaclcnt != 0) { + /* + * Slap aclentp and dfaclentp into a single array. + */ + aclsz = sizeof (aclent_t) * aclcnt; + dfaclsz = sizeof (aclent_t) * dfaclcnt; + aclentp = cacl_realloc(aclentp, aclsz, aclsz + dfaclsz); + if (aclentp != NULL) { + (void) memcpy(aclentp + aclcnt, dfaclentp, dfaclsz); + } else { + error = ENOMEM; + } + } + + if (aclentp) { + *retaclentp = aclentp; + *retaclcnt = aclcnt + dfaclcnt; + } + + if (dfaclentp) + cacl_free(dfaclentp, dfaclsz); + + return (error); +} + + +int +acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir, uid_t owner, + gid_t group) +{ + int aclcnt; + void *acldata; + int error; + + /* + * See if we need to translate + */ + if ((target_flavor == _ACL_ACE_ENABLED && aclp->acl_type == ACE_T) || + (target_flavor == _ACL_ACLENT_ENABLED && + aclp->acl_type == ACLENT_T)) + return (0); + + if (target_flavor == -1) { + error = EINVAL; + goto out; + } + + if (target_flavor == _ACL_ACE_ENABLED && + aclp->acl_type == ACLENT_T) { + error = convert_aent_to_ace(aclp->acl_aclp, + aclp->acl_cnt, isdir, (ace_t **)&acldata, &aclcnt); + if (error) + goto out; + + } else if (target_flavor == _ACL_ACLENT_ENABLED && + aclp->acl_type == ACE_T) { + error = convert_ace_to_aent(aclp->acl_aclp, aclp->acl_cnt, + isdir, owner, group, (aclent_t **)&acldata, &aclcnt); + if (error) + goto out; + } else { + error = ENOTSUP; + goto out; + } + + /* + * replace old acl with newly translated acl + */ + cacl_free(aclp->acl_aclp, aclp->acl_cnt * aclp->acl_entry_size); + aclp->acl_aclp = acldata; + aclp->acl_cnt = aclcnt; + if (target_flavor == _ACL_ACE_ENABLED) { + aclp->acl_type = ACE_T; + aclp->acl_entry_size = sizeof (ace_t); + } else { + aclp->acl_type = ACLENT_T; + aclp->acl_entry_size = sizeof (aclent_t); + } + return (0); + +out: + +#if !defined(_KERNEL) + errno = error; + return (-1); +#else + return (error); +#endif +} +#endif /* !_KERNEL */ + +#define SET_ACE(acl, index, who, mask, type, flags) { \ + acl[0][index].a_who = (uint32_t)who; \ + acl[0][index].a_type = type; \ + acl[0][index].a_flags = flags; \ + acl[0][index++].a_access_mask = mask; \ +} + +void +acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks) +{ + uint32_t read_mask = ACE_READ_DATA; + uint32_t write_mask = ACE_WRITE_DATA|ACE_APPEND_DATA; + uint32_t execute_mask = ACE_EXECUTE; + + (void) isdir; /* will need this later */ + + masks->deny1 = 0; + if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH))) + masks->deny1 |= read_mask; + if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) + masks->deny1 |= write_mask; + if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) + masks->deny1 |= execute_mask; + + masks->deny2 = 0; + if (!(mode & S_IRGRP) && (mode & S_IROTH)) + masks->deny2 |= read_mask; + if (!(mode & S_IWGRP) && (mode & S_IWOTH)) + masks->deny2 |= write_mask; + if (!(mode & S_IXGRP) && (mode & S_IXOTH)) + masks->deny2 |= execute_mask; + + masks->allow0 = 0; + if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH))) + masks->allow0 |= read_mask; + if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH))) + masks->allow0 |= write_mask; + if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH))) + masks->allow0 |= execute_mask; + + masks->owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| + ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES| + ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE; + if (mode & S_IRUSR) + masks->owner |= read_mask; + if (mode & S_IWUSR) + masks->owner |= write_mask; + if (mode & S_IXUSR) + masks->owner |= execute_mask; + + masks->group = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IRGRP) + masks->group |= read_mask; + if (mode & S_IWGRP) + masks->group |= write_mask; + if (mode & S_IXGRP) + masks->group |= execute_mask; + + masks->everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IROTH) + masks->everyone |= read_mask; + if (mode & S_IWOTH) + masks->everyone |= write_mask; + if (mode & S_IXOTH) + masks->everyone |= execute_mask; +} + +int +acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count) +{ + int index = 0; + int error; + trivial_acl_t masks; + + *count = 3; + acl_trivial_access_masks(mode, isdir, &masks); + + if (masks.allow0) + (*count)++; + if (masks.deny1) + (*count)++; + if (masks.deny2) + (*count)++; + + if ((error = cacl_malloc((void **)acl, *count * sizeof (ace_t))) != 0) + return (error); + + if (masks.allow0) { + SET_ACE(acl, index, -1, masks.allow0, + ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER); + } + if (masks.deny1) { + SET_ACE(acl, index, -1, masks.deny1, + ACE_ACCESS_DENIED_ACE_TYPE, ACE_OWNER); + } + if (masks.deny2) { + SET_ACE(acl, index, -1, masks.deny2, + ACE_ACCESS_DENIED_ACE_TYPE, ACE_GROUP|ACE_IDENTIFIER_GROUP); + } + + SET_ACE(acl, index, -1, masks.owner, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_OWNER); + SET_ACE(acl, index, -1, masks.group, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_IDENTIFIER_GROUP|ACE_GROUP); + SET_ACE(acl, index, -1, masks.everyone, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_EVERYONE); + + return (0); +} + +/* + * ace_trivial: + * determine whether an ace_t acl is trivial + * + * Trivialness implies that the acl is composed of only + * owner, group, everyone entries. ACL can't + * have read_acl denied, and write_owner/write_acl/write_attributes + * can only be owner@ entry. + */ +int +ace_trivial_common(void *acep, int aclcnt, + uint64_t (*walk)(void *, uint64_t, int aclcnt, + uint16_t *, uint16_t *, uint32_t *)) +{ + uint16_t flags; + uint32_t mask; + uint16_t type; + uint64_t cookie = 0; + + while ((cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask))) { + switch (flags & ACE_TYPE_FLAGS) { + case ACE_OWNER: + case ACE_GROUP|ACE_IDENTIFIER_GROUP: + case ACE_EVERYONE: + break; + default: + return (1); + + } + + if (flags & (ACE_FILE_INHERIT_ACE| + ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE| + ACE_INHERIT_ONLY_ACE)) + return (1); + + /* + * Special check for some special bits + * + * Don't allow anybody to deny reading basic + * attributes or a files ACL. + */ + if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && + (type == ACE_ACCESS_DENIED_ACE_TYPE)) + return (1); + + /* + * Delete permissions are never set by default + */ + if (mask & (ACE_DELETE|ACE_DELETE_CHILD)) + return (1); + /* + * only allow owner@ to have + * write_acl/write_owner/write_attributes/write_xattr/ + */ + if (type == ACE_ACCESS_ALLOWED_ACE_TYPE && + (!(flags & ACE_OWNER) && (mask & + (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES| + ACE_WRITE_NAMED_ATTRS)))) + return (1); + + } + return (0); +} + +uint64_t +ace_walk(void *datap, uint64_t cookie, int aclcnt, uint16_t *flags, + uint16_t *type, uint32_t *mask) +{ + ace_t *acep = datap; + + if (cookie >= aclcnt) + return (0); + + *flags = acep[cookie].a_flags; + *type = acep[cookie].a_type; + *mask = acep[cookie++].a_access_mask; + + return (cookie); +} + +int +ace_trivial(ace_t *acep, int aclcnt) +{ + return (ace_trivial_common(acep, aclcnt, ace_walk)); +} diff --git a/module/os/freebsd/spl/callb.c b/module/os/freebsd/spl/callb.c new file mode 100644 index 000000000..d4a0e141c --- /dev/null +++ b/module/os/freebsd/spl/callb.c @@ -0,0 +1,372 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/mutex.h> +#include <sys/condvar.h> +#include <sys/callb.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/kobj.h> +#include <sys/systm.h> /* for delay() */ +#include <sys/taskq.h> /* For TASKQ_NAMELEN */ +#include <sys/kernel.h> + +#define CB_MAXNAME TASKQ_NAMELEN + +/* + * The callb mechanism provides generic event scheduling/echoing. + * A callb function is registered and called on behalf of the event. + */ +typedef struct callb { + struct callb *c_next; /* next in class or on freelist */ + kthread_id_t c_thread; /* ptr to caller's thread struct */ + char c_flag; /* info about the callb state */ + uchar_t c_class; /* this callb's class */ + kcondvar_t c_done_cv; /* signal callb completion */ + boolean_t (*c_func)(); /* cb function: returns true if ok */ + void *c_arg; /* arg to c_func */ + char c_name[CB_MAXNAME+1]; /* debug:max func name length */ +} callb_t; + +/* + * callb c_flag bitmap definitions + */ +#define CALLB_FREE 0x0 +#define CALLB_TAKEN 0x1 +#define CALLB_EXECUTING 0x2 + +/* + * Basic structure for a callb table. + * All callbs are organized into different class groups described + * by ct_class array. + * The callbs within a class are single-linked and normally run by a + * serial execution. + */ +typedef struct callb_table { + kmutex_t ct_lock; /* protect all callb states */ + callb_t *ct_freelist; /* free callb structures */ + int ct_busy; /* != 0 prevents additions */ + kcondvar_t ct_busy_cv; /* to wait for not busy */ + int ct_ncallb; /* num of callbs allocated */ + callb_t *ct_first_cb[NCBCLASS]; /* ptr to 1st callb in a class */ +} callb_table_t; + +int callb_timeout_sec = CPR_KTHREAD_TIMEOUT_SEC; + +static callb_id_t callb_add_common(boolean_t (*)(void *, int), + void *, int, char *, kthread_id_t); + +static callb_table_t callb_table; /* system level callback table */ +static callb_table_t *ct = &callb_table; +static kmutex_t callb_safe_mutex; +callb_cpr_t callb_cprinfo_safe = { + &callb_safe_mutex, CALLB_CPR_ALWAYS_SAFE, 0, {0, 0} }; + +/* + * Init all callb tables in the system. + */ +void +callb_init(void *dummy __unused) +{ + callb_table.ct_busy = 0; /* mark table open for additions */ + mutex_init(&callb_safe_mutex, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&callb_table.ct_lock, NULL, MUTEX_DEFAULT, NULL); +} + +void +callb_fini(void *dummy __unused) +{ + callb_t *cp; + int i; + + mutex_enter(&ct->ct_lock); + for (i = 0; i < 16; i++) { + while ((cp = ct->ct_freelist) != NULL) { + ct->ct_freelist = cp->c_next; + ct->ct_ncallb--; + kmem_free(cp, sizeof (callb_t)); + } + if (ct->ct_ncallb == 0) + break; + /* Not all callbacks finished, waiting for the rest. */ + mutex_exit(&ct->ct_lock); + tsleep(ct, 0, "callb", hz / 4); + mutex_enter(&ct->ct_lock); + } + if (ct->ct_ncallb > 0) + printf("%s: Leaked %d callbacks!\n", __func__, ct->ct_ncallb); + mutex_exit(&ct->ct_lock); + mutex_destroy(&callb_safe_mutex); + mutex_destroy(&callb_table.ct_lock); +} + +/* + * callout_add() is called to register func() be called later. + */ +static callb_id_t +callb_add_common(boolean_t (*func)(void *arg, int code), + void *arg, int class, char *name, kthread_id_t t) +{ + callb_t *cp; + + ASSERT(class < NCBCLASS); + + mutex_enter(&ct->ct_lock); + while (ct->ct_busy) + cv_wait(&ct->ct_busy_cv, &ct->ct_lock); + if ((cp = ct->ct_freelist) == NULL) { + ct->ct_ncallb++; + cp = (callb_t *)kmem_zalloc(sizeof (callb_t), KM_SLEEP); + } + ct->ct_freelist = cp->c_next; + cp->c_thread = t; + cp->c_func = func; + cp->c_arg = arg; + cp->c_class = (uchar_t)class; + cp->c_flag |= CALLB_TAKEN; +#ifdef DEBUG + if (strlen(name) > CB_MAXNAME) + cmn_err(CE_WARN, "callb_add: name of callback function '%s' " + "too long -- truncated to %d chars", + name, CB_MAXNAME); +#endif + (void) strncpy(cp->c_name, name, CB_MAXNAME); + cp->c_name[CB_MAXNAME] = '\0'; + + /* + * Insert the new callb at the head of its class list. + */ + cp->c_next = ct->ct_first_cb[class]; + ct->ct_first_cb[class] = cp; + + mutex_exit(&ct->ct_lock); + return ((callb_id_t)cp); +} + +/* + * The default function to add an entry to the callback table. Since + * it uses curthread as the thread identifier to store in the table, + * it should be used for the normal case of a thread which is calling + * to add ITSELF to the table. + */ +callb_id_t +callb_add(boolean_t (*func)(void *arg, int code), + void *arg, int class, char *name) +{ + return (callb_add_common(func, arg, class, name, curthread)); +} + +/* + * A special version of callb_add() above for use by threads which + * might be adding an entry to the table on behalf of some other + * thread (for example, one which is constructed but not yet running). + * In this version the thread id is an argument. + */ +callb_id_t +callb_add_thread(boolean_t (*func)(void *arg, int code), + void *arg, int class, char *name, kthread_id_t t) +{ + return (callb_add_common(func, arg, class, name, t)); +} + +/* + * callout_delete() is called to remove an entry identified by id + * that was originally placed there by a call to callout_add(). + * return -1 if fail to delete a callb entry otherwise return 0. + */ +int +callb_delete(callb_id_t id) +{ + callb_t **pp; + callb_t *me = (callb_t *)id; + + mutex_enter(&ct->ct_lock); + + for (;;) { + pp = &ct->ct_first_cb[me->c_class]; + while (*pp != NULL && *pp != me) + pp = &(*pp)->c_next; + +#ifdef DEBUG + if (*pp != me) { + cmn_err(CE_WARN, "callb delete bogus entry 0x%p", + (void *)me); + mutex_exit(&ct->ct_lock); + return (-1); + } +#endif /* DEBUG */ + + /* + * It is not allowed to delete a callb in the middle of + * executing otherwise, the callb_execute() will be confused. + */ + if (!(me->c_flag & CALLB_EXECUTING)) + break; + + cv_wait(&me->c_done_cv, &ct->ct_lock); + } + /* relink the class list */ + *pp = me->c_next; + + /* clean up myself and return the free callb to the head of freelist */ + me->c_flag = CALLB_FREE; + me->c_next = ct->ct_freelist; + ct->ct_freelist = me; + + mutex_exit(&ct->ct_lock); + return (0); +} + +/* + * class: indicates to execute all callbs in the same class; + * code: optional argument for the callb functions. + * return: = 0: success + * != 0: ptr to string supplied when callback was registered + */ +void * +callb_execute_class(int class, int code) +{ + callb_t *cp; + void *ret = NULL; + + ASSERT(class < NCBCLASS); + + mutex_enter(&ct->ct_lock); + + for (cp = ct->ct_first_cb[class]; + cp != NULL && ret == 0; cp = cp->c_next) { + while (cp->c_flag & CALLB_EXECUTING) + cv_wait(&cp->c_done_cv, &ct->ct_lock); + /* + * cont if the callb is deleted while we're sleeping + */ + if (cp->c_flag == CALLB_FREE) + continue; + cp->c_flag |= CALLB_EXECUTING; + +#ifdef CALLB_DEBUG + printf("callb_execute: name=%s func=%p arg=%p\n", + cp->c_name, (void *)cp->c_func, (void *)cp->c_arg); +#endif /* CALLB_DEBUG */ + + mutex_exit(&ct->ct_lock); + /* If callback function fails, pass back client's name */ + if (!(*cp->c_func)(cp->c_arg, code)) + ret = cp->c_name; + mutex_enter(&ct->ct_lock); + + cp->c_flag &= ~CALLB_EXECUTING; + cv_broadcast(&cp->c_done_cv); + } + mutex_exit(&ct->ct_lock); + return (ret); +} + +/* + * callers make sure no recursive entries to this func. + * dp->cc_lockp is registered by callb_add to protect callb_cpr_t structure. + * + * When calling to stop a kernel thread (code == CB_CODE_CPR_CHKPT) we + * use a cv_timedwait() in case the kernel thread is blocked. + * + * Note that this is a generic callback handler for daemon CPR and + * should NOT be changed to accommodate any specific requirement in a daemon. + * Individual daemons that require changes to the handler shall write + * callback routines in their own daemon modules. + */ +boolean_t +callb_generic_cpr(void *arg, int code) +{ + callb_cpr_t *cp = (callb_cpr_t *)arg; + clock_t ret = 0; /* assume success */ + + mutex_enter(cp->cc_lockp); + + switch (code) { + case CB_CODE_CPR_CHKPT: + cp->cc_events |= CALLB_CPR_START; +#ifdef CPR_NOT_THREAD_SAFE + while (!(cp->cc_events & CALLB_CPR_SAFE)) + /* cv_timedwait() returns -1 if it times out. */ + if ((ret = cv_reltimedwait(&cp->cc_callb_cv, + cp->cc_lockp, (callb_timeout_sec * hz), + TR_CLOCK_TICK)) == -1) + break; +#endif + break; + + case CB_CODE_CPR_RESUME: + cp->cc_events &= ~CALLB_CPR_START; + cv_signal(&cp->cc_stop_cv); + break; + } + mutex_exit(cp->cc_lockp); + return (ret != -1); +} + +/* + * The generic callback function associated with kernel threads which + * are always considered safe. + */ +/* ARGSUSED */ +boolean_t +callb_generic_cpr_safe(void *arg, int code) +{ + return (B_TRUE); +} +/* + * Prevent additions to callback table. + */ +void +callb_lock_table(void) +{ + mutex_enter(&ct->ct_lock); + ASSERT(ct->ct_busy == 0); + ct->ct_busy = 1; + mutex_exit(&ct->ct_lock); +} + +/* + * Allow additions to callback table. + */ +void +callb_unlock_table(void) +{ + mutex_enter(&ct->ct_lock); + ASSERT(ct->ct_busy != 0); + ct->ct_busy = 0; + cv_broadcast(&ct->ct_busy_cv); + mutex_exit(&ct->ct_lock); +} + +SYSINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_init, NULL); +SYSUNINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_fini, NULL); diff --git a/module/os/freebsd/spl/list.c b/module/os/freebsd/spl/list.c new file mode 100644 index 000000000..e8db13a5c --- /dev/null +++ b/module/os/freebsd/spl/list.c @@ -0,0 +1,245 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Generic doubly-linked list implementation + */ + +#include <sys/list.h> +#include <sys/list_impl.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/debug.h> + +#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset)) +#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset)) +#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head) + +#define list_insert_after_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_prev = (node); \ + lnew->list_next = (node)->list_next; \ + (node)->list_next->list_prev = lnew; \ + (node)->list_next = lnew; \ +} + +#define list_insert_before_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_next = (node); \ + lnew->list_prev = (node)->list_prev; \ + (node)->list_prev->list_next = lnew; \ + (node)->list_prev = lnew; \ +} + +#define list_remove_node(node) \ + (node)->list_prev->list_next = (node)->list_next; \ + (node)->list_next->list_prev = (node)->list_prev; \ + (node)->list_next = (node)->list_prev = NULL + +void +list_create(list_t *list, size_t size, size_t offset) +{ + ASSERT(list); + ASSERT(size > 0); + ASSERT(size >= offset + sizeof (list_node_t)); + + list->list_size = size; + list->list_offset = offset; + list->list_head.list_next = list->list_head.list_prev = + &list->list_head; +} + +void +list_destroy(list_t *list) +{ + list_node_t *node = &list->list_head; + + ASSERT(list); + ASSERT(list->list_head.list_next == node); + ASSERT(list->list_head.list_prev == node); + + node->list_next = node->list_prev = NULL; +} + +void +list_insert_after(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_head(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_after_node(list, lold, nobject); + } +} + +void +list_insert_before(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_tail(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_before_node(list, lold, nobject); + } +} + +void +list_insert_head(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_after_node(list, lold, object); +} + +void +list_insert_tail(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_before_node(list, lold, object); +} + +void +list_remove(list_t *list, void *object) +{ + list_node_t *lold = list_d2l(list, object); + ASSERT(!list_empty(list)); + ASSERT(lold->list_next != NULL); + list_remove_node(lold); +} + +void * +list_remove_head(list_t *list) +{ + list_node_t *head = list->list_head.list_next; + if (head == &list->list_head) + return (NULL); + list_remove_node(head); + return (list_object(list, head)); +} + +void * +list_remove_tail(list_t *list) +{ + list_node_t *tail = list->list_head.list_prev; + if (tail == &list->list_head) + return (NULL); + list_remove_node(tail); + return (list_object(list, tail)); +} + +void * +list_head(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_next)); +} + +void * +list_tail(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_prev)); +} + +void * +list_next(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_next != &list->list_head) + return (list_object(list, node->list_next)); + + return (NULL); +} + +void * +list_prev(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_prev != &list->list_head) + return (list_object(list, node->list_prev)); + + return (NULL); +} + +/* + * Insert src list after dst list. Empty src list thereafter. + */ +void +list_move_tail(list_t *dst, list_t *src) +{ + list_node_t *dstnode = &dst->list_head; + list_node_t *srcnode = &src->list_head; + + ASSERT(dst->list_size == src->list_size); + ASSERT(dst->list_offset == src->list_offset); + + if (list_empty(src)) + return; + + dstnode->list_prev->list_next = srcnode->list_next; + srcnode->list_next->list_prev = dstnode->list_prev; + dstnode->list_prev = srcnode->list_prev; + srcnode->list_prev->list_next = dstnode; + + /* empty src list */ + srcnode->list_next = srcnode->list_prev = srcnode; +} + +void +list_link_replace(list_node_t *lold, list_node_t *lnew) +{ + ASSERT(list_link_active(lold)); + ASSERT(!list_link_active(lnew)); + + lnew->list_next = lold->list_next; + lnew->list_prev = lold->list_prev; + lold->list_prev->list_next = lnew; + lold->list_next->list_prev = lnew; + lold->list_next = lold->list_prev = NULL; +} + +void +list_link_init(list_node_t *link) +{ + link->list_next = NULL; + link->list_prev = NULL; +} + +int +list_link_active(list_node_t *link) +{ + return (link->list_next != NULL); +} + +int +list_is_empty(list_t *list) +{ + return (list_empty(list)); +} diff --git a/module/os/freebsd/spl/sha224.h b/module/os/freebsd/spl/sha224.h new file mode 100644 index 000000000..0abd43068 --- /dev/null +++ b/module/os/freebsd/spl/sha224.h @@ -0,0 +1,96 @@ +/* + * Copyright 2005 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SHA224_H_ +#define _SHA224_H_ + +#ifndef _KERNEL +#include <sys/types.h> +#endif + +#define SHA224_BLOCK_LENGTH 64 +#define SHA224_DIGEST_LENGTH 28 +#define SHA224_DIGEST_STRING_LENGTH (SHA224_DIGEST_LENGTH * 2 + 1) + +typedef struct SHA224Context { + uint32_t state[8]; + uint64_t count; + uint8_t buf[SHA224_BLOCK_LENGTH]; +} SHA224_CTX; + +__BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ + +#ifndef SHA224_Init +#define SHA224_Init _libmd_SHA224_Init +#endif +#ifndef SHA224_Update +#define SHA224_Update _libmd_SHA224_Update +#endif +#ifndef SHA224_Final +#define SHA224_Final _libmd_SHA224_Final +#endif +#ifndef SHA224_End +#define SHA224_End _libmd_SHA224_End +#endif +#ifndef SHA224_Fd +#define SHA224_Fd _libmd_SHA224_Fd +#endif +#ifndef SHA224_FdChunk +#define SHA224_FdChunk _libmd_SHA224_FdChunk +#endif +#ifndef SHA224_File +#define SHA224_File _libmd_SHA224_File +#endif +#ifndef SHA224_FileChunk +#define SHA224_FileChunk _libmd_SHA224_FileChunk +#endif +#ifndef SHA224_Data +#define SHA224_Data _libmd_SHA224_Data +#endif + +#ifndef SHA224_version +#define SHA224_version _libmd_SHA224_version +#endif + +void SHA224_Init(SHA224_CTX *); +void SHA224_Update(SHA224_CTX *, const void *, size_t); +void SHA224_Final(unsigned char [__min_size(SHA224_DIGEST_LENGTH)], + SHA224_CTX *); +#ifndef _KERNEL +char *SHA224_End(SHA224_CTX *, char *); +char *SHA224_Data(const void *, unsigned int, char *); +char *SHA224_Fd(int, char *); +char *SHA224_FdChunk(int, char *, off_t, off_t); +char *SHA224_File(const char *, char *); +char *SHA224_FileChunk(const char *, char *, off_t, off_t); +#endif +__END_DECLS + +#endif /* !_SHA224_H_ */ diff --git a/module/os/freebsd/spl/sha256.h b/module/os/freebsd/spl/sha256.h new file mode 100644 index 000000000..193c0c025 --- /dev/null +++ b/module/os/freebsd/spl/sha256.h @@ -0,0 +1,99 @@ +/* + * Copyright 2005 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SHA256_H_ +#define _SHA256_H_ + +#ifndef _KERNEL +#include <sys/types.h> +#endif + +#define SHA256_BLOCK_LENGTH 64 +#define SHA256_DIGEST_LENGTH 32 +#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1) + +typedef struct SHA256Context { + uint32_t state[8]; + uint64_t count; + uint8_t buf[SHA256_BLOCK_LENGTH]; +} SHA256_CTX; + +__BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ + +#ifndef SHA256_Init +#define SHA256_Init _libmd_SHA256_Init +#endif +#ifndef SHA256_Update +#define SHA256_Update _libmd_SHA256_Update +#endif +#ifndef SHA256_Final +#define SHA256_Final _libmd_SHA256_Final +#endif +#ifndef SHA256_End +#define SHA256_End _libmd_SHA256_End +#endif +#ifndef SHA256_Fd +#define SHA256_Fd _libmd_SHA256_Fd +#endif +#ifndef SHA256_FdChunk +#define SHA256_FdChunk _libmd_SHA256_FdChunk +#endif +#ifndef SHA256_File +#define SHA256_File _libmd_SHA256_File +#endif +#ifndef SHA256_FileChunk +#define SHA256_FileChunk _libmd_SHA256_FileChunk +#endif +#ifndef SHA256_Data +#define SHA256_Data _libmd_SHA256_Data +#endif + +#ifndef SHA256_Transform +#define SHA256_Transform _libmd_SHA256_Transform +#endif +#ifndef SHA256_version +#define SHA256_version _libmd_SHA256_version +#endif + +void SHA256_Init(SHA256_CTX *); +void SHA256_Update(SHA256_CTX *, const void *, size_t); +void SHA256_Final(unsigned char [__min_size(SHA256_DIGEST_LENGTH)], + SHA256_CTX *); +#ifndef _KERNEL +char *SHA256_End(SHA256_CTX *, char *); +char *SHA256_Data(const void *, unsigned int, char *); +char *SHA256_Fd(int, char *); +char *SHA256_FdChunk(int, char *, off_t, off_t); +char *SHA256_File(const char *, char *); +char *SHA256_FileChunk(const char *, char *, off_t, off_t); +#endif +__END_DECLS + +#endif /* !_SHA256_H_ */ diff --git a/module/os/freebsd/spl/sha256c.c b/module/os/freebsd/spl/sha256c.c new file mode 100644 index 000000000..241cf8c9a --- /dev/null +++ b/module/os/freebsd/spl/sha256c.c @@ -0,0 +1,378 @@ +/* + * Copyright 2005 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> + +#ifdef _KERNEL +#include <sys/systm.h> +#else +#include <string.h> +#endif + + +#include <sys/byteorder.h> +#include <sys/endian.h> +#include "sha224.h" +#include "sha256.h" + +#if BYTE_ORDER == BIG_ENDIAN + +/* Copy a vector of big-endian uint32_t into a vector of bytes */ +#define be32enc_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +/* Copy a vector of bytes into a vector of big-endian uint32_t */ +#define be32dec_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +#else /* BYTE_ORDER != BIG_ENDIAN */ + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static void +be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + be32enc(dst + i * 4, src[i]); +} + +/* + * Decode a big-endian length len vector of (unsigned char) into a length + * len/4 vector of (uint32_t). Assumes len is a multiple of 4. + */ +static void +be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + dst[i] = be32dec(src + i * 4); +} + +#endif /* BYTE_ORDER != BIG_ENDIAN */ + +/* SHA256 round constants. */ +static const uint32_t K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define SHR(x, n) (x >> n) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + h += S1(e) + Ch(e, f, g) + k; \ + d += h; \ + h += S0(a) + Maj(a, b, c); + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i, ii) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i + ii] + K[i + ii]) + +/* Message schedule computation */ +#define MSCH(W, ii, i) \ + W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + \ + s0(W[i + ii + 1]) + W[i + ii] + +/* + * SHA256 block compression function. The 256-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +static void +SHA256_Transform(uint32_t *state, const unsigned char block[64]) +{ + uint32_t W[64]; + uint32_t S[8]; + int i; + + /* 1. Prepare the first part of the message schedule W. */ + be32dec_vect(W, block, 64); + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. */ + for (i = 0; i < 64; i += 16) { + RNDr(S, W, 0, i); + RNDr(S, W, 1, i); + RNDr(S, W, 2, i); + RNDr(S, W, 3, i); + RNDr(S, W, 4, i); + RNDr(S, W, 5, i); + RNDr(S, W, 6, i); + RNDr(S, W, 7, i); + RNDr(S, W, 8, i); + RNDr(S, W, 9, i); + RNDr(S, W, 10, i); + RNDr(S, W, 11, i); + RNDr(S, W, 12, i); + RNDr(S, W, 13, i); + RNDr(S, W, 14, i); + RNDr(S, W, 15, i); + + if (i == 48) + break; + MSCH(W, 0, i); + MSCH(W, 1, i); + MSCH(W, 2, i); + MSCH(W, 3, i); + MSCH(W, 4, i); + MSCH(W, 5, i); + MSCH(W, 6, i); + MSCH(W, 7, i); + MSCH(W, 8, i); + MSCH(W, 9, i); + MSCH(W, 10, i); + MSCH(W, 11, i); + MSCH(W, 12, i); + MSCH(W, 13, i); + MSCH(W, 14, i); + MSCH(W, 15, i); + } + + /* 4. Mix local working variables into global state */ + for (i = 0; i < 8; i++) + state[i] += S[i]; +} + +static unsigned char PAD[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Add padding and terminating bit-count. */ +static void +SHA256_Pad(SHA256_CTX * ctx) +{ + size_t r; + + /* Figure out how many bytes we have buffered. */ + r = (ctx->count >> 3) & 0x3f; + + /* Pad to 56 mod 64, transforming if we finish a block en route. */ + if (r < 56) { + /* Pad to 56 mod 64. */ + memcpy(&ctx->buf[r], PAD, 56 - r); + } else { + /* Finish the current block and mix. */ + memcpy(&ctx->buf[r], PAD, 64 - r); + SHA256_Transform(ctx->state, ctx->buf); + + /* The start of the final block is all zeroes. */ + memset(&ctx->buf[0], 0, 56); + } + + /* Add the terminating bit-count. */ + be64enc(&ctx->buf[56], ctx->count); + + /* Mix in the final block. */ + SHA256_Transform(ctx->state, ctx->buf); +} + +/* SHA-256 initialization. Begins a SHA-256 operation. */ +void +SHA256_Init(SHA256_CTX * ctx) +{ + + /* Zero bits processed so far */ + ctx->count = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0x6A09E667; + ctx->state[1] = 0xBB67AE85; + ctx->state[2] = 0x3C6EF372; + ctx->state[3] = 0xA54FF53A; + ctx->state[4] = 0x510E527F; + ctx->state[5] = 0x9B05688C; + ctx->state[6] = 0x1F83D9AB; + ctx->state[7] = 0x5BE0CD19; +} + +/* Add bytes into the hash */ +void +SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len) +{ + uint64_t bitlen; + uint32_t r; + const unsigned char *src = in; + + /* Number of bytes left in the buffer from previous updates */ + r = (ctx->count >> 3) & 0x3f; + + /* Convert the length into a number of bits */ + bitlen = len << 3; + + /* Update number of bits */ + ctx->count += bitlen; + + /* Handle the case where we don't need to perform any transforms */ + if (len < 64 - r) { + memcpy(&ctx->buf[r], src, len); + return; + } + + /* Finish the current block */ + memcpy(&ctx->buf[r], src, 64 - r); + SHA256_Transform(ctx->state, ctx->buf); + src += 64 - r; + len -= 64 - r; + + /* Perform complete blocks */ + while (len >= 64) { + SHA256_Transform(ctx->state, src); + src += 64; + len -= 64; + } + + /* Copy left over data into buffer */ + memcpy(ctx->buf, src, len); +} + +/* + * SHA-256 finalization. Pads the input data, exports the hash value, + * and clears the context state. + */ +void +SHA256_Final(unsigned char digest[static SHA256_DIGEST_LENGTH], SHA256_CTX *ctx) +{ + + /* Add padding */ + SHA256_Pad(ctx); + + /* Write the hash */ + be32enc_vect(digest, ctx->state, SHA256_DIGEST_LENGTH); + + /* Clear the context state */ + explicit_bzero(ctx, sizeof (*ctx)); +} + +/* SHA-224: ******************************************************* */ +/* + * the SHA224 and SHA256 transforms are identical + */ + +/* SHA-224 initialization. Begins a SHA-224 operation. */ +void +SHA224_Init(SHA224_CTX * ctx) +{ + + /* Zero bits processed so far */ + ctx->count = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0xC1059ED8; + ctx->state[1] = 0x367CD507; + ctx->state[2] = 0x3070DD17; + ctx->state[3] = 0xF70E5939; + ctx->state[4] = 0xFFC00B31; + ctx->state[5] = 0x68581511; + ctx->state[6] = 0x64f98FA7; + ctx->state[7] = 0xBEFA4FA4; +} + +/* Add bytes into the SHA-224 hash */ +void +SHA224_Update(SHA224_CTX * ctx, const void *in, size_t len) +{ + + SHA256_Update((SHA256_CTX *)ctx, in, len); +} + +/* + * SHA-224 finalization. Pads the input data, exports the hash value, + * and clears the context state. + */ +void +SHA224_Final(unsigned char digest[static SHA224_DIGEST_LENGTH], SHA224_CTX *ctx) +{ + + /* Add padding */ + SHA256_Pad((SHA256_CTX *)ctx); + + /* Write the hash */ + be32enc_vect(digest, ctx->state, SHA224_DIGEST_LENGTH); + + /* Clear the context state */ + explicit_bzero(ctx, sizeof (*ctx)); +} + +#ifdef WEAK_REFS +/* + * When building libmd, provide weak references. Note: this is not + * activated in the context of compiling these sources for internal + * use in libcrypt. + */ +#undef SHA256_Init +__weak_reference(_libmd_SHA256_Init, SHA256_Init); +#undef SHA256_Update +__weak_reference(_libmd_SHA256_Update, SHA256_Update); +#undef SHA256_Final +__weak_reference(_libmd_SHA256_Final, SHA256_Final); +#undef SHA256_Transform +__weak_reference(_libmd_SHA256_Transform, SHA256_Transform); + +#undef SHA224_Init +__weak_reference(_libmd_SHA224_Init, SHA224_Init); +#undef SHA224_Update +__weak_reference(_libmd_SHA224_Update, SHA224_Update); +#undef SHA224_Final +__weak_reference(_libmd_SHA224_Final, SHA224_Final); +#endif diff --git a/module/os/freebsd/spl/sha384.h b/module/os/freebsd/spl/sha384.h new file mode 100644 index 000000000..67250cee0 --- /dev/null +++ b/module/os/freebsd/spl/sha384.h @@ -0,0 +1,96 @@ +/* + * Copyright 2005 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SHA384_H_ +#define _SHA384_H_ + +#ifndef _KERNEL +#include <sys/types.h> +#endif + +#define SHA384_BLOCK_LENGTH 128 +#define SHA384_DIGEST_LENGTH 48 +#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1) + +typedef struct SHA384Context { + uint64_t state[8]; + uint64_t count[2]; + uint8_t buf[SHA384_BLOCK_LENGTH]; +} SHA384_CTX; + +__BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ +#ifndef SHA384_Init +#define SHA384_Init _libmd_SHA384_Init +#endif +#ifndef SHA384_Update +#define SHA384_Update _libmd_SHA384_Update +#endif +#ifndef SHA384_Final +#define SHA384_Final _libmd_SHA384_Final +#endif +#ifndef SHA384_End +#define SHA384_End _libmd_SHA384_End +#endif +#ifndef SHA384_Fd +#define SHA384_Fd _libmd_SHA384_Fd +#endif +#ifndef SHA384_FdChunk +#define SHA384_FdChunk _libmd_SHA384_FdChunk +#endif +#ifndef SHA384_File +#define SHA384_File _libmd_SHA384_File +#endif +#ifndef SHA384_FileChunk +#define SHA384_FileChunk _libmd_SHA384_FileChunk +#endif +#ifndef SHA384_Data +#define SHA384_Data _libmd_SHA384_Data +#endif + +#ifndef SHA384_version +#define SHA384_version _libmd_SHA384_version +#endif + +void SHA384_Init(SHA384_CTX *); +void SHA384_Update(SHA384_CTX *, const void *, size_t); +void SHA384_Final(unsigned char [__min_size(SHA384_DIGEST_LENGTH)], + SHA384_CTX *); +#ifndef _KERNEL +char *SHA384_End(SHA384_CTX *, char *); +char *SHA384_Data(const void *, unsigned int, char *); +char *SHA384_Fd(int, char *); +char *SHA384_FdChunk(int, char *, off_t, off_t); +char *SHA384_File(const char *, char *); +char *SHA384_FileChunk(const char *, char *, off_t, off_t); +#endif + +__END_DECLS + +#endif /* !_SHA384_H_ */ diff --git a/module/os/freebsd/spl/sha512.h b/module/os/freebsd/spl/sha512.h new file mode 100644 index 000000000..b6fb733ca --- /dev/null +++ b/module/os/freebsd/spl/sha512.h @@ -0,0 +1,101 @@ +/* + * Copyright 2005 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SHA512_H_ +#define _SHA512_H_ + +#ifndef _KERNEL +#include <sys/types.h> +#endif + +#define SHA512_BLOCK_LENGTH 128 +#define SHA512_DIGEST_LENGTH 64 +#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) + +typedef struct SHA512Context { + uint64_t state[8]; + uint64_t count[2]; + uint8_t buf[SHA512_BLOCK_LENGTH]; +} SHA512_CTX; + +__BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ +#if 0 +#ifndef SHA512_Init +#define SHA512_Init _libmd_SHA512_Init +#endif +#ifndef SHA512_Update +#define SHA512_Update _libmd_SHA512_Update +#endif +#ifndef SHA512_Final +#define SHA512_Final _libmd_SHA512_Final +#endif +#endif +#ifndef SHA512_End +#define SHA512_End _libmd_SHA512_End +#endif +#ifndef SHA512_Fd +#define SHA512_Fd _libmd_SHA512_Fd +#endif +#ifndef SHA512_FdChunk +#define SHA512_FdChunk _libmd_SHA512_FdChunk +#endif +#ifndef SHA512_File +#define SHA512_File _libmd_SHA512_File +#endif +#ifndef SHA512_FileChunk +#define SHA512_FileChunk _libmd_SHA512_FileChunk +#endif +#ifndef SHA512_Data +#define SHA512_Data _libmd_SHA512_Data +#endif + +#ifndef SHA512_Transform +#define SHA512_Transform _libmd_SHA512_Transform +#endif +#ifndef SHA512_version +#define SHA512_version _libmd_SHA512_version +#endif + +void SHA512_Init(SHA512_CTX *); +void SHA512_Update(SHA512_CTX *, const void *, size_t); +void SHA512_Final(unsigned char [__min_size(SHA512_DIGEST_LENGTH)], + SHA512_CTX *); +#ifndef _KERNEL +char *SHA512_End(SHA512_CTX *, char *); +char *SHA512_Data(const void *, unsigned int, char *); +char *SHA512_Fd(int, char *); +char *SHA512_FdChunk(int, char *, off_t, off_t); +char *SHA512_File(const char *, char *); +char *SHA512_FileChunk(const char *, char *, off_t, off_t); +#endif + +__END_DECLS + +#endif /* !_SHA512_H_ */ diff --git a/module/os/freebsd/spl/sha512c.c b/module/os/freebsd/spl/sha512c.c new file mode 100644 index 000000000..146f338f0 --- /dev/null +++ b/module/os/freebsd/spl/sha512c.c @@ -0,0 +1,508 @@ +/* + * Copyright 2005 Colin Percival + * Copyright (c) 2015 Allan Jude <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/endian.h> +#include <sys/types.h> + +#ifdef _KERNEL +#include <sys/systm.h> +#else +#include <string.h> +#endif + +#include "sha512.h" +#include "sha512t.h" +#include "sha384.h" + +#if BYTE_ORDER == BIG_ENDIAN + +/* Copy a vector of big-endian uint64_t into a vector of bytes */ +#define be64enc_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +/* Copy a vector of bytes into a vector of big-endian uint64_t */ +#define be64dec_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +#else /* BYTE_ORDER != BIG_ENDIAN */ + +/* + * Encode a length len/4 vector of (uint64_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 8. + */ +static void +be64enc_vect(unsigned char *dst, const uint64_t *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 8; i++) + be64enc(dst + i * 8, src[i]); +} + +/* + * Decode a big-endian length len vector of (unsigned char) into a length + * len/4 vector of (uint64_t). Assumes len is a multiple of 8. + */ +static void +be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 8; i++) + dst[i] = be64dec(src + i * 8); +} + +#endif /* BYTE_ORDER != BIG_ENDIAN */ + +/* SHA512 round constants. */ +static const uint64_t K[80] = { + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + +/* Elementary functions used by SHA512 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define SHR(x, n) (x >> n) +#define ROTR(x, n) ((x >> n) | (x << (64 - n))) +#define S0(x) (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39)) +#define S1(x) (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41)) +#define s0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x, 7)) +#define s1(x) (ROTR(x, 19) ^ ROTR(x, 61) ^ SHR(x, 6)) + +/* SHA512 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + h += S1(e) + Ch(e, f, g) + k; \ + d += h; \ + h += S0(a) + Maj(a, b, c); + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i, ii) \ + RND(S[(80 - i) % 8], S[(81 - i) % 8], \ + S[(82 - i) % 8], S[(83 - i) % 8], \ + S[(84 - i) % 8], S[(85 - i) % 8], \ + S[(86 - i) % 8], S[(87 - i) % 8], \ + W[i + ii] + K[i + ii]) + +/* Message schedule computation */ +#define MSCH(W, ii, i) \ + W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + \ + s0(W[i + ii + 1]) + W[i + ii] + +/* + * SHA512 block compression function. The 512-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +static void +SHA512_Transform(uint64_t *state, + const unsigned char block[SHA512_BLOCK_LENGTH]) +{ + uint64_t W[80]; + uint64_t S[8]; + int i; + + /* 1. Prepare the first part of the message schedule W. */ + be64dec_vect(W, block, SHA512_BLOCK_LENGTH); + + /* 2. Initialize working variables. */ + memcpy(S, state, SHA512_DIGEST_LENGTH); + + /* 3. Mix. */ + for (i = 0; i < 80; i += 16) { + RNDr(S, W, 0, i); + RNDr(S, W, 1, i); + RNDr(S, W, 2, i); + RNDr(S, W, 3, i); + RNDr(S, W, 4, i); + RNDr(S, W, 5, i); + RNDr(S, W, 6, i); + RNDr(S, W, 7, i); + RNDr(S, W, 8, i); + RNDr(S, W, 9, i); + RNDr(S, W, 10, i); + RNDr(S, W, 11, i); + RNDr(S, W, 12, i); + RNDr(S, W, 13, i); + RNDr(S, W, 14, i); + RNDr(S, W, 15, i); + + if (i == 64) + break; + MSCH(W, 0, i); + MSCH(W, 1, i); + MSCH(W, 2, i); + MSCH(W, 3, i); + MSCH(W, 4, i); + MSCH(W, 5, i); + MSCH(W, 6, i); + MSCH(W, 7, i); + MSCH(W, 8, i); + MSCH(W, 9, i); + MSCH(W, 10, i); + MSCH(W, 11, i); + MSCH(W, 12, i); + MSCH(W, 13, i); + MSCH(W, 14, i); + MSCH(W, 15, i); + } + + /* 4. Mix local working variables into global state */ + for (i = 0; i < 8; i++) + state[i] += S[i]; +} + +static unsigned char PAD[SHA512_BLOCK_LENGTH] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Add padding and terminating bit-count. */ +static void +SHA512_Pad(SHA512_CTX * ctx) +{ + size_t r; + + /* Figure out how many bytes we have buffered. */ + r = (ctx->count[1] >> 3) & 0x7f; + + /* Pad to 112 mod 128, transforming if we finish a block en route. */ + if (r < 112) { + /* Pad to 112 mod 128. */ + memcpy(&ctx->buf[r], PAD, 112 - r); + } else { + /* Finish the current block and mix. */ + memcpy(&ctx->buf[r], PAD, 128 - r); + SHA512_Transform(ctx->state, ctx->buf); + + /* The start of the final block is all zeroes. */ + memset(&ctx->buf[0], 0, 112); + } + + /* Add the terminating bit-count. */ + be64enc_vect(&ctx->buf[112], ctx->count, 16); + + /* Mix in the final block. */ + SHA512_Transform(ctx->state, ctx->buf); +} + +/* SHA-512 initialization. Begins a SHA-512 operation. */ +void +SHA512_Init(SHA512_CTX * ctx) +{ + + /* Zero bits processed so far */ + ctx->count[0] = ctx->count[1] = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0x6a09e667f3bcc908ULL; + ctx->state[1] = 0xbb67ae8584caa73bULL; + ctx->state[2] = 0x3c6ef372fe94f82bULL; + ctx->state[3] = 0xa54ff53a5f1d36f1ULL; + ctx->state[4] = 0x510e527fade682d1ULL; + ctx->state[5] = 0x9b05688c2b3e6c1fULL; + ctx->state[6] = 0x1f83d9abfb41bd6bULL; + ctx->state[7] = 0x5be0cd19137e2179ULL; +} + +/* Add bytes into the hash */ +void +SHA512_Update(SHA512_CTX * ctx, const void *in, size_t len) +{ + uint64_t bitlen[2]; + uint64_t r; + const unsigned char *src = in; + + /* Number of bytes left in the buffer from previous updates */ + r = (ctx->count[1] >> 3) & 0x7f; + + /* Convert the length into a number of bits */ + bitlen[1] = ((uint64_t)len) << 3; + bitlen[0] = ((uint64_t)len) >> 61; + + /* Update number of bits */ + if ((ctx->count[1] += bitlen[1]) < bitlen[1]) + ctx->count[0]++; + ctx->count[0] += bitlen[0]; + + /* Handle the case where we don't need to perform any transforms */ + if (len < SHA512_BLOCK_LENGTH - r) { + memcpy(&ctx->buf[r], src, len); + return; + } + + /* Finish the current block */ + memcpy(&ctx->buf[r], src, SHA512_BLOCK_LENGTH - r); + SHA512_Transform(ctx->state, ctx->buf); + src += SHA512_BLOCK_LENGTH - r; + len -= SHA512_BLOCK_LENGTH - r; + + /* Perform complete blocks */ + while (len >= SHA512_BLOCK_LENGTH) { + SHA512_Transform(ctx->state, src); + src += SHA512_BLOCK_LENGTH; + len -= SHA512_BLOCK_LENGTH; + } + + /* Copy left over data into buffer */ + memcpy(ctx->buf, src, len); +} + +/* + * SHA-512 finalization. Pads the input data, exports the hash value, + * and clears the context state. + */ +void +SHA512_Final(unsigned char digest[static SHA512_DIGEST_LENGTH], SHA512_CTX *ctx) +{ + + /* Add padding */ + SHA512_Pad(ctx); + + /* Write the hash */ + be64enc_vect(digest, ctx->state, SHA512_DIGEST_LENGTH); + + /* Clear the context state */ + explicit_bzero(ctx, sizeof (*ctx)); +} + +/* SHA-512t: ******************************************************** */ +/* + * the SHA512t transforms are identical to SHA512 so reuse the existing function + */ +void +SHA512_224_Init(SHA512_CTX * ctx) +{ + + /* Zero bits processed so far */ + ctx->count[0] = ctx->count[1] = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0x8c3d37c819544da2ULL; + ctx->state[1] = 0x73e1996689dcd4d6ULL; + ctx->state[2] = 0x1dfab7ae32ff9c82ULL; + ctx->state[3] = 0x679dd514582f9fcfULL; + ctx->state[4] = 0x0f6d2b697bd44da8ULL; + ctx->state[5] = 0x77e36f7304c48942ULL; + ctx->state[6] = 0x3f9d85a86a1d36c8ULL; + ctx->state[7] = 0x1112e6ad91d692a1ULL; +} + +void +SHA512_224_Update(SHA512_CTX * ctx, const void *in, size_t len) +{ + + SHA512_Update(ctx, in, len); +} + +void +SHA512_224_Final(unsigned char digest[static SHA512_224_DIGEST_LENGTH], + SHA512_CTX *ctx) +{ + + /* Add padding */ + SHA512_Pad(ctx); + + /* Write the hash */ + be64enc_vect(digest, ctx->state, SHA512_224_DIGEST_LENGTH); + + /* Clear the context state */ + explicit_bzero(ctx, sizeof (*ctx)); +} + +void +SHA512_256_Init(SHA512_CTX * ctx) +{ + + /* Zero bits processed so far */ + ctx->count[0] = ctx->count[1] = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0x22312194fc2bf72cULL; + ctx->state[1] = 0x9f555fa3c84c64c2ULL; + ctx->state[2] = 0x2393b86b6f53b151ULL; + ctx->state[3] = 0x963877195940eabdULL; + ctx->state[4] = 0x96283ee2a88effe3ULL; + ctx->state[5] = 0xbe5e1e2553863992ULL; + ctx->state[6] = 0x2b0199fc2c85b8aaULL; + ctx->state[7] = 0x0eb72ddc81c52ca2ULL; +} + +void +SHA512_256_Update(SHA512_CTX * ctx, const void *in, size_t len) +{ + + SHA512_Update(ctx, in, len); +} + +void +SHA512_256_Final(unsigned char digest[static SHA512_256_DIGEST_LENGTH], + SHA512_CTX * ctx) +{ + + /* Add padding */ + SHA512_Pad(ctx); + + /* Write the hash */ + be64enc_vect(digest, ctx->state, SHA512_256_DIGEST_LENGTH); + + /* Clear the context state */ + explicit_bzero(ctx, sizeof (*ctx)); +} + +/* ** SHA-384: ******************************************************** */ +/* + * the SHA384 and SHA512 transforms are identical, so SHA384 is skipped + */ + +/* SHA-384 initialization. Begins a SHA-384 operation. */ +void +SHA384_Init(SHA384_CTX * ctx) +{ + + /* Zero bits processed so far */ + ctx->count[0] = ctx->count[1] = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0xcbbb9d5dc1059ed8ULL; + ctx->state[1] = 0x629a292a367cd507ULL; + ctx->state[2] = 0x9159015a3070dd17ULL; + ctx->state[3] = 0x152fecd8f70e5939ULL; + ctx->state[4] = 0x67332667ffc00b31ULL; + ctx->state[5] = 0x8eb44a8768581511ULL; + ctx->state[6] = 0xdb0c2e0d64f98fa7ULL; + ctx->state[7] = 0x47b5481dbefa4fa4ULL; +} + +/* Add bytes into the SHA-384 hash */ +void +SHA384_Update(SHA384_CTX * ctx, const void *in, size_t len) +{ + + SHA512_Update((SHA512_CTX *)ctx, in, len); +} + +/* + * SHA-384 finalization. Pads the input data, exports the hash value, + * and clears the context state. + */ +void +SHA384_Final(unsigned char digest[static SHA384_DIGEST_LENGTH], SHA384_CTX *ctx) +{ + + /* Add padding */ + SHA512_Pad((SHA512_CTX *)ctx); + + /* Write the hash */ + be64enc_vect(digest, ctx->state, SHA384_DIGEST_LENGTH); + + /* Clear the context state */ + explicit_bzero(ctx, sizeof (*ctx)); +} + +#if 0 +/* + * When building libmd, provide weak references. Note: this is not + * activated in the context of compiling these sources for internal + * use in libcrypt. + */ +#undef SHA512_Init +__weak_reference(_libmd_SHA512_Init, SHA512_Init); +#undef SHA512_Update +__weak_reference(_libmd_SHA512_Update, SHA512_Update); +#undef SHA512_Final +__weak_reference(_libmd_SHA512_Final, SHA512_Final); +#undef SHA512_Transform +__weak_reference(_libmd_SHA512_Transform, SHA512_Transform); + +#undef SHA512_224_Init +__weak_reference(_libmd_SHA512_224_Init, SHA512_224_Init); +#undef SHA512_224_Update +__weak_reference(_libmd_SHA512_224_Update, SHA512_224_Update); +#undef SHA512_224_Final +__weak_reference(_libmd_SHA512_224_Final, SHA512_224_Final); + +#undef SHA512_256_Init +__weak_reference(_libmd_SHA512_256_Init, SHA512_256_Init); +#undef SHA512_256_Update +__weak_reference(_libmd_SHA512_256_Update, SHA512_256_Update); +#undef SHA512_256_Final +__weak_reference(_libmd_SHA512_256_Final, SHA512_256_Final); + +#undef SHA384_Init +__weak_reference(_libmd_SHA384_Init, SHA384_Init); +#undef SHA384_Update +__weak_reference(_libmd_SHA384_Update, SHA384_Update); +#undef SHA384_Final +__weak_reference(_libmd_SHA384_Final, SHA384_Final); +#endif diff --git a/module/os/freebsd/spl/sha512t.h b/module/os/freebsd/spl/sha512t.h new file mode 100644 index 000000000..703867fc0 --- /dev/null +++ b/module/os/freebsd/spl/sha512t.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2015 Allan Jude <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SHA512T_H_ +#define _SHA512T_H_ + +#include "sha512.h" + +#ifndef _KERNEL +#include <sys/types.h> +#endif + +#define SHA512_224_DIGEST_LENGTH 28 +#define SHA512_224_DIGEST_STRING_LENGTH (SHA512_224_DIGEST_LENGTH * 2 + 1) +#define SHA512_256_DIGEST_LENGTH 32 +#define SHA512_256_DIGEST_STRING_LENGTH (SHA512_256_DIGEST_LENGTH * 2 + 1) + +__BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ +#ifndef SHA512_224_Init +#define SHA512_224_Init _libmd_SHA512_224_Init +#endif +#ifndef SHA512_224_Update +#define SHA512_224_Update _libmd_SHA512_224_Update +#endif +#ifndef SHA512_224_Final +#define SHA512_224_Final _libmd_SHA512_224_Final +#endif +#ifndef SHA512_224_End +#define SHA512_224_End _libmd_SHA512_224_End +#endif +#ifndef SHA512_224_Fd +#define SHA512_224_Fd _libmd_SHA512_224_Fd +#endif +#ifndef SHA512_224_FdChunk +#define SHA512_224_FdChunk _libmd_SHA512_224_FdChunk +#endif +#ifndef SHA512_224_File +#define SHA512_224_File _libmd_SHA512_224_File +#endif +#ifndef SHA512_224_FileChunk +#define SHA512_224_FileChunk _libmd_SHA512_224_FileChunk +#endif +#ifndef SHA512_224_Data +#define SHA512_224_Data _libmd_SHA512_224_Data +#endif + +#ifndef SHA512_224_Transform +#define SHA512_224_Transform _libmd_SHA512_224_Transform +#endif +#ifndef SHA512_224_version +#define SHA512_224_version _libmd_SHA512_224_version +#endif + +#ifndef SHA512_256_Init +#define SHA512_256_Init _libmd_SHA512_256_Init +#endif +#ifndef SHA512_256_Update +#define SHA512_256_Update _libmd_SHA512_256_Update +#endif +#ifndef SHA512_256_Final +#define SHA512_256_Final _libmd_SHA512_256_Final +#endif +#ifndef SHA512_256_End +#define SHA512_256_End _libmd_SHA512_256_End +#endif +#ifndef SHA512_256_Fd +#define SHA512_256_Fd _libmd_SHA512_256_Fd +#endif +#ifndef SHA512_256_FdChunk +#define SHA512_256_FdChunk _libmd_SHA512_256_FdChunk +#endif +#ifndef SHA512_256_File +#define SHA512_256_File _libmd_SHA512_256_File +#endif +#ifndef SHA512_256_FileChunk +#define SHA512_256_FileChunk _libmd_SHA512_256_FileChunk +#endif +#ifndef SHA512_256_Data +#define SHA512_256_Data _libmd_SHA512_256_Data +#endif + +#ifndef SHA512_256_Transform +#define SHA512_256_Transform _libmd_SHA512_256_Transform +#endif +#ifndef SHA512_256_version +#define SHA512_256_version _libmd_SHA512_256_version +#endif + +void SHA512_224_Init(SHA512_CTX *); +void SHA512_224_Update(SHA512_CTX *, const void *, size_t); +void SHA512_224_Final(unsigned char [__min_size(SHA512_224_DIGEST_LENGTH)], + SHA512_CTX *); +#ifndef _KERNEL +char *SHA512_224_End(SHA512_CTX *, char *); +char *SHA512_224_Data(const void *, unsigned int, char *); +char *SHA512_224_Fd(int, char *); +char *SHA512_224_FdChunk(int, char *, off_t, off_t); +char *SHA512_224_File(const char *, char *); +char *SHA512_224_FileChunk(const char *, char *, off_t, off_t); +#endif +void SHA512_256_Init(SHA512_CTX *); +void SHA512_256_Update(SHA512_CTX *, const void *, size_t); +void SHA512_256_Final(unsigned char [__min_size(SHA512_256_DIGEST_LENGTH)], + SHA512_CTX *); +#ifndef _KERNEL +char *SHA512_256_End(SHA512_CTX *, char *); +char *SHA512_256_Data(const void *, unsigned int, char *); +char *SHA512_256_Fd(int, char *); +char *SHA512_256_FdChunk(int, char *, off_t, off_t); +char *SHA512_256_File(const char *, char *); +char *SHA512_256_FileChunk(const char *, char *, off_t, off_t); +#endif + +__END_DECLS + +#endif /* !_SHA512T_H_ */ diff --git a/module/os/freebsd/spl/spl_acl.c b/module/os/freebsd/spl/spl_acl.c new file mode 100644 index 000000000..bb4c30728 --- /dev/null +++ b/module/os/freebsd/spl/spl_acl.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2008, 2009 Edward Tomasz Napierała <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/malloc.h> +#include <sys/errno.h> +#include <sys/zfs_acl.h> +#include <sys/acl.h> + +struct zfs2bsd { + uint32_t zb_zfs; + int zb_bsd; +}; + +struct zfs2bsd perms[] = {{ACE_READ_DATA, ACL_READ_DATA}, + {ACE_WRITE_DATA, ACL_WRITE_DATA}, + {ACE_EXECUTE, ACL_EXECUTE}, + {ACE_APPEND_DATA, ACL_APPEND_DATA}, + {ACE_DELETE_CHILD, ACL_DELETE_CHILD}, + {ACE_DELETE, ACL_DELETE}, + {ACE_READ_ATTRIBUTES, ACL_READ_ATTRIBUTES}, + {ACE_WRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES}, + {ACE_READ_NAMED_ATTRS, ACL_READ_NAMED_ATTRS}, + {ACE_WRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS}, + {ACE_READ_ACL, ACL_READ_ACL}, + {ACE_WRITE_ACL, ACL_WRITE_ACL}, + {ACE_WRITE_OWNER, ACL_WRITE_OWNER}, + {ACE_SYNCHRONIZE, ACL_SYNCHRONIZE}, + {0, 0}}; + +struct zfs2bsd flags[] = {{ACE_FILE_INHERIT_ACE, + ACL_ENTRY_FILE_INHERIT}, + {ACE_DIRECTORY_INHERIT_ACE, + ACL_ENTRY_DIRECTORY_INHERIT}, + {ACE_NO_PROPAGATE_INHERIT_ACE, + ACL_ENTRY_NO_PROPAGATE_INHERIT}, + {ACE_INHERIT_ONLY_ACE, + ACL_ENTRY_INHERIT_ONLY}, + {ACE_INHERITED_ACE, + ACL_ENTRY_INHERITED}, + {ACE_SUCCESSFUL_ACCESS_ACE_FLAG, + ACL_ENTRY_SUCCESSFUL_ACCESS}, + {ACE_FAILED_ACCESS_ACE_FLAG, + ACL_ENTRY_FAILED_ACCESS}, + {0, 0}}; + +static int +_bsd_from_zfs(uint32_t zfs, const struct zfs2bsd *table) +{ + const struct zfs2bsd *tmp; + int bsd = 0; + + for (tmp = table; tmp->zb_zfs != 0; tmp++) { + if (zfs & tmp->zb_zfs) + bsd |= tmp->zb_bsd; + } + + return (bsd); +} + +static uint32_t +_zfs_from_bsd(int bsd, const struct zfs2bsd *table) +{ + const struct zfs2bsd *tmp; + uint32_t zfs = 0; + + for (tmp = table; tmp->zb_bsd != 0; tmp++) { + if (bsd & tmp->zb_bsd) + zfs |= tmp->zb_zfs; + } + + return (zfs); +} + +int +acl_from_aces(struct acl *aclp, const ace_t *aces, int nentries) +{ + int i; + struct acl_entry *entry; + const ace_t *ace; + + if (nentries < 1) { + printf("acl_from_aces: empty ZFS ACL; returning EINVAL.\n"); + return (EINVAL); + } + + if (nentries > ACL_MAX_ENTRIES) { + /* + * I believe it may happen only when moving a pool + * from SunOS to FreeBSD. + */ + printf("acl_from_aces: ZFS ACL too big to fit " + "into 'struct acl'; returning EINVAL.\n"); + return (EINVAL); + } + + bzero(aclp, sizeof (*aclp)); + aclp->acl_maxcnt = ACL_MAX_ENTRIES; + aclp->acl_cnt = nentries; + + for (i = 0; i < nentries; i++) { + entry = &(aclp->acl_entry[i]); + ace = &(aces[i]); + + if (ace->a_flags & ACE_OWNER) + entry->ae_tag = ACL_USER_OBJ; + else if (ace->a_flags & ACE_GROUP) + entry->ae_tag = ACL_GROUP_OBJ; + else if (ace->a_flags & ACE_EVERYONE) + entry->ae_tag = ACL_EVERYONE; + else if (ace->a_flags & ACE_IDENTIFIER_GROUP) + entry->ae_tag = ACL_GROUP; + else + entry->ae_tag = ACL_USER; + + if (entry->ae_tag == ACL_USER || entry->ae_tag == ACL_GROUP) + entry->ae_id = ace->a_who; + else + entry->ae_id = ACL_UNDEFINED_ID; + + entry->ae_perm = _bsd_from_zfs(ace->a_access_mask, perms); + entry->ae_flags = _bsd_from_zfs(ace->a_flags, flags); + + switch (ace->a_type) { + case ACE_ACCESS_ALLOWED_ACE_TYPE: + entry->ae_entry_type = ACL_ENTRY_TYPE_ALLOW; + break; + case ACE_ACCESS_DENIED_ACE_TYPE: + entry->ae_entry_type = ACL_ENTRY_TYPE_DENY; + break; + case ACE_SYSTEM_AUDIT_ACE_TYPE: + entry->ae_entry_type = ACL_ENTRY_TYPE_AUDIT; + break; + case ACE_SYSTEM_ALARM_ACE_TYPE: + entry->ae_entry_type = ACL_ENTRY_TYPE_ALARM; + break; + default: + panic("acl_from_aces: a_type is 0x%x", ace->a_type); + } + } + + return (0); +} + +void +aces_from_acl(ace_t *aces, int *nentries, const struct acl *aclp) +{ + int i; + const struct acl_entry *entry; + ace_t *ace; + + bzero(aces, sizeof (*aces) * aclp->acl_cnt); + + *nentries = aclp->acl_cnt; + + for (i = 0; i < aclp->acl_cnt; i++) { + entry = &(aclp->acl_entry[i]); + ace = &(aces[i]); + + ace->a_who = entry->ae_id; + + if (entry->ae_tag == ACL_USER_OBJ) + ace->a_flags = ACE_OWNER; + else if (entry->ae_tag == ACL_GROUP_OBJ) + ace->a_flags = (ACE_GROUP | ACE_IDENTIFIER_GROUP); + else if (entry->ae_tag == ACL_GROUP) + ace->a_flags = ACE_IDENTIFIER_GROUP; + else if (entry->ae_tag == ACL_EVERYONE) + ace->a_flags = ACE_EVERYONE; + else /* ACL_USER */ + ace->a_flags = 0; + + ace->a_access_mask = _zfs_from_bsd(entry->ae_perm, perms); + ace->a_flags |= _zfs_from_bsd(entry->ae_flags, flags); + + switch (entry->ae_entry_type) { + case ACL_ENTRY_TYPE_ALLOW: + ace->a_type = ACE_ACCESS_ALLOWED_ACE_TYPE; + break; + case ACL_ENTRY_TYPE_DENY: + ace->a_type = ACE_ACCESS_DENIED_ACE_TYPE; + break; + case ACL_ENTRY_TYPE_ALARM: + ace->a_type = ACE_SYSTEM_ALARM_ACE_TYPE; + break; + case ACL_ENTRY_TYPE_AUDIT: + ace->a_type = ACE_SYSTEM_AUDIT_ACE_TYPE; + break; + default: + panic("aces_from_acl: ae_entry_type is 0x%x", + entry->ae_entry_type); + } + } +} diff --git a/module/os/freebsd/spl/spl_atomic.c b/module/os/freebsd/spl/spl_atomic.c new file mode 100644 index 000000000..e82fed847 --- /dev/null +++ b/module/os/freebsd/spl/spl_atomic.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2007 Pawel Jakub Dawidek <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/atomic.h> + +#ifdef _KERNEL +#include <sys/kernel.h> + +struct mtx atomic_mtx; +MTX_SYSINIT(atomic, &atomic_mtx, "atomic", MTX_DEF); +#else +#include <pthread.h> + +#define mtx_lock(lock) pthread_mutex_lock(lock) +#define mtx_unlock(lock) pthread_mutex_unlock(lock) + +static pthread_mutex_t atomic_mtx; + +static __attribute__((constructor)) void +atomic_init(void) +{ + pthread_mutex_init(&atomic_mtx, NULL); +} +#endif + +#if !defined(__LP64__) && !defined(__mips_n32) && \ + !defined(ARM_HAVE_ATOMIC64) && !defined(I386_HAVE_ATOMIC64) +void +atomic_add_64(volatile uint64_t *target, int64_t delta) +{ + + mtx_lock(&atomic_mtx); + *target += delta; + mtx_unlock(&atomic_mtx); +} + +void +atomic_dec_64(volatile uint64_t *target) +{ + + mtx_lock(&atomic_mtx); + *target -= 1; + mtx_unlock(&atomic_mtx); +} +#endif + +uint64_t +atomic_add_64_nv(volatile uint64_t *target, int64_t delta) +{ + uint64_t newval; + + mtx_lock(&atomic_mtx); + newval = (*target += delta); + mtx_unlock(&atomic_mtx); + return (newval); +} + +#if defined(__powerpc__) || defined(__arm__) || defined(__mips__) +void +atomic_or_8(volatile uint8_t *target, uint8_t value) +{ + mtx_lock(&atomic_mtx); + *target |= value; + mtx_unlock(&atomic_mtx); +} +#endif + +uint8_t +atomic_or_8_nv(volatile uint8_t *target, uint8_t value) +{ + uint8_t newval; + + mtx_lock(&atomic_mtx); + newval = (*target |= value); + mtx_unlock(&atomic_mtx); + return (newval); +} + +uint64_t +atomic_cas_64(volatile uint64_t *target, uint64_t cmp, uint64_t newval) +{ + uint64_t oldval; + + mtx_lock(&atomic_mtx); + oldval = *target; + if (oldval == cmp) + *target = newval; + mtx_unlock(&atomic_mtx); + return (oldval); +} + +uint32_t +atomic_cas_32(volatile uint32_t *target, uint32_t cmp, uint32_t newval) +{ + uint32_t oldval; + + mtx_lock(&atomic_mtx); + oldval = *target; + if (oldval == cmp) + *target = newval; + mtx_unlock(&atomic_mtx); + return (oldval); +} + +void +membar_producer(void) +{ + /* nothing */ +} diff --git a/module/os/freebsd/spl/spl_cmn_err.c b/module/os/freebsd/spl/spl_cmn_err.c new file mode 100644 index 000000000..23566603f --- /dev/null +++ b/module/os/freebsd/spl/spl_cmn_err.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2007 John Birrell <[email protected]>. All rights reserved. + * Copyright 2012 Martin Matuska <[email protected]>. All rights reserved. + */ + +#include <sys/cmn_err.h> + +void +vcmn_err(int ce, const char *fmt, va_list adx) +{ + char buf[256]; + const char *prefix; + + prefix = NULL; /* silence unwitty compilers */ + switch (ce) { + case CE_CONT: + prefix = "Solaris(cont): "; + break; + case CE_NOTE: + prefix = "Solaris: NOTICE: "; + break; + case CE_WARN: + prefix = "Solaris: WARNING: "; + break; + case CE_PANIC: + prefix = "Solaris(panic): "; + break; + case CE_IGNORE: + break; + default: + panic("Solaris: unknown severity level"); + } + if (ce == CE_PANIC) { + vsnprintf(buf, sizeof (buf), fmt, adx); + panic("%s%s", prefix, buf); + } + if (ce != CE_IGNORE) { + printf("%s", prefix); + vprintf(fmt, adx); + printf("\n"); + } +} + +void +cmn_err(int type, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vcmn_err(type, fmt, ap); + va_end(ap); +} diff --git a/module/os/freebsd/spl/spl_dtrace.c b/module/os/freebsd/spl/spl_dtrace.c new file mode 100644 index 000000000..e7b2ff823 --- /dev/null +++ b/module/os/freebsd/spl/spl_dtrace.c @@ -0,0 +1,37 @@ +/* + * Copyright 2014 The FreeBSD Project. + * All rights reserved. + * + * This software was developed by Steven Hartland. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/queue.h> +#include <sys/sdt.h> + +/* CSTYLED */ +SDT_PROBE_DEFINE1(sdt, , , set__error, "int"); diff --git a/module/os/freebsd/spl/spl_kmem.c b/module/os/freebsd/spl/spl_kmem.c new file mode 100644 index 000000000..af3747c27 --- /dev/null +++ b/module/os/freebsd/spl/spl_kmem.c @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2006-2007 Pawel Jakub Dawidek <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/byteorder.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kmem.h> +#include <sys/kmem_cache.h> +#include <sys/debug.h> +#include <sys/mutex.h> +#include <sys/vmmeter.h> + + +#include <vm/vm_page.h> +#include <vm/vm_object.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> + +#ifdef KMEM_DEBUG +#include <sys/queue.h> +#include <sys/stack.h> +#endif + +#ifdef _KERNEL +MALLOC_DEFINE(M_SOLARIS, "solaris", "Solaris"); +#else +#define malloc(size, type, flags) malloc(size) +#define free(addr, type) free(addr) +#endif + +#ifdef KMEM_DEBUG +struct kmem_item { + struct stack stack; + LIST_ENTRY(kmem_item) next; +}; +static LIST_HEAD(, kmem_item) kmem_items; +static struct mtx kmem_items_mtx; +MTX_SYSINIT(kmem_items_mtx, &kmem_items_mtx, "kmem_items", MTX_DEF); +#endif /* KMEM_DEBUG */ + +#include <sys/vmem.h> + +void * +zfs_kmem_alloc(size_t size, int kmflags) +{ + void *p; +#ifdef KMEM_DEBUG + struct kmem_item *i; + + size += sizeof (struct kmem_item); +#endif + p = malloc(MAX(size, 16), M_SOLARIS, kmflags); +#ifndef _KERNEL + if (kmflags & KM_SLEEP) + assert(p != NULL); +#endif +#ifdef KMEM_DEBUG + if (p != NULL) { + i = p; + p = (uint8_t *)p + sizeof (struct kmem_item); + stack_save(&i->stack); + mtx_lock(&kmem_items_mtx); + LIST_INSERT_HEAD(&kmem_items, i, next); + mtx_unlock(&kmem_items_mtx); + } +#endif + return (p); +} + +void +zfs_kmem_free(void *buf, size_t size __unused) +{ +#ifdef KMEM_DEBUG + if (buf == NULL) { + printf("%s: attempt to free NULL\n", __func__); + return; + } + struct kmem_item *i; + + buf = (uint8_t *)buf - sizeof (struct kmem_item); + mtx_lock(&kmem_items_mtx); + LIST_FOREACH(i, &kmem_items, next) { + if (i == buf) + break; + } + ASSERT(i != NULL); + LIST_REMOVE(i, next); + mtx_unlock(&kmem_items_mtx); + memset(buf, 0xDC, MAX(size, 16)); +#endif + free(buf, M_SOLARIS); +} + +static uint64_t kmem_size_val; + +static void +kmem_size_init(void *unused __unused) +{ + + kmem_size_val = (uint64_t)vm_cnt.v_page_count * PAGE_SIZE; + if (kmem_size_val > vm_kmem_size) + kmem_size_val = vm_kmem_size; +} +SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL); + +uint64_t +kmem_size(void) +{ + + return (kmem_size_val); +} + +static int +kmem_std_constructor(void *mem, int size __unused, void *private, int flags) +{ + struct kmem_cache *cache = private; + + return (cache->kc_constructor(mem, cache->kc_private, flags)); +} + +static void +kmem_std_destructor(void *mem, int size __unused, void *private) +{ + struct kmem_cache *cache = private; + + cache->kc_destructor(mem, cache->kc_private); +} + +kmem_cache_t * +kmem_cache_create(char *name, size_t bufsize, size_t align, + int (*constructor)(void *, void *, int), void (*destructor)(void *, void *), + void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags) +{ + kmem_cache_t *cache; + + ASSERT(vmp == NULL); + + cache = kmem_alloc(sizeof (*cache), KM_SLEEP); + strlcpy(cache->kc_name, name, sizeof (cache->kc_name)); + cache->kc_constructor = constructor; + cache->kc_destructor = destructor; + cache->kc_private = private; +#if defined(_KERNEL) && !defined(KMEM_DEBUG) + cache->kc_zone = uma_zcreate(cache->kc_name, bufsize, + constructor != NULL ? kmem_std_constructor : NULL, + destructor != NULL ? kmem_std_destructor : NULL, + NULL, NULL, align > 0 ? align - 1 : 0, cflags); +#else + cache->kc_size = bufsize; +#endif + + return (cache); +} + +void +kmem_cache_destroy(kmem_cache_t *cache) +{ +#if defined(_KERNEL) && !defined(KMEM_DEBUG) + uma_zdestroy(cache->kc_zone); +#endif + kmem_free(cache, sizeof (*cache)); +} + +void * +kmem_cache_alloc(kmem_cache_t *cache, int flags) +{ +#if defined(_KERNEL) && !defined(KMEM_DEBUG) + return (uma_zalloc_arg(cache->kc_zone, cache, flags)); +#else + void *p; + + p = kmem_alloc(cache->kc_size, flags); + if (p != NULL && cache->kc_constructor != NULL) + kmem_std_constructor(p, cache->kc_size, cache, flags); + return (p); +#endif +} + +void +kmem_cache_free(kmem_cache_t *cache, void *buf) +{ +#if defined(_KERNEL) && !defined(KMEM_DEBUG) + uma_zfree_arg(cache->kc_zone, buf, cache); +#else + if (cache->kc_destructor != NULL) + kmem_std_destructor(buf, cache->kc_size, cache); + kmem_free(buf, cache->kc_size); +#endif +} + +/* + * Allow our caller to determine if there are running reaps. + * + * This call is very conservative and may return B_TRUE even when + * reaping activity isn't active. If it returns B_FALSE, then reaping + * activity is definitely inactive. + */ +boolean_t +kmem_cache_reap_active(void) +{ + + return (B_FALSE); +} + +/* + * Reap (almost) everything soon. + * + * Note: this does not wait for the reap-tasks to complete. Caller + * should use kmem_cache_reap_active() (above) and/or moderation to + * avoid scheduling too many reap-tasks. + */ +#ifdef _KERNEL +void +kmem_cache_reap_soon(kmem_cache_t *cache) +{ +#ifndef KMEM_DEBUG +#if __FreeBSD_version >= 1300043 + uma_zone_reclaim(cache->kc_zone, UMA_RECLAIM_DRAIN); +#else + zone_drain(cache->kc_zone); +#endif +#endif +} + +void +kmem_reap(void) +{ +#if __FreeBSD_version >= 1300043 + uma_reclaim(UMA_RECLAIM_TRIM); +#else + uma_reclaim(); +#endif +} +#else +void +kmem_cache_reap_soon(kmem_cache_t *cache __unused) +{ +} + +void +kmem_reap(void) +{ +} +#endif + +int +kmem_debugging(void) +{ + return (0); +} + +void * +calloc(size_t n, size_t s) +{ + return (kmem_zalloc(n * s, KM_NOSLEEP)); +} + +char * +kmem_vasprintf(const char *fmt, va_list adx) +{ + char *msg; + va_list adx2; + + va_copy(adx2, adx); + msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP); + (void) vsprintf(msg, fmt, adx2); + va_end(adx2); + + return (msg); +} + +#include <vm/uma.h> +#include <vm/uma_int.h> +#ifdef KMEM_DEBUG +#error "KMEM_DEBUG not currently supported" +#endif + +uint64_t +spl_kmem_cache_inuse(kmem_cache_t *cache) +{ + return (uma_zone_get_cur(cache->kc_zone)); +} + +uint64_t +spl_kmem_cache_entry_size(kmem_cache_t *cache) +{ + return (cache->kc_zone->uz_size); +} + +/* + * Register a move callback for cache defragmentation. + * XXX: Unimplemented but harmless to stub out for now. + */ +void +spl_kmem_cache_set_move(kmem_cache_t *skc, + kmem_cbrc_t (move)(void *, void *, size_t, void *)) +{ + ASSERT(move != NULL); +} + +#ifdef KMEM_DEBUG +void kmem_show(void *); +void +kmem_show(void *dummy __unused) +{ + struct kmem_item *i; + + mtx_lock(&kmem_items_mtx); + if (LIST_EMPTY(&kmem_items)) + printf("KMEM_DEBUG: No leaked elements.\n"); + else { + printf("KMEM_DEBUG: Leaked elements:\n\n"); + LIST_FOREACH(i, &kmem_items, next) { + printf("address=%p\n", i); + stack_print_ddb(&i->stack); + printf("\n"); + } + } + mtx_unlock(&kmem_items_mtx); +} + +SYSUNINIT(sol_kmem, SI_SUB_CPU, SI_ORDER_FIRST, kmem_show, NULL); +#endif /* KMEM_DEBUG */ diff --git a/module/os/freebsd/spl/spl_kstat.c b/module/os/freebsd/spl/spl_kstat.c new file mode 100644 index 000000000..fda03a3d7 --- /dev/null +++ b/module/os/freebsd/spl/spl_kstat.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2007 Pawel Jakub Dawidek <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/sysctl.h> +#include <sys/kstat.h> + +static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics"); + +SYSCTL_ROOT_NODE(OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics"); + +void +__kstat_set_raw_ops(kstat_t *ksp, + int (*headers)(char *buf, size_t size), + int (*data)(char *buf, size_t size, void *data), + void *(*addr)(kstat_t *ksp, loff_t index)) +{ + ksp->ks_raw_ops.headers = headers; + ksp->ks_raw_ops.data = data; + ksp->ks_raw_ops.addr = addr; +} + +static int +kstat_default_update(kstat_t *ksp, int rw) +{ + ASSERT(ksp != NULL); + + if (rw == KSTAT_WRITE) + return (EACCES); + + return (0); +} + +kstat_t * +__kstat_create(const char *module, int instance, const char *name, + const char *class, uchar_t ks_type, uint_t ks_ndata, uchar_t flags) +{ + struct sysctl_oid *root; + kstat_t *ksp; + + KASSERT(instance == 0, ("instance=%d", instance)); + if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO)) + ASSERT(ks_ndata == 1); + + /* + * Allocate the main structure. We don't need to copy module/class/name + * stuff in here, because it is only used for sysctl node creation + * done in this function. + */ + ksp = malloc(sizeof (*ksp), M_KSTAT, M_WAITOK|M_ZERO); + + ksp->ks_crtime = gethrtime(); + ksp->ks_snaptime = ksp->ks_crtime; + ksp->ks_instance = instance; + strncpy(ksp->ks_name, name, KSTAT_STRLEN); + strncpy(ksp->ks_class, class, KSTAT_STRLEN); + ksp->ks_type = ks_type; + ksp->ks_flags = flags; + ksp->ks_update = kstat_default_update; + + switch (ksp->ks_type) { + case KSTAT_TYPE_RAW: + ksp->ks_ndata = 1; + ksp->ks_data_size = ks_ndata; + break; + case KSTAT_TYPE_NAMED: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t); + break; + case KSTAT_TYPE_INTR: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t); + break; + case KSTAT_TYPE_IO: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t); + break; + case KSTAT_TYPE_TIMER: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t); + break; + default: + panic("Undefined kstat type %d\n", ksp->ks_type); + } + + if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) { + ksp->ks_data = NULL; + } else { + ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP); + if (ksp->ks_data == NULL) { + kmem_free(ksp, sizeof (*ksp)); + ksp = NULL; + } + } + /* + * Create sysctl tree for those statistics: + * + * kstat.<module>.<class>.<name>. + */ + sysctl_ctx_init(&ksp->ks_sysctl_ctx); + root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, + SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0, + ""); + if (root == NULL) { + printf("%s: Cannot create kstat.%s tree!\n", __func__, module); + sysctl_ctx_free(&ksp->ks_sysctl_ctx); + free(ksp, M_KSTAT); + return (NULL); + } + root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root), + OID_AUTO, class, CTLFLAG_RW, 0, ""); + if (root == NULL) { + printf("%s: Cannot create kstat.%s.%s tree!\n", __func__, + module, class); + sysctl_ctx_free(&ksp->ks_sysctl_ctx); + free(ksp, M_KSTAT); + return (NULL); + } + root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root), + OID_AUTO, name, CTLFLAG_RW, 0, ""); + if (root == NULL) { + printf("%s: Cannot create kstat.%s.%s.%s tree!\n", __func__, + module, class, name); + sysctl_ctx_free(&ksp->ks_sysctl_ctx); + free(ksp, M_KSTAT); + return (NULL); + } + ksp->ks_sysctl_root = root; + + return (ksp); +} + +static int +kstat_sysctl(SYSCTL_HANDLER_ARGS) +{ + kstat_named_t *ksent = arg1; + uint64_t val; + + val = ksent->value.ui64; + return (sysctl_handle_64(oidp, &val, 0, req)); +} + +void +kstat_install(kstat_t *ksp) +{ + kstat_named_t *ksent; + char *namelast; + int typelast; + + ksent = ksp->ks_data; + if (ksp->ks_ndata == UINT32_MAX) { +#ifdef INVARIANTS + printf("can't handle raw ops yet!!!\n"); +#endif + return; + } + if (ksent == NULL) { + printf("%s ksp->ks_data == NULL!!!!\n", __func__); + return; + } + typelast = 0; + namelast = NULL; + for (int i = 0; i < ksp->ks_ndata; i++, ksent++) { + if (ksent->data_type != 0) { + typelast = ksent->data_type; + namelast = ksent->name; + } + switch (typelast) { + case KSTAT_DATA_INT32: + SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, + SYSCTL_CHILDREN(ksp->ks_sysctl_root), + OID_AUTO, namelast, + CTLTYPE_S32 | CTLFLAG_RD, ksent, + sizeof (*ksent), kstat_sysctl, "I", + namelast); + break; + case KSTAT_DATA_UINT32: + SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, + SYSCTL_CHILDREN(ksp->ks_sysctl_root), + OID_AUTO, namelast, + CTLTYPE_U32 | CTLFLAG_RD, ksent, + sizeof (*ksent), kstat_sysctl, "IU", + namelast); + break; + case KSTAT_DATA_INT64: + SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, + SYSCTL_CHILDREN(ksp->ks_sysctl_root), + OID_AUTO, namelast, + CTLTYPE_S64 | CTLFLAG_RD, ksent, + sizeof (*ksent), kstat_sysctl, "Q", + namelast); + break; + case KSTAT_DATA_UINT64: + SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, + SYSCTL_CHILDREN(ksp->ks_sysctl_root), + OID_AUTO, namelast, + CTLTYPE_U64 | CTLFLAG_RD, ksent, + sizeof (*ksent), kstat_sysctl, "QU", + namelast); + break; + default: + panic("unsupported type: %d", typelast); + } + + } +} + +void +kstat_delete(kstat_t *ksp) +{ + + sysctl_ctx_free(&ksp->ks_sysctl_ctx); + free(ksp, M_KSTAT); +} + +void +kstat_set_string(char *dst, const char *src) +{ + + bzero(dst, KSTAT_STRLEN); + (void) strncpy(dst, src, KSTAT_STRLEN - 1); +} + +void +kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type) +{ + + kstat_set_string(knp->name, name); + knp->data_type = data_type; +} + +void +kstat_waitq_enter(kstat_io_t *kiop) +{ + hrtime_t new, delta; + ulong_t wcnt; + + new = gethrtime(); + delta = new - kiop->wlastupdate; + kiop->wlastupdate = new; + wcnt = kiop->wcnt++; + if (wcnt != 0) { + kiop->wlentime += delta * wcnt; + kiop->wtime += delta; + } +} + +void +kstat_waitq_exit(kstat_io_t *kiop) +{ + hrtime_t new, delta; + ulong_t wcnt; + + new = gethrtime(); + delta = new - kiop->wlastupdate; + kiop->wlastupdate = new; + wcnt = kiop->wcnt--; + ASSERT((int)wcnt > 0); + kiop->wlentime += delta * wcnt; + kiop->wtime += delta; +} + +void +kstat_runq_enter(kstat_io_t *kiop) +{ + hrtime_t new, delta; + ulong_t rcnt; + + new = gethrtime(); + delta = new - kiop->rlastupdate; + kiop->rlastupdate = new; + rcnt = kiop->rcnt++; + if (rcnt != 0) { + kiop->rlentime += delta * rcnt; + kiop->rtime += delta; + } +} + +void +kstat_runq_exit(kstat_io_t *kiop) +{ + hrtime_t new, delta; + ulong_t rcnt; + + new = gethrtime(); + delta = new - kiop->rlastupdate; + kiop->rlastupdate = new; + rcnt = kiop->rcnt--; + ASSERT((int)rcnt > 0); + kiop->rlentime += delta * rcnt; + kiop->rtime += delta; +} diff --git a/module/os/freebsd/spl/spl_misc.c b/module/os/freebsd/spl/spl_misc.c new file mode 100644 index 000000000..ab4702574 --- /dev/null +++ b/module/os/freebsd/spl/spl_misc.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2007 Pawel Jakub Dawidek <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/libkern.h> +#include <sys/limits.h> +#include <sys/misc.h> +#include <sys/sysctl.h> + +#include <sys/zfs_context.h> + +char hw_serial[11] = "0"; + +static struct opensolaris_utsname hw_utsname = { + .machine = MACHINE +}; + +static void +opensolaris_utsname_init(void *arg) +{ + + hw_utsname.sysname = ostype; + hw_utsname.nodename = prison0.pr_hostname; + hw_utsname.release = osrelease; + snprintf(hw_utsname.version, sizeof (hw_utsname.version), + "%d", osreldate); +} + +char * +kmem_strdup(const char *s) +{ + char *buf; + + buf = kmem_alloc(strlen(s) + 1, KM_SLEEP); + strcpy(buf, s); + return (buf); +} + +int +ddi_copyin(const void *from, void *to, size_t len, int flags) +{ + /* Fake ioctl() issued by kernel, 'from' is a kernel address */ + if (flags & FKIOCTL) { + memcpy(to, from, len); + return (0); + } + + return (copyin(from, to, len)); +} + +int +ddi_copyout(const void *from, void *to, size_t len, int flags) +{ + /* Fake ioctl() issued by kernel, 'from' is a kernel address */ + if (flags & FKIOCTL) { + memcpy(to, from, len); + return (0); + } + + return (copyout(from, to, len)); +} + +int +spl_panic(const char *file, const char *func, int line, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vpanic(fmt, ap); + va_end(ap); +} + +utsname_t * +utsname(void) +{ + return (&hw_utsname); +} +SYSINIT(opensolaris_utsname_init, SI_SUB_TUNABLES, SI_ORDER_ANY, + opensolaris_utsname_init, NULL); diff --git a/module/os/freebsd/spl/spl_policy.c b/module/os/freebsd/spl/spl_policy.c new file mode 100644 index 000000000..53732836f --- /dev/null +++ b/module/os/freebsd/spl/spl_policy.c @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2007 Pawel Jakub Dawidek <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/priv.h> +#include <sys/vnode.h> +#include <sys/mntent.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/jail.h> +#include <sys/policy.h> +#include <sys/zfs_vfsops.h> + + +int +secpolicy_nfs(cred_t *cr) +{ + + return (spl_priv_check_cred(cr, PRIV_NFS_DAEMON)); +} + +int +secpolicy_zfs(cred_t *cr) +{ + + return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT)); +} + +int +secpolicy_sys_config(cred_t *cr, int checkonly __unused) +{ + + return (spl_priv_check_cred(cr, PRIV_ZFS_POOL_CONFIG)); +} + +int +secpolicy_zinject(cred_t *cr) +{ + + return (spl_priv_check_cred(cr, PRIV_ZFS_INJECT)); +} + +int +secpolicy_fs_unmount(cred_t *cr, struct mount *vfsp __unused) +{ + + return (spl_priv_check_cred(cr, PRIV_VFS_UNMOUNT)); +} + +int +secpolicy_fs_owner(struct mount *mp, cred_t *cr) +{ + + if (zfs_super_owner) { + if (cr->cr_uid == mp->mnt_cred->cr_uid && + cr->cr_prison == mp->mnt_cred->cr_prison) { + return (0); + } + } + return (EPERM); +} + +/* + * This check is done in kern_link(), so we could just return 0 here. + */ +extern int hardlink_check_uid; +int +secpolicy_basic_link(vnode_t *vp, cred_t *cr) +{ + + if (!hardlink_check_uid) + return (0); + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + return (spl_priv_check_cred(cr, PRIV_VFS_LINK)); +} + +int +secpolicy_vnode_stky_modify(cred_t *cr) +{ + + return (EPERM); +} + +int +secpolicy_vnode_remove(vnode_t *vp, cred_t *cr) +{ + + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + return (spl_priv_check_cred(cr, PRIV_VFS_ADMIN)); +} + +int +secpolicy_vnode_access(cred_t *cr, vnode_t *vp, uid_t owner, accmode_t accmode) +{ + + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + + if ((accmode & VREAD) && spl_priv_check_cred(cr, PRIV_VFS_READ) != 0) + return (EACCES); + if ((accmode & VWRITE) && + spl_priv_check_cred(cr, PRIV_VFS_WRITE) != 0) { + return (EACCES); + } + if (accmode & VEXEC) { + if (vp->v_type == VDIR) { + if (spl_priv_check_cred(cr, PRIV_VFS_LOOKUP) != 0) + return (EACCES); + } else { + if (spl_priv_check_cred(cr, PRIV_VFS_EXEC) != 0) + return (EACCES); + } + } + return (0); +} + +/* + * Like secpolicy_vnode_access() but we get the actual wanted mode and the + * current mode of the file, not the missing bits. + */ +int +secpolicy_vnode_access2(cred_t *cr, vnode_t *vp, uid_t owner, + accmode_t curmode, accmode_t wantmode) +{ + accmode_t mode; + + mode = ~curmode & wantmode; + + if (mode == 0) + return (0); + + return (secpolicy_vnode_access(cr, vp, owner, mode)); +} + +int +secpolicy_vnode_any_access(cred_t *cr, vnode_t *vp, uid_t owner) +{ + static int privs[] = { + PRIV_VFS_ADMIN, + PRIV_VFS_READ, + PRIV_VFS_WRITE, + PRIV_VFS_EXEC, + PRIV_VFS_LOOKUP + }; + int i; + + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + + /* Same as secpolicy_vnode_setdac */ + if (owner == cr->cr_uid) + return (0); + + for (i = 0; i < sizeof (privs)/sizeof (int); i++) { + int priv; + + switch (priv = privs[i]) { + case PRIV_VFS_EXEC: + if (vp->v_type == VDIR) + continue; + break; + case PRIV_VFS_LOOKUP: + if (vp->v_type != VDIR) + continue; + break; + } + if (spl_priv_check_cred(cr, priv) == 0) + return (0); + } + return (EPERM); +} + +int +secpolicy_vnode_setdac(vnode_t *vp, cred_t *cr, uid_t owner) +{ + + if (owner == cr->cr_uid) + return (0); + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + return (spl_priv_check_cred(cr, PRIV_VFS_ADMIN)); +} + +int +secpolicy_vnode_setattr(cred_t *cr, vnode_t *vp, struct vattr *vap, + const struct vattr *ovap, int flags, + int unlocked_access(void *, int, cred_t *), void *node) +{ + int mask = vap->va_mask; + int error; + + if (mask & AT_SIZE) { + if (vp->v_type == VDIR) + return (EISDIR); + error = unlocked_access(node, VWRITE, cr); + if (error) + return (error); + } + if (mask & AT_MODE) { + /* + * If not the owner of the file then check privilege + * for two things: the privilege to set the mode at all + * and, if we're setting setuid, we also need permissions + * to add the set-uid bit, if we're not the owner. + * In the specific case of creating a set-uid root + * file, we need even more permissions. + */ + error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid); + if (error) + return (error); + error = secpolicy_setid_setsticky_clear(vp, vap, ovap, cr); + if (error) + return (error); + } else { + vap->va_mode = ovap->va_mode; + } + if (mask & (AT_UID | AT_GID)) { + error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid); + if (error) + return (error); + + /* + * To change the owner of a file, or change the group of + * a file to a group of which we are not a member, the + * caller must have privilege. + */ + if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) || + ((mask & AT_GID) && vap->va_gid != ovap->va_gid && + !groupmember(vap->va_gid, cr))) { + if (secpolicy_fs_owner(vp->v_mount, cr) != 0) { + error = spl_priv_check_cred(cr, PRIV_VFS_CHOWN); + if (error) + return (error); + } + } + + if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) || + ((mask & AT_GID) && vap->va_gid != ovap->va_gid)) { + secpolicy_setid_clear(vap, vp, cr); + } + } + if (mask & (AT_ATIME | AT_MTIME)) { + /* + * From utimes(2): + * If times is NULL, ... The caller must be the owner of + * the file, have permission to write the file, or be the + * super-user. + * If times is non-NULL, ... The caller must be the owner of + * the file or be the super-user. + */ + error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid); + if (error && (vap->va_vaflags & VA_UTIMES_NULL)) + error = unlocked_access(node, VWRITE, cr); + if (error) + return (error); + } + return (0); +} + +int +secpolicy_vnode_create_gid(cred_t *cr) +{ + + return (EPERM); +} + +int +secpolicy_vnode_setids_setgids(vnode_t *vp, cred_t *cr, gid_t gid) +{ + + if (groupmember(gid, cr)) + return (0); + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + return (spl_priv_check_cred(cr, PRIV_VFS_SETGID)); +} + +int +secpolicy_vnode_setid_retain(vnode_t *vp, cred_t *cr, + boolean_t issuidroot __unused) +{ + + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + return (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID)); +} + +void +secpolicy_setid_clear(struct vattr *vap, vnode_t *vp, cred_t *cr) +{ + + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return; + + if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0) { + if (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID)) { + vap->va_mask |= AT_MODE; + vap->va_mode &= ~(S_ISUID|S_ISGID); + } + } +} + +int +secpolicy_setid_setsticky_clear(vnode_t *vp, struct vattr *vap, + const struct vattr *ovap, cred_t *cr) +{ + int error; + + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + + /* + * Privileged processes may set the sticky bit on non-directories, + * as well as set the setgid bit on a file with a group that the process + * is not a member of. Both of these are allowed in jail(8). + */ + if (vp->v_type != VDIR && (vap->va_mode & S_ISTXT)) { + if (spl_priv_check_cred(cr, PRIV_VFS_STICKYFILE)) + return (EFTYPE); + } + /* + * Check for privilege if attempting to set the + * group-id bit. + */ + if ((vap->va_mode & S_ISGID) != 0) { + error = secpolicy_vnode_setids_setgids(vp, cr, ovap->va_gid); + if (error) + return (error); + } + /* + * Deny setting setuid if we are not the file owner. + */ + if ((vap->va_mode & S_ISUID) && ovap->va_uid != cr->cr_uid) { + error = spl_priv_check_cred(cr, PRIV_VFS_ADMIN); + if (error) + return (error); + } + return (0); +} + +int +secpolicy_fs_mount(cred_t *cr, vnode_t *mvp, struct mount *vfsp) +{ + + return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT)); +} + +int +secpolicy_vnode_owner(vnode_t *vp, cred_t *cr, uid_t owner) +{ + + if (owner == cr->cr_uid) + return (0); + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + + /* XXX: vfs_suser()? */ + return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_OWNER)); +} + +int +secpolicy_vnode_chown(vnode_t *vp, cred_t *cr, uid_t owner) +{ + + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + return (spl_priv_check_cred(cr, PRIV_VFS_CHOWN)); +} + +void +secpolicy_fs_mount_clearopts(cred_t *cr, struct mount *vfsp) +{ + + if (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_NONUSER) != 0) { + MNT_ILOCK(vfsp); + vfsp->vfs_flag |= VFS_NOSETUID | MNT_USER; + vfs_clearmntopt(vfsp, MNTOPT_SETUID); + vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 0); + MNT_IUNLOCK(vfsp); + } +} + +/* + * Check privileges for setting xvattr attributes + */ +int +secpolicy_xvattr(vnode_t *vp, xvattr_t *xvap, uid_t owner, cred_t *cr, + vtype_t vtype) +{ + + if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + return (0); + return (spl_priv_check_cred(cr, PRIV_VFS_SYSFLAGS)); +} + +int +secpolicy_smb(cred_t *cr) +{ + + return (spl_priv_check_cred(cr, PRIV_NETSMB)); +} diff --git a/module/os/freebsd/spl/spl_procfs_list.c b/module/os/freebsd/spl/spl_procfs_list.c new file mode 100644 index 000000000..7b4ae9d0e --- /dev/null +++ b/module/os/freebsd/spl/spl_procfs_list.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 iXsystems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/list.h> +#include <sys/mutex.h> +#include <sys/procfs_list.h> + +void +seq_printf(struct seq_file *m, const char *fmt, ...) +{} + +void +procfs_list_install(const char *module, + const char *name, + mode_t mode, + procfs_list_t *procfs_list, + int (*show)(struct seq_file *f, void *p), + int (*show_header)(struct seq_file *f), + int (*clear)(procfs_list_t *procfs_list), + size_t procfs_list_node_off) +{ + mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&procfs_list->pl_list, + procfs_list_node_off + sizeof (procfs_list_node_t), + procfs_list_node_off + offsetof(procfs_list_node_t, pln_link)); + procfs_list->pl_next_id = 1; + procfs_list->pl_node_offset = procfs_list_node_off; +} + +void +procfs_list_uninstall(procfs_list_t *procfs_list) +{} + +void +procfs_list_destroy(procfs_list_t *procfs_list) +{ + ASSERT(list_is_empty(&procfs_list->pl_list)); + list_destroy(&procfs_list->pl_list); + mutex_destroy(&procfs_list->pl_lock); +} + +#define NODE_ID(procfs_list, obj) \ + (((procfs_list_node_t *)(((char *)obj) + \ + (procfs_list)->pl_node_offset))->pln_id) + +void +procfs_list_add(procfs_list_t *procfs_list, void *p) +{ + ASSERT(MUTEX_HELD(&procfs_list->pl_lock)); + NODE_ID(procfs_list, p) = procfs_list->pl_next_id++; + list_insert_tail(&procfs_list->pl_list, p); +} diff --git a/module/os/freebsd/spl/spl_string.c b/module/os/freebsd/spl/spl_string.c new file mode 100644 index 000000000..14d816b5c --- /dev/null +++ b/module/os/freebsd/spl/spl_string.c @@ -0,0 +1,106 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/param.h> +#include <sys/string.h> +#include <sys/kmem.h> +#include <machine/stdarg.h> + +#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9') + +#define IS_ALPHA(c) \ + (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) + +char * +strpbrk(const char *s, const char *b) +{ + const char *p; + + do { + for (p = b; *p != '\0' && *p != *s; ++p) + ; + if (*p != '\0') + return ((char *)s); + } while (*s++); + + return (NULL); +} + +/* + * Convert a string into a valid C identifier by replacing invalid + * characters with '_'. Also makes sure the string is nul-terminated + * and takes up at most n bytes. + */ +void +strident_canon(char *s, size_t n) +{ + char c; + char *end = s + n - 1; + + if ((c = *s) == 0) + return; + + if (!IS_ALPHA(c) && c != '_') + *s = '_'; + + while (s < end && ((c = *(++s)) != 0)) { + if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_') + *s = '_'; + } + *s = 0; +} + +/* + * Do not change the length of the returned string; it must be freed + * with strfree(). + */ +char * +kmem_asprintf(const char *fmt, ...) +{ + int size; + va_list adx; + char *buf; + + va_start(adx, fmt); + size = vsnprintf(NULL, 0, fmt, adx) + 1; + va_end(adx); + + buf = kmem_alloc(size, KM_SLEEP); + + va_start(adx, fmt); + (void) vsnprintf(buf, size, fmt, adx); + va_end(adx); + + return (buf); +} + +void +kmem_strfree(char *str) +{ + ASSERT(str != NULL); + kmem_free(str, strlen(str) + 1); +} diff --git a/module/os/freebsd/spl/spl_sunddi.c b/module/os/freebsd/spl/spl_sunddi.c new file mode 100644 index 000000000..1fa4f56f1 --- /dev/null +++ b/module/os/freebsd/spl/spl_sunddi.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2010 Pawel Jakub Dawidek <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/libkern.h> +#include <sys/limits.h> +#include <sys/misc.h> +#include <sys/sunddi.h> +#include <sys/sysctl.h> + +int +ddi_strtol(const char *str, char **nptr, int base, long *result) +{ + + *result = strtol(str, nptr, base); + return (0); +} + +int +ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result) +{ + + if (str == hw_serial) { + *result = prison0.pr_hostid; + return (0); + } + + *result = strtoul(str, nptr, base); + return (0); +} + +int +ddi_strtoull(const char *str, char **nptr, int base, unsigned long long *result) +{ + + *result = (unsigned long long)strtouq(str, nptr, base); + return (0); +} + +int +ddi_strtoll(const char *str, char **nptr, int base, long long *result) +{ + + *result = (long long)strtoq(str, nptr, base); + return (0); +} diff --git a/module/os/freebsd/spl/spl_sysevent.c b/module/os/freebsd/spl/spl_sysevent.c new file mode 100644 index 000000000..d3748276a --- /dev/null +++ b/module/os/freebsd/spl/spl_sysevent.c @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2010 Pawel Jakub Dawidek <[email protected]> + * Copyright (c) 2020 iXsystems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kmem.h> +#include <sys/sbuf.h> +#include <sys/nvpair.h> +#include <sys/sunddi.h> +#include <sys/sysevent.h> +#include <sys/fm/protocol.h> +#include <sys/fm/util.h> +#include <sys/bus.h> + +static int +log_sysevent(nvlist_t *event) +{ + struct sbuf *sb; + const char *type; + char typestr[128]; + nvpair_t *elem = NULL; + + sb = sbuf_new_auto(); + if (sb == NULL) + return (ENOMEM); + type = NULL; + + while ((elem = nvlist_next_nvpair(event, elem)) != NULL) { + switch (nvpair_type(elem)) { + case DATA_TYPE_BOOLEAN: + { + boolean_t value; + + (void) nvpair_value_boolean_value(elem, &value); + sbuf_printf(sb, " %s=%s", nvpair_name(elem), + value ? "true" : "false"); + break; + } + case DATA_TYPE_UINT8: + { + uint8_t value; + + (void) nvpair_value_uint8(elem, &value); + sbuf_printf(sb, " %s=%hhu", nvpair_name(elem), value); + break; + } + case DATA_TYPE_INT32: + { + int32_t value; + + (void) nvpair_value_int32(elem, &value); + sbuf_printf(sb, " %s=%jd", nvpair_name(elem), + (intmax_t)value); + break; + } + case DATA_TYPE_UINT32: + { + uint32_t value; + + (void) nvpair_value_uint32(elem, &value); + sbuf_printf(sb, " %s=%ju", nvpair_name(elem), + (uintmax_t)value); + break; + } + case DATA_TYPE_INT64: + { + int64_t value; + + (void) nvpair_value_int64(elem, &value); + sbuf_printf(sb, " %s=%jd", nvpair_name(elem), + (intmax_t)value); + break; + } + case DATA_TYPE_UINT64: + { + uint64_t value; + + (void) nvpair_value_uint64(elem, &value); + sbuf_printf(sb, " %s=%ju", nvpair_name(elem), + (uintmax_t)value); + break; + } + case DATA_TYPE_STRING: + { + char *value; + + (void) nvpair_value_string(elem, &value); + sbuf_printf(sb, " %s=%s", nvpair_name(elem), value); + if (strcmp(FM_CLASS, nvpair_name(elem)) == 0) + type = value; + break; + } + case DATA_TYPE_UINT8_ARRAY: + { + uint8_t *value; + uint_t ii, nelem; + + (void) nvpair_value_uint8_array(elem, &value, &nelem); + sbuf_printf(sb, " %s=", nvpair_name(elem)); + for (ii = 0; ii < nelem; ii++) + sbuf_printf(sb, "%02hhx", value[ii]); + break; + } + case DATA_TYPE_UINT16_ARRAY: + { + uint16_t *value; + uint_t ii, nelem; + + (void) nvpair_value_uint16_array(elem, &value, &nelem); + sbuf_printf(sb, " %s=", nvpair_name(elem)); + for (ii = 0; ii < nelem; ii++) + sbuf_printf(sb, "%04hx", value[ii]); + break; + } + case DATA_TYPE_UINT32_ARRAY: + { + uint32_t *value; + uint_t ii, nelem; + + (void) nvpair_value_uint32_array(elem, &value, &nelem); + sbuf_printf(sb, " %s=", nvpair_name(elem)); + for (ii = 0; ii < nelem; ii++) + sbuf_printf(sb, "%08jx", (uintmax_t)value[ii]); + break; + } + case DATA_TYPE_INT64_ARRAY: + { + int64_t *value; + uint_t ii, nelem; + + (void) nvpair_value_int64_array(elem, &value, &nelem); + sbuf_printf(sb, " %s=", nvpair_name(elem)); + for (ii = 0; ii < nelem; ii++) + sbuf_printf(sb, "%016lld", + (long long)value[ii]); + break; + } + case DATA_TYPE_UINT64_ARRAY: + { + uint64_t *value; + uint_t ii, nelem; + + (void) nvpair_value_uint64_array(elem, &value, &nelem); + sbuf_printf(sb, " %s=", nvpair_name(elem)); + for (ii = 0; ii < nelem; ii++) + sbuf_printf(sb, "%016jx", (uintmax_t)value[ii]); + break; + } + case DATA_TYPE_STRING_ARRAY: + { + char **strarr; + uint_t ii, nelem; + + (void) nvpair_value_string_array(elem, &strarr, &nelem); + + for (ii = 0; ii < nelem; ii++) { + if (strarr[ii] == NULL) { + sbuf_printf(sb, " <NULL>"); + continue; + } + + sbuf_printf(sb, " %s", strarr[ii]); + if (strcmp(FM_CLASS, strarr[ii]) == 0) + type = strarr[ii]; + } + break; + } + case DATA_TYPE_NVLIST: + /* XXX - requires recursing in log_sysevent */ + break; + default: + printf("%s: type %d is not implemented\n", __func__, + nvpair_type(elem)); + break; + } + } + + if (sbuf_finish(sb) != 0) { + sbuf_delete(sb); + return (ENOMEM); + } + + if (type == NULL) + type = ""; + if (strncmp(type, "ESC_ZFS_", 8) == 0) { + snprintf(typestr, sizeof (typestr), "misc.fs.zfs.%s", type + 8); + type = typestr; + } + devctl_notify("ZFS", "ZFS", type, sbuf_data(sb)); + sbuf_delete(sb); + + return (0); +} + +static void +sysevent_worker(void *arg __unused) +{ + zfs_zevent_t *ze; + nvlist_t *event; + uint64_t dropped = 0; + uint64_t dst_size; + int error; + + zfs_zevent_init(&ze); + for (;;) { + dst_size = 131072; + dropped = 0; + event = NULL; + error = zfs_zevent_next(ze, &event, + &dst_size, &dropped); + if (error) { + error = zfs_zevent_wait(ze); + if (error == ESHUTDOWN) + break; + } else { + VERIFY(event != NULL); + log_sysevent(event); + nvlist_free(event); + } + } + zfs_zevent_destroy(ze); + kthread_exit(); +} + +void +ddi_sysevent_init(void) +{ + kproc_kthread_add(sysevent_worker, NULL, &zfsproc, NULL, 0, 0, + "zfskern", "sysevent"); +} diff --git a/module/os/freebsd/spl/spl_taskq.c b/module/os/freebsd/spl/spl_taskq.c new file mode 100644 index 000000000..b6a501f67 --- /dev/null +++ b/module/os/freebsd/spl/spl_taskq.c @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2009 Pawel Jakub Dawidek <[email protected]> + * All rights reserved. + * + * Copyright (c) 2012 Spectra Logic Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/kmem.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/taskq.h> +#include <sys/zfs_context.h> + +#include <vm/uma.h> + +static uint_t taskq_tsd; +static uma_zone_t taskq_zone; + +taskq_t *system_taskq = NULL; +taskq_t *system_delay_taskq = NULL; +taskq_t *dynamic_taskq = NULL; + +extern int uma_align_cache; + +#define TQ_MASK uma_align_cache +#define TQ_PTR_MASK ~uma_align_cache + +#define TIMEOUT_TASK 1 +#define NORMAL_TASK 2 + +static int +taskqent_init(void *mem, int size, int flags) +{ + bzero(mem, sizeof (taskq_ent_t)); + return (0); +} + +static int +taskqent_ctor(void *mem, int size, void *arg, int flags) +{ + return (0); +} + +static void +taskqent_dtor(void *mem, int size, void *arg) +{ + taskq_ent_t *ent = mem; + + ent->tqent_gen = (ent->tqent_gen + 1) & TQ_MASK; +} + +static void +system_taskq_init(void *arg) +{ + + tsd_create(&taskq_tsd, NULL); + taskq_zone = uma_zcreate("taskq_zone", sizeof (taskq_ent_t), + taskqent_ctor, taskqent_dtor, taskqent_init, NULL, + UMA_ALIGN_CACHE, UMA_ZONE_NOFREE); + system_taskq = taskq_create("system_taskq", mp_ncpus, minclsyspri, + 0, 0, 0); + system_delay_taskq = taskq_create("system_delay_taskq", mp_ncpus, + minclsyspri, 0, 0, 0); +} +SYSINIT(system_taskq_init, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_init, + NULL); + +static void +system_taskq_fini(void *arg) +{ + + taskq_destroy(system_taskq); + uma_zdestroy(taskq_zone); + tsd_destroy(&taskq_tsd); +} +SYSUNINIT(system_taskq_fini, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_fini, + NULL); + +static void +taskq_tsd_set(void *context) +{ + taskq_t *tq = context; + + tsd_set(taskq_tsd, tq); +} + +static taskq_t * +taskq_create_with_init(const char *name, int nthreads, pri_t pri, + int minalloc __unused, int maxalloc __unused, uint_t flags) +{ + taskq_t *tq; + + if ((flags & TASKQ_THREADS_CPU_PCT) != 0) + nthreads = MAX((mp_ncpus * nthreads) / 100, 1); + + tq = kmem_alloc(sizeof (*tq), KM_SLEEP); + tq->tq_queue = taskqueue_create(name, M_WAITOK, + taskqueue_thread_enqueue, &tq->tq_queue); + taskqueue_set_callback(tq->tq_queue, TASKQUEUE_CALLBACK_TYPE_INIT, + taskq_tsd_set, tq); + taskqueue_set_callback(tq->tq_queue, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN, + taskq_tsd_set, NULL); + (void) taskqueue_start_threads(&tq->tq_queue, nthreads, pri, + "%s", name); + + return ((taskq_t *)tq); +} + +taskq_t * +taskq_create(const char *name, int nthreads, pri_t pri, int minalloc __unused, + int maxalloc __unused, uint_t flags) +{ + + return (taskq_create_with_init(name, nthreads, pri, minalloc, maxalloc, + flags)); +} + +taskq_t * +taskq_create_proc(const char *name, int nthreads, pri_t pri, int minalloc, + int maxalloc, proc_t *proc __unused, uint_t flags) +{ + + return (taskq_create_with_init(name, nthreads, pri, minalloc, maxalloc, + flags)); +} + +void +taskq_destroy(taskq_t *tq) +{ + + taskqueue_free(tq->tq_queue); + kmem_free(tq, sizeof (*tq)); +} + +int +taskq_member(taskq_t *tq, kthread_t *thread) +{ + + return (taskqueue_member(tq->tq_queue, thread)); +} + +taskq_t * +taskq_of_curthread(void) +{ + return (tsd_get(taskq_tsd)); +} + +int +taskq_cancel_id(taskq_t *tq, taskqid_t tid) +{ + uint32_t pend; + int rc; + taskq_ent_t *ent = (void*)(tid & TQ_PTR_MASK); + + if (ent == NULL) + return (0); + if ((tid & TQ_MASK) != ent->tqent_gen) + return (0); + if (ent->tqent_type == TIMEOUT_TASK) { + rc = taskqueue_cancel_timeout(tq->tq_queue, + &ent->tqent_timeout_task, &pend); + } else + rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend); + if (rc == EBUSY) + taskq_wait_id(tq, tid); + else + uma_zfree(taskq_zone, ent); + return (rc); +} + +static void +taskq_run(void *arg, int pending __unused) +{ + taskq_ent_t *task = arg; + + task->tqent_func(task->tqent_arg); + uma_zfree(taskq_zone, task); +} + +taskqid_t +taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, + uint_t flags, clock_t expire_time) +{ + taskq_ent_t *task; + taskqid_t tid; + clock_t timo; + int mflag; + + timo = expire_time - ddi_get_lbolt(); + if (timo <= 0) + return (taskq_dispatch(tq, func, arg, flags)); + + if ((flags & (TQ_SLEEP | TQ_NOQUEUE)) == TQ_SLEEP) + mflag = M_WAITOK; + else + mflag = M_NOWAIT; + + task = uma_zalloc(taskq_zone, mflag); + if (task == NULL) + return (0); + tid = (uintptr_t)task; + MPASS((tid & TQ_MASK) == 0); + task->tqent_func = func; + task->tqent_arg = arg; + task->tqent_type = TIMEOUT_TASK; + tid |= task->tqent_gen; + TIMEOUT_TASK_INIT(tq->tq_queue, &task->tqent_timeout_task, 0, + taskq_run, task); + + taskqueue_enqueue_timeout(tq->tq_queue, &task->tqent_timeout_task, + timo); + return (tid); +} + +taskqid_t +taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) +{ + taskq_ent_t *task; + int mflag, prio; + taskqid_t tid; + + if ((flags & (TQ_SLEEP | TQ_NOQUEUE)) == TQ_SLEEP) + mflag = M_WAITOK; + else + mflag = M_NOWAIT; + /* + * If TQ_FRONT is given, we want higher priority for this task, so it + * can go at the front of the queue. + */ + prio = !!(flags & TQ_FRONT); + + task = uma_zalloc(taskq_zone, mflag); + if (task == NULL) + return (0); + + tid = (uintptr_t)task; + MPASS((tid & TQ_MASK) == 0); + task->tqent_func = func; + task->tqent_arg = arg; + task->tqent_type = NORMAL_TASK; + TASK_INIT(&task->tqent_task, prio, taskq_run, task); + tid |= task->tqent_gen; + taskqueue_enqueue(tq->tq_queue, &task->tqent_task); + return (tid); +} + +static void +taskq_run_ent(void *arg, int pending __unused) +{ + taskq_ent_t *task = arg; + + task->tqent_func(task->tqent_arg); +} + +void +taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags, + taskq_ent_t *task) +{ + int prio; + + /* + * If TQ_FRONT is given, we want higher priority for this task, so it + * can go at the front of the queue. + */ + prio = !!(flags & TQ_FRONT); + + task->tqent_func = func; + task->tqent_arg = arg; + + TASK_INIT(&task->tqent_task, prio, taskq_run_ent, task); + taskqueue_enqueue(tq->tq_queue, &task->tqent_task); +} + +void +taskq_wait(taskq_t *tq) +{ + taskqueue_quiesce(tq->tq_queue); +} + +void +taskq_wait_id(taskq_t *tq, taskqid_t tid) +{ + taskq_ent_t *ent = (void*)(tid & TQ_PTR_MASK); + + if ((tid & TQ_MASK) != ent->tqent_gen) + return; + + taskqueue_drain(tq->tq_queue, &ent->tqent_task); +} + +void +taskq_wait_outstanding(taskq_t *tq, taskqid_t id __unused) +{ + taskqueue_drain_all(tq->tq_queue); +} + +int +taskq_empty_ent(taskq_ent_t *t) +{ + return (t->tqent_task.ta_pending == 0); +} diff --git a/module/os/freebsd/spl/spl_uio.c b/module/os/freebsd/spl/spl_uio.c new file mode 100644 index 000000000..05dbfd06d --- /dev/null +++ b/module/os/freebsd/spl/spl_uio.c @@ -0,0 +1,92 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ + +/* + * $FreeBSD$ + */ + +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/vnode.h> + +/* + * same as uiomove() but doesn't modify uio structure. + * return in cbytes how many bytes were copied. + */ +int +uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes) +{ + struct iovec small_iovec[1]; + struct uio small_uio_clone; + struct uio *uio_clone; + int error; + + ASSERT3U(uio->uio_rw, ==, rw); + if (uio->uio_iovcnt == 1) { + small_uio_clone = *uio; + small_iovec[0] = *uio->uio_iov; + small_uio_clone.uio_iov = small_iovec; + uio_clone = &small_uio_clone; + } else { + uio_clone = cloneuio(uio); + } + + error = vn_io_fault_uiomove(p, n, uio_clone); + *cbytes = uio->uio_resid - uio_clone->uio_resid; + if (uio_clone != &small_uio_clone) + free(uio_clone, M_IOV); + return (error); +} + +/* + * Drop the next n chars out of *uiop. + */ +void +uioskip(uio_t *uio, size_t n) +{ + enum uio_seg segflg; + + /* For the full compatibility with illumos. */ + if (n > uio->uio_resid) + return; + + segflg = uio->uio_segflg; + uio->uio_segflg = UIO_NOCOPY; + uiomove(NULL, n, uio->uio_rw, uio); + uio->uio_segflg = segflg; +} diff --git a/module/os/freebsd/spl/spl_vfs.c b/module/os/freebsd/spl/spl_vfs.c new file mode 100644 index 000000000..99da8c976 --- /dev/null +++ b/module/os/freebsd/spl/spl_vfs.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2006-2007 Pawel Jakub Dawidek <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/cred.h> +#include <sys/vfs.h> +#include <sys/priv.h> +#include <sys/libkern.h> + +#include <sys/mutex.h> +#include <sys/vnode.h> + +MALLOC_DECLARE(M_MOUNT); + +void +vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg, + int flags __unused) +{ + struct vfsopt *opt; + size_t namesize; + int locked; + + if (!(locked = mtx_owned(MNT_MTX(vfsp)))) + MNT_ILOCK(vfsp); + + if (vfsp->mnt_opt == NULL) { + void *opts; + + MNT_IUNLOCK(vfsp); + opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK); + MNT_ILOCK(vfsp); + if (vfsp->mnt_opt == NULL) { + vfsp->mnt_opt = opts; + TAILQ_INIT(vfsp->mnt_opt); + } else { + free(opts, M_MOUNT); + } + } + + MNT_IUNLOCK(vfsp); + + opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK); + namesize = strlen(name) + 1; + opt->name = malloc(namesize, M_MOUNT, M_WAITOK); + strlcpy(opt->name, name, namesize); + opt->pos = -1; + opt->seen = 1; + if (arg == NULL) { + opt->value = NULL; + opt->len = 0; + } else { + opt->len = strlen(arg) + 1; + opt->value = malloc(opt->len, M_MOUNT, M_WAITOK); + bcopy(arg, opt->value, opt->len); + } + + MNT_ILOCK(vfsp); + TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link); + if (!locked) + MNT_IUNLOCK(vfsp); +} + +void +vfs_clearmntopt(vfs_t *vfsp, const char *name) +{ + int locked; + + if (!(locked = mtx_owned(MNT_MTX(vfsp)))) + MNT_ILOCK(vfsp); + vfs_deleteopt(vfsp->mnt_opt, name); + if (!locked) + MNT_IUNLOCK(vfsp); +} + +int +vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp) +{ + struct vfsoptlist *opts = vfsp->mnt_optnew; + int error; + + if (opts == NULL) + return (0); + error = vfs_getopt(opts, opt, (void **)argp, NULL); + return (error != 0 ? 0 : 1); +} + +int +mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, + char *fspec, int fsflags) +{ + struct vfsconf *vfsp; + struct mount *mp; + vnode_t *vp, *mvp; + struct ucred *cr; + int error; + + ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot"); + + vp = *vpp; + *vpp = NULL; + error = 0; + + /* + * Be ultra-paranoid about making sure the type and fspath + * variables will fit in our mp buffers, including the + * terminating NUL. + */ + if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) + error = ENAMETOOLONG; + if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL) + error = ENODEV; + if (error == 0 && vp->v_type != VDIR) + error = ENOTDIR; + /* + * We need vnode lock to protect v_mountedhere and vnode interlock + * to protect v_iflag. + */ + if (error == 0) { + VI_LOCK(vp); + if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL) + vp->v_iflag |= VI_MOUNT; + else + error = EBUSY; + VI_UNLOCK(vp); + } + if (error != 0) { + vput(vp); + return (error); + } + VOP_UNLOCK1(vp); + + /* + * Allocate and initialize the filesystem. + * We don't want regular user that triggered snapshot mount to be able + * to unmount it, so pass credentials of the parent mount. + */ + mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred); + + mp->mnt_optnew = NULL; + vfs_setmntopt(mp, "from", fspec, 0); + mp->mnt_optnew = mp->mnt_opt; + mp->mnt_opt = NULL; + + /* + * Set the mount level flags. + */ + mp->mnt_flag = fsflags & MNT_UPDATEMASK; + /* + * Snapshots are always read-only. + */ + mp->mnt_flag |= MNT_RDONLY; + /* + * We don't want snapshots to allow access to vulnerable setuid + * programs, so we turn off setuid when mounting snapshots. + */ + mp->mnt_flag |= MNT_NOSUID; + /* + * We don't want snapshots to be visible in regular + * mount(8) and df(1) output. + */ + mp->mnt_flag |= MNT_IGNORE; + /* + * XXX: This is evil, but we can't mount a snapshot as a regular user. + * XXX: Is is safe when snapshot is mounted from within a jail? + */ + cr = td->td_ucred; + td->td_ucred = kcred; + error = VFS_MOUNT(mp); + td->td_ucred = cr; + + if (error != 0) { + /* + * Clear VI_MOUNT and decrement the use count "atomically", + * under the vnode lock. This is not strictly required, + * but makes it easier to reason about the life-cycle and + * ownership of the covered vnode. + */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + VI_LOCK(vp); + vp->v_iflag &= ~VI_MOUNT; + VI_UNLOCK(vp); + vput(vp); + vfs_unbusy(mp); + vfs_freeopts(mp->mnt_optnew); + mp->mnt_vnodecovered = NULL; + vfs_mount_destroy(mp); + return (error); + } + + if (mp->mnt_opt != NULL) + vfs_freeopts(mp->mnt_opt); + mp->mnt_opt = mp->mnt_optnew; + (void) VFS_STATFS(mp, &mp->mnt_stat); + + /* + * Prevent external consumers of mount options from reading + * mnt_optnew. + */ + mp->mnt_optnew = NULL; + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); +#ifdef FREEBSD_NAMECACHE + cache_purge(vp); +#endif + VI_LOCK(vp); + vp->v_iflag &= ~VI_MOUNT; + VI_UNLOCK(vp); + + vp->v_mountedhere = mp; + /* Put the new filesystem on the mount list. */ + mtx_lock(&mountlist_mtx); + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mtx_unlock(&mountlist_mtx); + vfs_event_signal(NULL, VQ_MOUNT, 0); + if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) + panic("mount: lost mount"); + VOP_UNLOCK1(vp); +#if __FreeBSD_version >= 1300048 + vfs_op_exit(mp); +#endif + vfs_unbusy(mp); + *vpp = mvp; + return (0); +} + +/* + * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it + * asynchronously using a taskq. This can avoid deadlocks caused by re-entering + * the file system as a result of releasing the vnode. Note, file systems + * already have to handle the race where the vnode is incremented before the + * inactive routine is called and does its locking. + * + * Warning: Excessive use of this routine can lead to performance problems. + * This is because taskqs throttle back allocation if too many are created. + */ +void +vn_rele_async(vnode_t *vp, taskq_t *taskq) +{ + VERIFY(vp->v_count > 0); + if (refcount_release_if_not_last(&vp->v_usecount)) { +#if __FreeBSD_version < 1300045 + vdrop(vp); +#endif + return; + } + VERIFY(taskq_dispatch((taskq_t *)taskq, + (task_func_t *)vrele, vp, TQ_SLEEP) != 0); +} diff --git a/module/os/freebsd/spl/spl_vm.c b/module/os/freebsd/spl/spl_vm.c new file mode 100644 index 000000000..cd18ebb7a --- /dev/null +++ b/module/os/freebsd/spl/spl_vm.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2013 EMC Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/byteorder.h> +#include <sys/lock.h> +#include <sys/freebsd_rwlock.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pager.h> + +const int zfs_vm_pagerret_bad = VM_PAGER_BAD; +const int zfs_vm_pagerret_error = VM_PAGER_ERROR; +const int zfs_vm_pagerret_ok = VM_PAGER_OK; +const int zfs_vm_pagerput_sync = VM_PAGER_PUT_SYNC; +const int zfs_vm_pagerput_inval = VM_PAGER_PUT_INVAL; + +void +zfs_vmobject_assert_wlocked(vm_object_t object) +{ + + /* + * This is not ideal because FILE/LINE used by assertions will not + * be too helpful, but it must be an hard function for + * compatibility reasons. + */ + VM_OBJECT_ASSERT_WLOCKED(object); +} + +void +zfs_vmobject_wlock(vm_object_t object) +{ + + VM_OBJECT_WLOCK(object); +} + +void +zfs_vmobject_wunlock(vm_object_t object) +{ + + VM_OBJECT_WUNLOCK(object); +} diff --git a/module/os/freebsd/spl/spl_zlib.c b/module/os/freebsd/spl/spl_zlib.c new file mode 100644 index 000000000..7549483d8 --- /dev/null +++ b/module/os/freebsd/spl/spl_zlib.c @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2020 iXsystems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/kmem.h> +#include <sys/kmem_cache.h> +#include <sys/zmod.h> +#if __FreeBSD_version >= 1300041 +#include <contrib/zlib/zlib.h> +#else +#include <sys/zlib.h> +#endif +#include <sys/kobj.h> + + +/*ARGSUSED*/ +static void * +zcalloc(void *opaque, uint_t items, uint_t size) +{ + + return (malloc((size_t)items*size, M_SOLARIS, M_NOWAIT)); +} + +/*ARGSUSED*/ +static void +zcfree(void *opaque, void *ptr) +{ + + free(ptr, M_SOLARIS); +} + +static int +zlib_deflateInit(z_stream *stream, int level) +{ + + stream->zalloc = zcalloc; + stream->opaque = NULL; + stream->zfree = zcfree; + + return (deflateInit(stream, level)); +} + +static int +zlib_deflate(z_stream *stream, int flush) +{ + return (deflate(stream, flush)); +} + +static int +zlib_deflateEnd(z_stream *stream) +{ + return (deflateEnd(stream)); +} + +static int +zlib_inflateInit(z_stream *stream) +{ + stream->zalloc = zcalloc; + stream->opaque = NULL; + stream->zfree = zcfree; + + return (inflateInit(stream)); +} + +static int +zlib_inflate(z_stream *stream, int finish) +{ +#if __FreeBSD_version >= 1300024 + return (inflate(stream, finish)); +#else + return (_zlib104_inflate(stream, finish)); +#endif +} + + +static int +zlib_inflateEnd(z_stream *stream) +{ + return (inflateInit(stream)); +} + +/* + * A kmem_cache is used for the zlib workspaces to avoid having to vmalloc + * and vfree for every call. Using a kmem_cache also has the advantage + * that improves the odds that the memory used will be local to this cpu. + * To further improve things it might be wise to create a dedicated per-cpu + * workspace for use. This would take some additional care because we then + * must disable preemption around the critical section, and verify that + * zlib_deflate* and zlib_inflate* never internally call schedule(). + */ +static void * +zlib_workspace_alloc(int flags) +{ + // return (kmem_cache_alloc(zlib_workspace_cache, flags)); + return (NULL); +} + +static void +zlib_workspace_free(void *workspace) +{ + // kmem_cache_free(zlib_workspace_cache, workspace); +} + +/* + * Compresses the source buffer into the destination buffer. The level + * parameter has the same meaning as in deflateInit. sourceLen is the byte + * length of the source buffer. Upon entry, destLen is the total size of the + * destination buffer, which must be at least 0.1% larger than sourceLen plus + * 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + * + * compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + * memory, Z_BUF_ERROR if there was not enough room in the output buffer, + * Z_STREAM_ERROR if the level parameter is invalid. + */ +int +z_compress_level(void *dest, size_t *destLen, const void *source, + size_t sourceLen, int level) +{ + z_stream stream; + int err; + + bzero(&stream, sizeof (stream)); + stream.next_in = (Byte *)source; + stream.avail_in = (uInt)sourceLen; + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + stream.opaque = NULL; + + if ((size_t)stream.avail_out != *destLen) + return (Z_BUF_ERROR); + + stream.opaque = zlib_workspace_alloc(KM_SLEEP); +#if 0 + if (!stream.opaque) + return (Z_MEM_ERROR); +#endif + err = zlib_deflateInit(&stream, level); + if (err != Z_OK) { + zlib_workspace_free(stream.opaque); + return (err); + } + + err = zlib_deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + zlib_deflateEnd(&stream); + zlib_workspace_free(stream.opaque); + return (err == Z_OK ? Z_BUF_ERROR : err); + } + *destLen = stream.total_out; + + err = zlib_deflateEnd(&stream); + zlib_workspace_free(stream.opaque); + return (err); +} + +/* + * Decompresses the source buffer into the destination buffer. sourceLen is + * the byte length of the source buffer. Upon entry, destLen is the total + * size of the destination buffer, which must be large enough to hold the + * entire uncompressed data. (The size of the uncompressed data must have + * been saved previously by the compressor and transmitted to the decompressor + * by some mechanism outside the scope of this compression library.) + * Upon exit, destLen is the actual size of the compressed buffer. + * This function can be used to decompress a whole file at once if the + * input file is mmap'ed. + * + * uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + * enough memory, Z_BUF_ERROR if there was not enough room in the output + * buffer, or Z_DATA_ERROR if the input data was corrupted. + */ +int +z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen) +{ + z_stream stream; + int err; + + bzero(&stream, sizeof (stream)); + + stream.next_in = (Byte *)source; + stream.avail_in = (uInt)sourceLen; + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + + if ((size_t)stream.avail_out != *destLen) + return (Z_BUF_ERROR); + + stream.opaque = zlib_workspace_alloc(KM_SLEEP); +#if 0 + if (!stream.opaque) + return (Z_MEM_ERROR); +#endif + err = zlib_inflateInit(&stream); + if (err != Z_OK) { + zlib_workspace_free(stream.opaque); + return (err); + } + + err = zlib_inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + zlib_inflateEnd(&stream); + zlib_workspace_free(stream.opaque); + + if (err == Z_NEED_DICT || + (err == Z_BUF_ERROR && stream.avail_in == 0)) + return (Z_DATA_ERROR); + + return (err); + } + *destLen = stream.total_out; + + err = zlib_inflateEnd(&stream); + zlib_workspace_free(stream.opaque); + + return (err); +} + +#if 0 +int +spl_zlib_init(void) +{ + int size; + + size = MAX(spl_zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), + zlib_inflate_workspacesize()); + + zlib_workspace_cache = kmem_cache_create( + "spl_zlib_workspace_cache", + size, 0, NULL, NULL, NULL, NULL, NULL, + KMC_VMEM | KMC_NOEMERGENCY); + if (!zlib_workspace_cache) + return (1); + + return (0); +} + +void +spl_zlib_fini(void) +{ + kmem_cache_destroy(zlib_workspace_cache); + zlib_workspace_cache = NULL; +} +#endif diff --git a/module/os/freebsd/spl/spl_zone.c b/module/os/freebsd/spl/spl_zone.c new file mode 100644 index 000000000..40f21934e --- /dev/null +++ b/module/os/freebsd/spl/spl_zone.c @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2007 Pawel Jakub Dawidek <[email protected]> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> +#include <sys/malloc.h> +#include <sys/queue.h> +#include <sys/jail.h> +#include <sys/osd.h> +#include <sys/priv.h> +#include <sys/zone.h> + +#include <sys/policy.h> + +static MALLOC_DEFINE(M_ZONES, "zones_data", "Zones data"); + +/* + * Structure to record list of ZFS datasets exported to a zone. + */ +typedef struct zone_dataset { + LIST_ENTRY(zone_dataset) zd_next; + char zd_dataset[0]; +} zone_dataset_t; + +LIST_HEAD(zone_dataset_head, zone_dataset); + +static int zone_slot; + +int +zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid) +{ + struct zone_dataset_head *head; + zone_dataset_t *zd, *zd2; + struct prison *pr; + int dofree, error; + + if ((error = spl_priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0) + return (error); + + /* Allocate memory before we grab prison's mutex. */ + zd = malloc(sizeof (*zd) + strlen(dataset) + 1, M_ZONES, M_WAITOK); + + sx_slock(&allprison_lock); + pr = prison_find(jailid); /* Locks &pr->pr_mtx. */ + sx_sunlock(&allprison_lock); + if (pr == NULL) { + free(zd, M_ZONES); + return (ENOENT); + } + + head = osd_jail_get(pr, zone_slot); + if (head != NULL) { + dofree = 0; + LIST_FOREACH(zd2, head, zd_next) { + if (strcmp(dataset, zd2->zd_dataset) == 0) { + free(zd, M_ZONES); + error = EEXIST; + goto end; + } + } + } else { + dofree = 1; + prison_hold_locked(pr); + mtx_unlock(&pr->pr_mtx); + head = malloc(sizeof (*head), M_ZONES, M_WAITOK); + LIST_INIT(head); + mtx_lock(&pr->pr_mtx); + error = osd_jail_set(pr, zone_slot, head); + KASSERT(error == 0, ("osd_jail_set() failed (error=%d)", + error)); + } + strcpy(zd->zd_dataset, dataset); + LIST_INSERT_HEAD(head, zd, zd_next); +end: + if (dofree) + prison_free_locked(pr); + else + mtx_unlock(&pr->pr_mtx); + return (error); +} + +int +zone_dataset_detach(struct ucred *cred, const char *dataset, int jailid) +{ + struct zone_dataset_head *head; + zone_dataset_t *zd; + struct prison *pr; + int error; + + if ((error = spl_priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0) + return (error); + + sx_slock(&allprison_lock); + pr = prison_find(jailid); + sx_sunlock(&allprison_lock); + if (pr == NULL) + return (ENOENT); + head = osd_jail_get(pr, zone_slot); + if (head == NULL) { + error = ENOENT; + goto end; + } + LIST_FOREACH(zd, head, zd_next) { + if (strcmp(dataset, zd->zd_dataset) == 0) + break; + } + if (zd == NULL) + error = ENOENT; + else { + LIST_REMOVE(zd, zd_next); + free(zd, M_ZONES); + if (LIST_EMPTY(head)) + osd_jail_del(pr, zone_slot); + error = 0; + } +end: + mtx_unlock(&pr->pr_mtx); + return (error); +} + +/* + * Returns true if the named dataset is visible in the current zone. + * The 'write' parameter is set to 1 if the dataset is also writable. + */ +int +zone_dataset_visible(const char *dataset, int *write) +{ + struct zone_dataset_head *head; + zone_dataset_t *zd; + struct prison *pr; + size_t len; + int ret = 0; + + if (dataset[0] == '\0') + return (0); + if (INGLOBALZONE(curproc)) { + if (write != NULL) + *write = 1; + return (1); + } + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + head = osd_jail_get(pr, zone_slot); + if (head == NULL) + goto end; + + /* + * Walk the list once, looking for datasets which match exactly, or + * specify a dataset underneath an exported dataset. If found, return + * true and note that it is writable. + */ + LIST_FOREACH(zd, head, zd_next) { + len = strlen(zd->zd_dataset); + if (strlen(dataset) >= len && + bcmp(dataset, zd->zd_dataset, len) == 0 && + (dataset[len] == '\0' || dataset[len] == '/' || + dataset[len] == '@')) { + if (write) + *write = 1; + ret = 1; + goto end; + } + } + + /* + * Walk the list a second time, searching for datasets which are parents + * of exported datasets. These should be visible, but read-only. + * + * Note that we also have to support forms such as 'pool/dataset/', with + * a trailing slash. + */ + LIST_FOREACH(zd, head, zd_next) { + len = strlen(dataset); + if (dataset[len - 1] == '/') + len--; /* Ignore trailing slash */ + if (len < strlen(zd->zd_dataset) && + bcmp(dataset, zd->zd_dataset, len) == 0 && + zd->zd_dataset[len] == '/') { + if (write) + *write = 0; + ret = 1; + goto end; + } + } +end: + mtx_unlock(&pr->pr_mtx); + return (ret); +} + +static void +zone_destroy(void *arg) +{ + struct zone_dataset_head *head; + zone_dataset_t *zd; + + head = arg; + while ((zd = LIST_FIRST(head)) != NULL) { + LIST_REMOVE(zd, zd_next); + free(zd, M_ZONES); + } + free(head, M_ZONES); +} + +uint32_t +zone_get_hostid(void *ptr) +{ + + KASSERT(ptr == NULL, ("only NULL pointer supported in %s", __func__)); + + return ((uint32_t)curthread->td_ucred->cr_prison->pr_hostid); +} + +boolean_t +in_globalzone(struct proc *p) +{ + return (!jailed(FIRST_THREAD_IN_PROC((p))->td_ucred)); +} + +static void +zone_sysinit(void *arg __unused) +{ + + zone_slot = osd_jail_register(zone_destroy, NULL); +} + +static void +zone_sysuninit(void *arg __unused) +{ + + osd_jail_deregister(zone_slot); +} + +SYSINIT(zone_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, zone_sysinit, NULL); +SYSUNINIT(zone_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY, zone_sysuninit, NULL); |