about summary refs log tree commit diff stats
path: root/module/spl
diff options
context:
space:
mode:
author Matthew Macy <[email protected]> 2019-09-06 11:26:26 -0700
committer Brian Behlendorf <[email protected]> 2019-09-06 11:26:26 -0700
commit bced7e3aaa3cf54d5e8e4f94e067144b27cb744b (patch)
tree 729dac6996f4f11b88bc3a831b2b8d6852e6fbb6 /module/spl
parent 870e7a52c105f26ef4254b90230d396f4ce39ea7 (diff)
OpenZFS restructuring - move platform specific sources
Move platform specific Linux source under module/os/linux/ and update the build system accordingly. Additional code restructuring will follow to make the common code fully portable.

Reviewed-by: Jorgen Lundman <[email protected]>
Reviewed-by: Igor Kozhukhov <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Matthew Macy <[email protected]>
Closes #9206
Diffstat (limited to 'module/spl')
-rw-r--r-- module/spl/Makefile.in 20
-rw-r--r-- module/spl/README.md 16
-rw-r--r-- module/spl/THIRDPARTYLICENSE.gplv2 339
-rw-r--r-- module/spl/THIRDPARTYLICENSE.gplv2.descrip 1
-rw-r--r-- module/spl/spl-atomic.c 36
-rw-r--r-- module/spl/spl-condvar.c 461
-rw-r--r-- module/spl/spl-cred.c 200
-rw-r--r-- module/spl/spl-err.c 124
-rw-r--r-- module/spl/spl-generic.c 757
-rw-r--r-- module/spl/spl-kmem-cache.c 1780
-rw-r--r-- module/spl/spl-kmem.c 556
-rw-r--r-- module/spl/spl-kobj.c 86
-rw-r--r-- module/spl/spl-kstat.c 770
-rw-r--r-- module/spl/spl-proc.c 782
-rw-r--r-- module/spl/spl-procfs-list.c 257
-rw-r--r-- module/spl/spl-taskq.c 1292
-rw-r--r-- module/spl/spl-thread.c 163
-rw-r--r-- module/spl/spl-tsd.c 720
-rw-r--r-- module/spl/spl-vmem.c 135
-rw-r--r-- module/spl/spl-vnode.c 719
-rw-r--r-- module/spl/spl-xdr.c 513
-rw-r--r-- module/spl/spl-zlib.c 217
22 files changed, 2 insertions, 9942 deletions
diff --git a/module/spl/Makefile.in b/module/spl/Makefile.in
index e16666aa9..8602f4edd 100644
--- a/module/spl/Makefile.in
+++ b/module/spl/Makefile.in
@@ -7,21 +7,5 @@ obj-$(CONFIG_ZFS) := $(MODULE).o
ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-$(MODULE)-objs += spl-atomic.o
-$(MODULE)-objs += spl-condvar.o
-$(MODULE)-objs += spl-cred.o
-$(MODULE)-objs += spl-err.o
-$(MODULE)-objs += spl-generic.o
-$(MODULE)-objs += spl-kmem.o
-$(MODULE)-objs += spl-kmem-cache.o
-$(MODULE)-objs += spl-kobj.o
-$(MODULE)-objs += spl-kstat.o
-$(MODULE)-objs += spl-proc.o
-$(MODULE)-objs += spl-procfs-list.o
-$(MODULE)-objs += spl-taskq.o
-$(MODULE)-objs += spl-thread.o
-$(MODULE)-objs += spl-tsd.o
-$(MODULE)-objs += spl-vmem.o
-$(MODULE)-objs += spl-vnode.o
-$(MODULE)-objs += spl-xdr.o
-$(MODULE)-objs += spl-zlib.o
+
+-include @abs_top_builddir@/module/os/linux/spl/Makefile
diff --git a/module/spl/README.md b/module/spl/README.md
deleted file mode 100644
index 57f635aed..000000000
--- a/module/spl/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-The Solaris Porting Layer, SPL, is a Linux kernel module which provides a
-compatibility layer used by the [ZFS on Linux](http://zfsonlinux.org) project.
-
-# Installation
-
-The latest version of the SPL is maintained as part of this repository.
-Only when building ZFS version 0.7.x or earlier must an external SPL release
-be used. These releases can be found at:
-
- * Version 0.7.x: https://github.com/zfsonlinux/spl/tree/spl-0.7-release
- * Version 0.6.5.x: https://github.com/zfsonlinux/spl/tree/spl-0.6.5-release
-
-# Release
-
-The SPL is released under a GPLv2 license.
-For more details see the NOTICE and THIRDPARTYLICENSE files; `UCRL-CODE-235197`
diff --git a/module/spl/THIRDPARTYLICENSE.gplv2 b/module/spl/THIRDPARTYLICENSE.gplv2
deleted file mode 100644
index d159169d1..000000000
--- a/module/spl/THIRDPARTYLICENSE.gplv2
+++ /dev/null
@@ -1,339 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Lesser General Public License instead.) You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
- We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- GNU GENERAL PUBLIC LICENSE
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- 0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License. The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language. (Hereinafter, translation is included without limitation in
-the term "modification".) Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
- 1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
- 2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
- a) You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
-
- b) You must cause any work that you distribute or publish, that in
- whole or in part contains or is derived from the Program or any
- part thereof, to be licensed as a whole at no charge to all third
- parties under the terms of this License.
-
- c) If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display an
- announcement including an appropriate copyright notice and a
- notice that there is no warranty (or else, saying that you provide
- a warranty) and that users may redistribute the program under
- these conditions, and telling the user how to view a copy of this
- License. (Exception: if the Program itself is interactive but
- does not normally print such an announcement, your work based on
- the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
- 3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
- a) Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of Sections
- 1 and 2 above on a medium customarily used for software interchange; or,
-
- b) Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a medium
- customarily used for software interchange; or,
-
- c) Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with such
- an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it. For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable. However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
- 4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License. Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
- 5. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Program or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
- 6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
- 7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all. For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
- 8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded. In such case, this License incorporates
-the limitation as if written in the body of this License.
-
- 9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation. If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
- 10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission. For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this. Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
- NO WARRANTY
-
- 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
- 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
- Gnomovision version 69, Copyright (C) year name of author
- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary. Here is a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the program
- `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
- <signature of Ty Coon>, 1 April 1989
- Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs. If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.
diff --git a/module/spl/THIRDPARTYLICENSE.gplv2.descrip b/module/spl/THIRDPARTYLICENSE.gplv2.descrip
deleted file mode 100644
index 78535a8ee..000000000
--- a/module/spl/THIRDPARTYLICENSE.gplv2.descrip
+++ /dev/null
@@ -1 +0,0 @@
-COMPATIBILITY LAYER FOR OPENZFS ON LINUX
diff --git a/module/spl/spl-atomic.c b/module/spl/spl-atomic.c
deleted file mode 100644
index 47ed1886e..000000000
--- a/module/spl/spl-atomic.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Atomic Implementation.
- */
-
-#include <sys/atomic.h>
-
-#ifdef ATOMIC_SPINLOCK
-/* Global atomic lock declarations */
-DEFINE_SPINLOCK(atomic32_lock);
-DEFINE_SPINLOCK(atomic64_lock);
-
-EXPORT_SYMBOL(atomic32_lock);
-EXPORT_SYMBOL(atomic64_lock);
-#endif /* ATOMIC_SPINLOCK */
diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c
deleted file mode 100644
index 3cc33da62..000000000
--- a/module/spl/spl-condvar.c
+++ /dev/null
@@ -1,461 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Credential Implementation.
- */
-
-#include <sys/condvar.h>
-#include <sys/time.h>
-#include <sys/sysmacros.h>
-#include <linux/hrtimer.h>
-#include <linux/compiler_compat.h>
-#include <linux/mod_compat.h>
-
-#include <linux/sched.h>
-
-#ifdef HAVE_SCHED_SIGNAL_HEADER
-#include <linux/sched/signal.h>
-#endif
-
-#define MAX_HRTIMEOUT_SLACK_US 1000
-unsigned int spl_schedule_hrtimeout_slack_us = 0;
-
-static int
-param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
-{
- unsigned long val;
- int error;
-
- error = kstrtoul(buf, 0, &val);
- if (error)
- return (error);
-
- if (val > MAX_HRTIMEOUT_SLACK_US)
- return (-EINVAL);
-
- error = param_set_uint(buf, kp);
- if (error < 0)
- return (error);
-
- return (0);
-}
-
-module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
- param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
-MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
- "schedule_hrtimeout_range() delta/slack value in us, default(0)");
-
-void
-__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
-{
- ASSERT(cvp);
- ASSERT(name == NULL);
- ASSERT(type == CV_DEFAULT);
- ASSERT(arg == NULL);
-
- cvp->cv_magic = CV_MAGIC;
- init_waitqueue_head(&cvp->cv_event);
- init_waitqueue_head(&cvp->cv_destroy);
- atomic_set(&cvp->cv_waiters, 0);
- atomic_set(&cvp->cv_refs, 1);
- cvp->cv_mutex = NULL;
-}
-EXPORT_SYMBOL(__cv_init);
-
-static int
-cv_destroy_wakeup(kcondvar_t *cvp)
-{
- if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
- ASSERT(cvp->cv_mutex == NULL);
- ASSERT(!waitqueue_active(&cvp->cv_event));
- return (1);
- }
-
- return (0);
-}
-
-void
-__cv_destroy(kcondvar_t *cvp)
-{
- ASSERT(cvp);
- ASSERT(cvp->cv_magic == CV_MAGIC);
-
- cvp->cv_magic = CV_DESTROY;
- atomic_dec(&cvp->cv_refs);
-
- /* Block until all waiters are woken and references dropped. */
- while (cv_destroy_wakeup(cvp) == 0)
- wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);
-
- ASSERT3P(cvp->cv_mutex, ==, NULL);
- ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
- ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
- ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
-}
-EXPORT_SYMBOL(__cv_destroy);
-
-static void
-cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
-{
- DEFINE_WAIT(wait);
- kmutex_t *m;
-
- ASSERT(cvp);
- ASSERT(mp);
- ASSERT(cvp->cv_magic == CV_MAGIC);
- ASSERT(mutex_owned(mp));
- atomic_inc(&cvp->cv_refs);
-
- m = READ_ONCE(cvp->cv_mutex);
- if (!m)
- m = xchg(&cvp->cv_mutex, mp);
- /* Ensure the same mutex is used by all callers */
- ASSERT(m == NULL || m == mp);
-
- prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
- atomic_inc(&cvp->cv_waiters);
-
- /*
- * Mutex should be dropped after prepare_to_wait() this
- * ensures we're linked in to the waiters list and avoids the
- * race where 'cvp->cv_waiters > 0' but the list is empty.
- */
- mutex_exit(mp);
- if (io)
- io_schedule();
- else
- schedule();
-
- /* No more waiters a different mutex could be used */
- if (atomic_dec_and_test(&cvp->cv_waiters)) {
- /*
- * This is set without any lock, so it's racy. But this is
- * just for debug anyway, so make it best-effort
- */
- cvp->cv_mutex = NULL;
- wake_up(&cvp->cv_destroy);
- }
-
- finish_wait(&cvp->cv_event, &wait);
- atomic_dec(&cvp->cv_refs);
-
- /*
- * Hold mutex after we release the cvp, otherwise we could dead lock
- * with a thread holding the mutex and call cv_destroy.
- */
- mutex_enter(mp);
-}
-
-void
-__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
-{
- cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
-}
-EXPORT_SYMBOL(__cv_wait);
-
-void
-__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
-{
- cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
-}
-EXPORT_SYMBOL(__cv_wait_io);
-
-int
-__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
-{
- cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);
-
- return (signal_pending(current) ? 0 : 1);
-}
-EXPORT_SYMBOL(__cv_wait_io_sig);
-
-int
-__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
-{
- cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
-
- return (signal_pending(current) ? 0 : 1);
-}
-EXPORT_SYMBOL(__cv_wait_sig);
-
-#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
-#define spl_io_schedule_timeout(t) io_schedule_timeout(t)
-#else
-
-struct spl_task_timer {
- struct timer_list timer;
- struct task_struct *task;
-};
-
-static void
-__cv_wakeup(spl_timer_list_t t)
-{
- struct timer_list *tmr = (struct timer_list *)t;
- struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);
-
- wake_up_process(task_timer->task);
-}
-
-static long
-spl_io_schedule_timeout(long time_left)
-{
- long expire_time = jiffies + time_left;
- struct spl_task_timer task_timer;
- struct timer_list *timer = &task_timer.timer;
-
- task_timer.task = current;
-
- timer_setup(timer, __cv_wakeup, 0);
-
- timer->expires = expire_time;
- add_timer(timer);
-
- io_schedule();
-
- del_timer_sync(timer);
-
- time_left = expire_time - jiffies;
-
- return (time_left < 0 ? 0 : time_left);
-}
-#endif
-
-/*
- * 'expire_time' argument is an absolute wall clock time in jiffies.
- * Return value is time left (expire_time - now) or -1 if timeout occurred.
- */
-static clock_t
-__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
- int state, int io)
-{
- DEFINE_WAIT(wait);
- kmutex_t *m;
- clock_t time_left;
-
- ASSERT(cvp);
- ASSERT(mp);
- ASSERT(cvp->cv_magic == CV_MAGIC);
- ASSERT(mutex_owned(mp));
-
- /* XXX - Does not handle jiffie wrap properly */
- time_left = expire_time - jiffies;
- if (time_left <= 0)
- return (-1);
-
- atomic_inc(&cvp->cv_refs);
- m = READ_ONCE(cvp->cv_mutex);
- if (!m)
- m = xchg(&cvp->cv_mutex, mp);
- /* Ensure the same mutex is used by all callers */
- ASSERT(m == NULL || m == mp);
-
- prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
- atomic_inc(&cvp->cv_waiters);
-
- /*
- * Mutex should be dropped after prepare_to_wait() this
- * ensures we're linked in to the waiters list and avoids the
- * race where 'cvp->cv_waiters > 0' but the list is empty.
- */
- mutex_exit(mp);
- if (io)
- time_left = spl_io_schedule_timeout(time_left);
- else
- time_left = schedule_timeout(time_left);
-
- /* No more waiters a different mutex could be used */
- if (atomic_dec_and_test(&cvp->cv_waiters)) {
- /*
- * This is set without any lock, so it's racy. But this is
- * just for debug anyway, so make it best-effort
- */
- cvp->cv_mutex = NULL;
- wake_up(&cvp->cv_destroy);
- }
-
- finish_wait(&cvp->cv_event, &wait);
- atomic_dec(&cvp->cv_refs);
-
- /*
- * Hold mutex after we release the cvp, otherwise we could dead lock
- * with a thread holding the mutex and call cv_destroy.
- */
- mutex_enter(mp);
- return (time_left > 0 ? time_left : -1);
-}
-
-clock_t
-__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
-{
- return (__cv_timedwait_common(cvp, mp, exp_time,
- TASK_UNINTERRUPTIBLE, 0));
-}
-EXPORT_SYMBOL(__cv_timedwait);
-
-clock_t
-__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
-{
- return (__cv_timedwait_common(cvp, mp, exp_time,
- TASK_UNINTERRUPTIBLE, 1));
-}
-EXPORT_SYMBOL(__cv_timedwait_io);
-
-clock_t
-__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
-{
- return (__cv_timedwait_common(cvp, mp, exp_time,
- TASK_INTERRUPTIBLE, 0));
-}
-EXPORT_SYMBOL(__cv_timedwait_sig);
-
-/*
- * 'expire_time' argument is an absolute clock time in nanoseconds.
- * Return value is time left (expire_time - now) or -1 if timeout occurred.
- */
-static clock_t
-__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
- hrtime_t res, int state)
-{
- DEFINE_WAIT(wait);
- kmutex_t *m;
- hrtime_t time_left;
- ktime_t ktime_left;
- u64 slack = 0;
-
- ASSERT(cvp);
- ASSERT(mp);
- ASSERT(cvp->cv_magic == CV_MAGIC);
- ASSERT(mutex_owned(mp));
-
- time_left = expire_time - gethrtime();
- if (time_left <= 0)
- return (-1);
-
- atomic_inc(&cvp->cv_refs);
- m = READ_ONCE(cvp->cv_mutex);
- if (!m)
- m = xchg(&cvp->cv_mutex, mp);
- /* Ensure the same mutex is used by all callers */
- ASSERT(m == NULL || m == mp);
-
- prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
- atomic_inc(&cvp->cv_waiters);
-
- /*
- * Mutex should be dropped after prepare_to_wait() this
- * ensures we're linked in to the waiters list and avoids the
- * race where 'cvp->cv_waiters > 0' but the list is empty.
- */
- mutex_exit(mp);
-
- ktime_left = ktime_set(0, time_left);
- slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
- MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
- schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);
-
- /* No more waiters a different mutex could be used */
- if (atomic_dec_and_test(&cvp->cv_waiters)) {
- /*
- * This is set without any lock, so it's racy. But this is
- * just for debug anyway, so make it best-effort
- */
- cvp->cv_mutex = NULL;
- wake_up(&cvp->cv_destroy);
- }
-
- finish_wait(&cvp->cv_event, &wait);
- atomic_dec(&cvp->cv_refs);
-
- mutex_enter(mp);
- time_left = expire_time - gethrtime();
- return (time_left > 0 ? NSEC_TO_TICK(time_left) : -1);
-}
-
-/*
- * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
- */
-static clock_t
-cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
- hrtime_t res, int flag, int state)
-{
- if (!(flag & CALLOUT_FLAG_ABSOLUTE))
- tim += gethrtime();
-
- return (__cv_timedwait_hires(cvp, mp, tim, res, state));
-}
-
-clock_t
-cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
- int flag)
-{
- return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
- TASK_UNINTERRUPTIBLE));
-}
-EXPORT_SYMBOL(cv_timedwait_hires);
-
-clock_t
-cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
- hrtime_t res, int flag)
-{
- return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
- TASK_INTERRUPTIBLE));
-}
-EXPORT_SYMBOL(cv_timedwait_sig_hires);
-
-void
-__cv_signal(kcondvar_t *cvp)
-{
- ASSERT(cvp);
- ASSERT(cvp->cv_magic == CV_MAGIC);
- atomic_inc(&cvp->cv_refs);
-
- /*
- * All waiters are added with WQ_FLAG_EXCLUSIVE so only one
- * waiter will be set runnable with each call to wake_up().
- * Additionally wake_up() holds a spin_lock associated with
- * the wait queue to ensure we don't race waking up processes.
- */
- if (atomic_read(&cvp->cv_waiters) > 0)
- wake_up(&cvp->cv_event);
-
- atomic_dec(&cvp->cv_refs);
-}
-EXPORT_SYMBOL(__cv_signal);
-
-void
-__cv_broadcast(kcondvar_t *cvp)
-{
- ASSERT(cvp);
- ASSERT(cvp->cv_magic == CV_MAGIC);
- atomic_inc(&cvp->cv_refs);
-
- /*
- * Wake_up_all() will wake up all waiters even those which
- * have the WQ_FLAG_EXCLUSIVE flag set.
- */
- if (atomic_read(&cvp->cv_waiters) > 0)
- wake_up_all(&cvp->cv_event);
-
- atomic_dec(&cvp->cv_refs);
-}
-EXPORT_SYMBOL(__cv_broadcast);
diff --git a/module/spl/spl-cred.c b/module/spl/spl-cred.c
deleted file mode 100644
index ea3e903f9..000000000
--- a/module/spl/spl-cred.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Credential Implementation.
- */
-
-#include <sys/cred.h>
-
-static int
-#ifdef HAVE_KUIDGID_T
-cr_groups_search(const struct group_info *group_info, kgid_t grp)
-#else
-cr_groups_search(const struct group_info *group_info, gid_t grp)
-#endif
-{
- unsigned int left, right, mid;
- int cmp;
-
- if (!group_info)
- return (0);
-
- left = 0;
- right = group_info->ngroups;
- while (left < right) {
- mid = (left + right) / 2;
- cmp = KGID_TO_SGID(grp) -
- KGID_TO_SGID(GROUP_AT(group_info, mid));
-
- if (cmp > 0)
- left = mid + 1;
- else if (cmp < 0)
- right = mid;
- else
- return (1);
- }
- return (0);
-}
-
-/* Hold a reference on the credential */
-void
-crhold(cred_t *cr)
-{
- (void) get_cred((const cred_t *)cr);
-}
-
-/* Free a reference on the credential */
-void
-crfree(cred_t *cr)
-{
- put_cred((const cred_t *)cr);
-}
-
-/* Return the number of supplemental groups */
-int
-crgetngroups(const cred_t *cr)
-{
- struct group_info *gi;
- int rc;
-
- gi = cr->group_info;
- rc = gi->ngroups;
-#ifndef HAVE_GROUP_INFO_GID
- /*
- * For Linux <= 4.8,
- * crgetgroups will only returns gi->blocks[0], which contains only
- * the first NGROUPS_PER_BLOCK groups.
- */
- if (rc > NGROUPS_PER_BLOCK) {
- WARN_ON_ONCE(1);
- rc = NGROUPS_PER_BLOCK;
- }
-#endif
- return (rc);
-}
-
-/*
- * Return an array of supplemental gids. The returned address is safe
- * to use as long as the caller has taken a reference with crhold().
- *
- * Linux 4.9 API change, group_info changed from 2d array via ->blocks to 1d
- * array via ->gid.
- */
-gid_t *
-crgetgroups(const cred_t *cr)
-{
- struct group_info *gi;
- gid_t *gids = NULL;
-
- gi = cr->group_info;
-#ifdef HAVE_GROUP_INFO_GID
- gids = KGIDP_TO_SGIDP(gi->gid);
-#else
- if (gi->nblocks > 0)
- gids = KGIDP_TO_SGIDP(gi->blocks[0]);
-#endif
- return (gids);
-}
-
-/* Check if the passed gid is available in supplied credential. */
-int
-groupmember(gid_t gid, const cred_t *cr)
-{
- struct group_info *gi;
- int rc;
-
- gi = cr->group_info;
- rc = cr_groups_search(gi, SGID_TO_KGID(gid));
-
- return (rc);
-}
-
-/* Return the effective user id */
-uid_t
-crgetuid(const cred_t *cr)
-{
- return (KUID_TO_SUID(cr->euid));
-}
-
-/* Return the real user id */
-uid_t
-crgetruid(const cred_t *cr)
-{
- return (KUID_TO_SUID(cr->uid));
-}
-
-/* Return the saved user id */
-uid_t
-crgetsuid(const cred_t *cr)
-{
- return (KUID_TO_SUID(cr->suid));
-}
-
-/* Return the filesystem user id */
-uid_t
-crgetfsuid(const cred_t *cr)
-{
- return (KUID_TO_SUID(cr->fsuid));
-}
-
-/* Return the effective group id */
-gid_t
-crgetgid(const cred_t *cr)
-{
- return (KGID_TO_SGID(cr->egid));
-}
-
-/* Return the real group id */
-gid_t
-crgetrgid(const cred_t *cr)
-{
- return (KGID_TO_SGID(cr->gid));
-}
-
-/* Return the saved group id */
-gid_t
-crgetsgid(const cred_t *cr)
-{
- return (KGID_TO_SGID(cr->sgid));
-}
-
-/* Return the filesystem group id */
-gid_t
-crgetfsgid(const cred_t *cr)
-{
- return (KGID_TO_SGID(cr->fsgid));
-}
-
-EXPORT_SYMBOL(crhold);
-EXPORT_SYMBOL(crfree);
-EXPORT_SYMBOL(crgetuid);
-EXPORT_SYMBOL(crgetruid);
-EXPORT_SYMBOL(crgetsuid);
-EXPORT_SYMBOL(crgetfsuid);
-EXPORT_SYMBOL(crgetgid);
-EXPORT_SYMBOL(crgetrgid);
-EXPORT_SYMBOL(crgetsgid);
-EXPORT_SYMBOL(crgetfsgid);
-EXPORT_SYMBOL(crgetngroups);
-EXPORT_SYMBOL(crgetgroups);
-EXPORT_SYMBOL(groupmember);
diff --git a/module/spl/spl-err.c b/module/spl/spl-err.c
deleted file mode 100644
index 3c0bb71c0..000000000
--- a/module/spl/spl-err.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Error Implementation.
- */
-
-#include <sys/sysmacros.h>
-#include <sys/cmn_err.h>
-
-/*
- * It is often useful to actually have the panic crash the node so you
- * can then get notified of the event, get the crashdump for later
- * analysis and other such goodies.
- * But we would still default to the current default of not to do that.
- */
-/* BEGIN CSTYLED */
-unsigned int spl_panic_halt;
-module_param(spl_panic_halt, uint, 0644);
-MODULE_PARM_DESC(spl_panic_halt, "Cause kernel panic on assertion failures");
-/* END CSTYLED */
-
-void
-spl_dumpstack(void)
-{
- printk("Showing stack for process %d\n", current->pid);
- dump_stack();
-}
-EXPORT_SYMBOL(spl_dumpstack);
-
-int
-spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
-{
- const char *newfile;
- char msg[MAXMSGLEN];
- va_list ap;
-
- newfile = strrchr(file, '/');
- if (newfile != NULL)
- newfile = newfile + 1;
- else
- newfile = file;
-
- va_start(ap, fmt);
- (void) vsnprintf(msg, sizeof (msg), fmt, ap);
- va_end(ap);
-
- printk(KERN_EMERG "%s", msg);
- printk(KERN_EMERG "PANIC at %s:%d:%s()\n", newfile, line, func);
- if (spl_panic_halt)
- panic("%s", msg);
-
- spl_dumpstack();
-
- /* Halt the thread to facilitate further debugging */
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (1)
- schedule();
-
- /* Unreachable */
- return (1);
-}
-EXPORT_SYMBOL(spl_panic);
-
-void
-vcmn_err(int ce, const char *fmt, va_list ap)
-{
- char msg[MAXMSGLEN];
-
- vsnprintf(msg, MAXMSGLEN, fmt, ap);
-
- switch (ce) {
- case CE_IGNORE:
- break;
- case CE_CONT:
- printk("%s", msg);
- break;
- case CE_NOTE:
- printk(KERN_NOTICE "NOTICE: %s\n", msg);
- break;
- case CE_WARN:
- printk(KERN_WARNING "WARNING: %s\n", msg);
- break;
- case CE_PANIC:
- printk(KERN_EMERG "PANIC: %s\n", msg);
- spl_dumpstack();
-
- /* Halt the thread to facilitate further debugging */
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (1)
- schedule();
- }
-} /* vcmn_err() */
-EXPORT_SYMBOL(vcmn_err);
-
-void
-cmn_err(int ce, const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vcmn_err(ce, fmt, ap);
- va_end(ap);
-} /* cmn_err() */
-EXPORT_SYMBOL(cmn_err);
diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c
deleted file mode 100644
index 1deb2f444..000000000
--- a/module/spl/spl-generic.c
+++ /dev/null
@@ -1,757 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Generic Implementation.
- */
-
-#include <sys/sysmacros.h>
-#include <sys/systeminfo.h>
-#include <sys/vmsystm.h>
-#include <sys/kobj.h>
-#include <sys/kmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/vmem.h>
-#include <sys/mutex.h>
-#include <sys/rwlock.h>
-#include <sys/taskq.h>
-#include <sys/tsd.h>
-#include <sys/zmod.h>
-#include <sys/debug.h>
-#include <sys/proc.h>
-#include <sys/kstat.h>
-#include <sys/file.h>
-#include <linux/ctype.h>
-#include <sys/disp.h>
-#include <sys/random.h>
-#include <sys/strings.h>
-#include <linux/kmod.h>
-#include "zfs_gitrev.h"
-
-char spl_gitrev[64] = ZFS_META_GITREV;
-
-/* BEGIN CSTYLED */
-unsigned long spl_hostid = 0;
-EXPORT_SYMBOL(spl_hostid);
-/* BEGIN CSTYLED */
-module_param(spl_hostid, ulong, 0644);
-MODULE_PARM_DESC(spl_hostid, "The system hostid.");
-/* END CSTYLED */
-
-proc_t p0;
-EXPORT_SYMBOL(p0);
-
-/*
- * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
- *
- * "Further scramblings of Marsaglia's xorshift generators"
- * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
- *
- * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
- * is to provide bytes containing random numbers. It is mapped to /dev/urandom
- * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's
- * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so
- * we can implement it using a fast PRNG that we seed using Linux' actual
- * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU
- * with an independent seed so that all calls to random_get_pseudo_bytes() are
- * free of atomic instructions.
- *
- * A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
- * to generate words larger than 128 bits will paradoxically be limited to
- * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
- * 128-bit words and selecting the first will implicitly select the second. If
- * a caller finds this behavior undesirable, random_get_bytes() should be used
- * instead.
- *
- * XXX: Linux interrupt handlers that trigger within the critical section
- * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
- * see the same numbers. Nothing in the code currently calls this in an
- * interrupt handler, so this is considered to be okay. If that becomes a
- * problem, we could create a set of per-cpu variables for interrupt handlers
- * and use them when in_interrupt() from linux/preempt_mask.h evaluates to
- * true.
- */
-static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy);
-
-/*
- * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
- * file:
- *
- * http://xorshift.di.unimi.it/xorshift128plus.c
- */
-
-static inline uint64_t
-spl_rand_next(uint64_t *s)
-{
- uint64_t s1 = s[0];
- const uint64_t s0 = s[1];
- s[0] = s0;
- s1 ^= s1 << 23; // a
- s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c
- return (s[1] + s0);
-}
-
-static inline void
-spl_rand_jump(uint64_t *s)
-{
- static const uint64_t JUMP[] =
- { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
-
- uint64_t s0 = 0;
- uint64_t s1 = 0;
- int i, b;
- for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++)
- for (b = 0; b < 64; b++) {
- if (JUMP[i] & 1ULL << b) {
- s0 ^= s[0];
- s1 ^= s[1];
- }
- (void) spl_rand_next(s);
- }
-
- s[0] = s0;
- s[1] = s1;
-}
-
-int
-random_get_pseudo_bytes(uint8_t *ptr, size_t len)
-{
- uint64_t *xp, s[2];
-
- ASSERT(ptr);
-
- xp = get_cpu_var(spl_pseudo_entropy);
-
- s[0] = xp[0];
- s[1] = xp[1];
-
- while (len) {
- union {
- uint64_t ui64;
- uint8_t byte[sizeof (uint64_t)];
- }entropy;
- int i = MIN(len, sizeof (uint64_t));
-
- len -= i;
- entropy.ui64 = spl_rand_next(s);
-
- while (i--)
- *ptr++ = entropy.byte[i];
- }
-
- xp[0] = s[0];
- xp[1] = s[1];
-
- put_cpu_var(spl_pseudo_entropy);
-
- return (0);
-}
-
-
-EXPORT_SYMBOL(random_get_pseudo_bytes);
-
-#if BITS_PER_LONG == 32
-/*
- * Support 64/64 => 64 division on a 32-bit platform. While the kernel
- * provides a div64_u64() function for this we do not use it because the
- * implementation is flawed. There are cases which return incorrect
- * results as late as linux-2.6.35. Until this is fixed upstream the
- * spl must provide its own implementation.
- *
- * This implementation is a slightly modified version of the algorithm
- * proposed by the book 'Hacker's Delight'. The original source can be
- * found here and is available for use without restriction.
- *
- * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
- */
-
-/*
- * Calculate number of leading of zeros for a 64-bit value.
- */
-static int
-nlz64(uint64_t x)
-{
- register int n = 0;
-
- if (x == 0)
- return (64);
-
- if (x <= 0x00000000FFFFFFFFULL) { n = n + 32; x = x << 32; }
- if (x <= 0x0000FFFFFFFFFFFFULL) { n = n + 16; x = x << 16; }
- if (x <= 0x00FFFFFFFFFFFFFFULL) { n = n + 8; x = x << 8; }
- if (x <= 0x0FFFFFFFFFFFFFFFULL) { n = n + 4; x = x << 4; }
- if (x <= 0x3FFFFFFFFFFFFFFFULL) { n = n + 2; x = x << 2; }
- if (x <= 0x7FFFFFFFFFFFFFFFULL) { n = n + 1; }
-
- return (n);
-}
-
-/*
- * Newer kernels have a div_u64() function but we define our own
- * to simplify portability between kernel versions.
- */
-static inline uint64_t
-__div_u64(uint64_t u, uint32_t v)
-{
- (void) do_div(u, v);
- return (u);
-}
-
-/*
- * Implementation of 64-bit unsigned division for 32-bit machines.
- *
- * First the procedure takes care of the case in which the divisor is a
- * 32-bit quantity. There are two subcases: (1) If the left half of the
- * dividend is less than the divisor, one execution of do_div() is all that
- * is required (overflow is not possible). (2) Otherwise it does two
- * divisions, using the grade school method.
- */
-uint64_t
-__udivdi3(uint64_t u, uint64_t v)
-{
- uint64_t u0, u1, v1, q0, q1, k;
- int n;
-
- if (v >> 32 == 0) { // If v < 2**32:
- if (u >> 32 < v) { // If u/v cannot overflow,
- return (__div_u64(u, v)); // just do one division.
- } else { // If u/v would overflow:
- u1 = u >> 32; // Break u into two halves.
- u0 = u & 0xFFFFFFFF;
- q1 = __div_u64(u1, v); // First quotient digit.
- k = u1 - q1 * v; // First remainder, < v.
- u0 += (k << 32);
- q0 = __div_u64(u0, v); // Seconds quotient digit.
- return ((q1 << 32) + q0);
- }
- } else { // If v >= 2**32:
- n = nlz64(v); // 0 <= n <= 31.
- v1 = (v << n) >> 32; // Normalize divisor, MSB is 1.
- u1 = u >> 1; // To ensure no overflow.
- q1 = __div_u64(u1, v1); // Get quotient from
- q0 = (q1 << n) >> 31; // Undo normalization and
- // division of u by 2.
- if (q0 != 0) // Make q0 correct or
- q0 = q0 - 1; // too small by 1.
- if ((u - q0 * v) >= v)
- q0 = q0 + 1; // Now q0 is correct.
-
- return (q0);
- }
-}
-EXPORT_SYMBOL(__udivdi3);
-
-/* BEGIN CSTYLED */
-#ifndef abs64
-#define abs64(x) ({ uint64_t t = (x) >> 63; ((x) ^ t) - t; })
-#endif
-/* END CSTYLED */
-
-/*
- * Implementation of 64-bit signed division for 32-bit machines.
- */
-int64_t
-__divdi3(int64_t u, int64_t v)
-{
- int64_t q, t;
- q = __udivdi3(abs64(u), abs64(v));
- t = (u ^ v) >> 63; // If u, v have different
- return ((q ^ t) - t); // signs, negate q.
-}
-EXPORT_SYMBOL(__divdi3);
-
-/*
- * Implementation of 64-bit unsigned modulo for 32-bit machines.
- */
-uint64_t
-__umoddi3(uint64_t dividend, uint64_t divisor)
-{
- return (dividend - (divisor * __udivdi3(dividend, divisor)));
-}
-EXPORT_SYMBOL(__umoddi3);
-
-/*
- * Implementation of 64-bit unsigned division/modulo for 32-bit machines.
- */
-uint64_t
-__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
-{
- uint64_t q = __udivdi3(n, d);
- if (r)
- *r = n - d * q;
- return (q);
-}
-EXPORT_SYMBOL(__udivmoddi4);
-
-/*
- * Implementation of 64-bit signed division/modulo for 32-bit machines.
- */
-int64_t
-__divmoddi4(int64_t n, int64_t d, int64_t *r)
-{
- int64_t q, rr;
- boolean_t nn = B_FALSE;
- boolean_t nd = B_FALSE;
- if (n < 0) {
- nn = B_TRUE;
- n = -n;
- }
- if (d < 0) {
- nd = B_TRUE;
- d = -d;
- }
-
- q = __udivmoddi4(n, d, (uint64_t *)&rr);
-
- if (nn != nd)
- q = -q;
- if (nn)
- rr = -rr;
- if (r)
- *r = rr;
- return (q);
-}
-EXPORT_SYMBOL(__divmoddi4);
-
-#if defined(__arm) || defined(__arm__)
-/*
- * Implementation of 64-bit (un)signed division for 32-bit arm machines.
- *
- * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned)
- * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
- * and the remainder in {r2, r3}. The return type is specifically left
- * set to 'void' to ensure the compiler does not overwrite these registers
- * during the return. All results are in registers as per ABI
- */
-void
-__aeabi_uldivmod(uint64_t u, uint64_t v)
-{
- uint64_t res;
- uint64_t mod;
-
- res = __udivdi3(u, v);
- mod = __umoddi3(u, v);
- {
- register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
- register uint32_t r1 asm("r1") = (res >> 32);
- register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
- register uint32_t r3 asm("r3") = (mod >> 32);
-
- /* BEGIN CSTYLED */
- asm volatile(""
- : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */
- : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
- /* END CSTYLED */
-
- return; /* r0; */
- }
-}
-EXPORT_SYMBOL(__aeabi_uldivmod);
-
-void
-__aeabi_ldivmod(int64_t u, int64_t v)
-{
- int64_t res;
- uint64_t mod;
-
- res = __divdi3(u, v);
- mod = __umoddi3(u, v);
- {
- register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
- register uint32_t r1 asm("r1") = (res >> 32);
- register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
- register uint32_t r3 asm("r3") = (mod >> 32);
-
- /* BEGIN CSTYLED */
- asm volatile(""
- : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */
- : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
- /* END CSTYLED */
-
- return; /* r0; */
- }
-}
-EXPORT_SYMBOL(__aeabi_ldivmod);
-#endif /* __arm || __arm__ */
-#endif /* BITS_PER_LONG */
-
-/*
- * NOTE: The strtoxx behavior is solely based on my reading of the Solaris
- * ddi_strtol(9F) man page. I have not verified the behavior of these
- * functions against their Solaris counterparts. It is possible that I
- * may have misinterpreted the man page or the man page is incorrect.
- */
-int ddi_strtoul(const char *, char **, int, unsigned long *);
-int ddi_strtol(const char *, char **, int, long *);
-int ddi_strtoull(const char *, char **, int, unsigned long long *);
-int ddi_strtoll(const char *, char **, int, long long *);
-
-#define define_ddi_strtoux(type, valtype) \
-int ddi_strtou##type(const char *str, char **endptr, \
- int base, valtype *result) \
-{ \
- valtype last_value, value = 0; \
- char *ptr = (char *)str; \
- int flag = 1, digit; \
- \
- if (strlen(ptr) == 0) \
- return (EINVAL); \
- \
- /* Auto-detect base based on prefix */ \
- if (!base) { \
- if (str[0] == '0') { \
- if (tolower(str[1]) == 'x' && isxdigit(str[2])) { \
- base = 16; /* hex */ \
- ptr += 2; \
- } else if (str[1] >= '0' && str[1] < 8) { \
- base = 8; /* octal */ \
- ptr += 1; \
- } else { \
- return (EINVAL); \
- } \
- } else { \
- base = 10; /* decimal */ \
- } \
- } \
- \
- while (1) { \
- if (isdigit(*ptr)) \
- digit = *ptr - '0'; \
- else if (isalpha(*ptr)) \
- digit = tolower(*ptr) - 'a' + 10; \
- else \
- break; \
- \
- if (digit >= base) \
- break; \
- \
- last_value = value; \
- value = value * base + digit; \
- if (last_value > value) /* Overflow */ \
- return (ERANGE); \
- \
- flag = 1; \
- ptr++; \
- } \
- \
- if (flag) \
- *result = value; \
- \
- if (endptr) \
- *endptr = (char *)(flag ? ptr : str); \
- \
- return (0); \
-} \
-
-#define define_ddi_strtox(type, valtype) \
-int ddi_strto##type(const char *str, char **endptr, \
- int base, valtype *result) \
-{ \
- int rc; \
- \
- if (*str == '-') { \
- rc = ddi_strtou##type(str + 1, endptr, base, result); \
- if (!rc) { \
- if (*endptr == str + 1) \
- *endptr = (char *)str; \
- else \
- *result = -*result; \
- } \
- } else { \
- rc = ddi_strtou##type(str, endptr, base, result); \
- } \
- \
- return (rc); \
-}
-
-define_ddi_strtoux(l, unsigned long)
-define_ddi_strtox(l, long)
-define_ddi_strtoux(ll, unsigned long long)
-define_ddi_strtox(ll, long long)
-
-EXPORT_SYMBOL(ddi_strtoul);
-EXPORT_SYMBOL(ddi_strtol);
-EXPORT_SYMBOL(ddi_strtoll);
-EXPORT_SYMBOL(ddi_strtoull);
-
-int
-ddi_copyin(const void *from, void *to, size_t len, int flags)
-{
- /* Fake ioctl() issued by kernel, 'from' is a kernel address */
- if (flags & FKIOCTL) {
- memcpy(to, from, len);
- return (0);
- }
-
- return (copyin(from, to, len));
-}
-EXPORT_SYMBOL(ddi_copyin);
-
-int
-ddi_copyout(const void *from, void *to, size_t len, int flags)
-{
- /* Fake ioctl() issued by kernel, 'from' is a kernel address */
- if (flags & FKIOCTL) {
- memcpy(to, from, len);
- return (0);
- }
-
- return (copyout(from, to, len));
-}
-EXPORT_SYMBOL(ddi_copyout);
-
-/*
- * Read the unique system identifier from the /etc/hostid file.
- *
- * The behavior of /usr/bin/hostid on Linux systems with the
- * regular eglibc and coreutils is:
- *
- * 1. Generate the value if the /etc/hostid file does not exist
- * or if the /etc/hostid file is less than four bytes in size.
- *
- * 2. If the /etc/hostid file is at least 4 bytes, then return
- * the first four bytes [0..3] in native endian order.
- *
- * 3. Always ignore bytes [4..] if they exist in the file.
- *
- * Only the first four bytes are significant, even on systems that
- * have a 64-bit word size.
- *
- * See:
- *
- * eglibc: sysdeps/unix/sysv/linux/gethostid.c
- * coreutils: src/hostid.c
- *
- * Notes:
- *
- * The /etc/hostid file on Solaris is a text file that often reads:
- *
- * # DO NOT EDIT
- * "0123456789"
- *
- * Directly copying this file to Linux results in a constant
- * hostid of 4f442023 because the default comment constitutes
- * the first four bytes of the file.
- *
- */
-
-char *spl_hostid_path = HW_HOSTID_PATH;
-module_param(spl_hostid_path, charp, 0444);
-MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");
-
-static int
-hostid_read(uint32_t *hostid)
-{
- uint64_t size;
- struct _buf *file;
- uint32_t value = 0;
- int error;
-
- file = kobj_open_file(spl_hostid_path);
- if (file == (struct _buf *)-1)
- return (ENOENT);
-
- error = kobj_get_filesize(file, &size);
- if (error) {
- kobj_close_file(file);
- return (error);
- }
-
- if (size < sizeof (HW_HOSTID_MASK)) {
- kobj_close_file(file);
- return (EINVAL);
- }
-
- /*
- * Read directly into the variable like eglibc does.
- * Short reads are okay; native behavior is preserved.
- */
- error = kobj_read_file(file, (char *)&value, sizeof (value), 0);
- if (error < 0) {
- kobj_close_file(file);
- return (EIO);
- }
-
- /* Mask down to 32 bits like coreutils does. */
- *hostid = (value & HW_HOSTID_MASK);
- kobj_close_file(file);
-
- return (0);
-}
-
-/*
- * Return the system hostid. Preferentially use the spl_hostid module option
- * when set, otherwise use the value in the /etc/hostid file.
- */
-uint32_t
-zone_get_hostid(void *zone)
-{
- uint32_t hostid;
-
- ASSERT3P(zone, ==, NULL);
-
- if (spl_hostid != 0)
- return ((uint32_t)(spl_hostid & HW_HOSTID_MASK));
-
- if (hostid_read(&hostid) == 0)
- return (hostid);
-
- return (0);
-}
-EXPORT_SYMBOL(zone_get_hostid);
-
-static int
-spl_kvmem_init(void)
-{
- int rc = 0;
-
- rc = spl_kmem_init();
- if (rc)
- return (rc);
-
- rc = spl_vmem_init();
- if (rc) {
- spl_kmem_fini();
- return (rc);
- }
-
- return (rc);
-}
-
-/*
- * We initialize the random number generator with 128 bits of entropy from the
- * system random number generator. In the improbable case that we have a zero
- * seed, we fallback to the system jiffies, unless it is also zero, in which
- * situation we use a preprogrammed seed. We step forward by 2^64 iterations to
- * initialize each of the per-cpu seeds so that the sequences generated on each
- * CPU are guaranteed to never overlap in practice.
- */
-static void __init
-spl_random_init(void)
-{
- uint64_t s[2];
- int i;
-
- get_random_bytes(s, sizeof (s));
-
- if (s[0] == 0 && s[1] == 0) {
- if (jiffies != 0) {
- s[0] = jiffies;
- s[1] = ~0 - jiffies;
- } else {
- (void) memcpy(s, "improbable seed", sizeof (s));
- }
- printk("SPL: get_random_bytes() returned 0 "
- "when generating random seed. Setting initial seed to "
- "0x%016llx%016llx.\n", cpu_to_be64(s[0]),
- cpu_to_be64(s[1]));
- }
-
- for_each_possible_cpu(i) {
- uint64_t *wordp = per_cpu(spl_pseudo_entropy, i);
-
- spl_rand_jump(s);
-
- wordp[0] = s[0];
- wordp[1] = s[1];
- }
-}
-
-static void
-spl_kvmem_fini(void)
-{
- spl_vmem_fini();
- spl_kmem_fini();
-}
-
-static int __init
-spl_init(void)
-{
- int rc = 0;
-
- bzero(&p0, sizeof (proc_t));
- spl_random_init();
-
- if ((rc = spl_kvmem_init()))
- goto out1;
-
- if ((rc = spl_tsd_init()))
- goto out2;
-
- if ((rc = spl_taskq_init()))
- goto out3;
-
- if ((rc = spl_kmem_cache_init()))
- goto out4;
-
- if ((rc = spl_vn_init()))
- goto out5;
-
- if ((rc = spl_proc_init()))
- goto out6;
-
- if ((rc = spl_kstat_init()))
- goto out7;
-
- if ((rc = spl_zlib_init()))
- goto out8;
-
- return (rc);
-
-out8:
- spl_kstat_fini();
-out7:
- spl_proc_fini();
-out6:
- spl_vn_fini();
-out5:
- spl_kmem_cache_fini();
-out4:
- spl_taskq_fini();
-out3:
- spl_tsd_fini();
-out2:
- spl_kvmem_fini();
-out1:
- return (rc);
-}
-
-static void __exit
-spl_fini(void)
-{
- spl_zlib_fini();
- spl_kstat_fini();
- spl_proc_fini();
- spl_vn_fini();
- spl_kmem_cache_fini();
- spl_taskq_fini();
- spl_tsd_fini();
- spl_kvmem_fini();
-}
-
-module_init(spl_init);
-module_exit(spl_fini);
-
-MODULE_DESCRIPTION("Solaris Porting Layer");
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE("GPL");
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c
deleted file mode 100644
index b39867b03..000000000
--- a/module/spl/spl-kmem-cache.c
+++ /dev/null
@@ -1,1780 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <sys/kmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/shrinker.h>
-#include <sys/taskq.h>
-#include <sys/timer.h>
-#include <sys/vmem.h>
-#include <sys/wait.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/prefetch.h>
-
-/*
- * Within the scope of spl-kmem.c file the kmem_cache_* definitions
- * are removed to allow access to the real Linux slab allocator.
- */
-#undef kmem_cache_destroy
-#undef kmem_cache_create
-#undef kmem_cache_alloc
-#undef kmem_cache_free
-
-
-/*
- * Linux 3.16 replaced smp_mb__{before,after}_{atomic,clear}_{dec,inc,bit}()
- * with smp_mb__{before,after}_atomic() because they were redundant. This is
- * only used inside our SLAB allocator, so we implement an internal wrapper
- * here to give us smp_mb__{before,after}_atomic() on older kernels.
- */
-#ifndef smp_mb__before_atomic
-#define smp_mb__before_atomic(x) smp_mb__before_clear_bit(x)
-#endif
-
-#ifndef smp_mb__after_atomic
-#define smp_mb__after_atomic(x) smp_mb__after_clear_bit(x)
-#endif
-
-/*
- * Cache expiration was implemented because it was part of the default Solaris
- * kmem_cache behavior. The idea is that per-cpu objects which haven't been
- * accessed in several seconds should be returned to the cache. On the other
- * hand Linux slabs never move objects back to the slabs unless there is
- * memory pressure on the system. By default the Linux method is enabled
- * because it has been shown to improve responsiveness on low memory systems.
- * This policy may be changed by setting KMC_EXPIRE_AGE or KMC_EXPIRE_MEM.
- */
-/* BEGIN CSTYLED */
-unsigned int spl_kmem_cache_expire = KMC_EXPIRE_MEM;
-EXPORT_SYMBOL(spl_kmem_cache_expire);
-module_param(spl_kmem_cache_expire, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)");
-
-/*
- * Cache magazines are an optimization designed to minimize the cost of
- * allocating memory. They do this by keeping a per-cpu cache of recently
- * freed objects, which can then be reallocated without taking a lock. This
- * can improve performance on highly contended caches. However, because
- * objects in magazines will prevent otherwise empty slabs from being
- * immediately released this may not be ideal for low memory machines.
- *
- * For this reason spl_kmem_cache_magazine_size can be used to set a maximum
- * magazine size. When this value is set to 0 the magazine size will be
- * automatically determined based on the object size. Otherwise magazines
- * will be limited to 2-256 objects per magazine (i.e per cpu). Magazines
- * may never be entirely disabled in this implementation.
- */
-unsigned int spl_kmem_cache_magazine_size = 0;
-module_param(spl_kmem_cache_magazine_size, uint, 0444);
-MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
- "Default magazine size (2-256), set automatically (0)");
-
-/*
- * The default behavior is to report the number of objects remaining in the
- * cache. This allows the Linux VM to repeatedly reclaim objects from the
- * cache when memory is low satisfy other memory allocations. Alternately,
- * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
- * is reclaimed. This may increase the likelihood of out of memory events.
- */
-unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
-module_param(spl_kmem_cache_reclaim, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
-
-unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
-module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
-
-unsigned int spl_kmem_cache_obj_per_slab_min = SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN;
-module_param(spl_kmem_cache_obj_per_slab_min, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab_min,
- "Minimal number of objects per slab");
-
-unsigned int spl_kmem_cache_max_size = SPL_KMEM_CACHE_MAX_SIZE;
-module_param(spl_kmem_cache_max_size, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");
-
-/*
- * For small objects the Linux slab allocator should be used to make the most
- * efficient use of the memory. However, large objects are not supported by
- * the Linux slab and therefore the SPL implementation is preferred. A cutoff
- * of 16K was determined to be optimal for architectures using 4K pages.
- */
-#if PAGE_SIZE == 4096
-unsigned int spl_kmem_cache_slab_limit = 16384;
-#else
-unsigned int spl_kmem_cache_slab_limit = 0;
-#endif
-module_param(spl_kmem_cache_slab_limit, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
- "Objects less than N bytes use the Linux slab");
-
-/*
- * This value defaults to a threshold designed to avoid allocations which
- * have been deemed costly by the kernel.
- */
-unsigned int spl_kmem_cache_kmem_limit =
- ((1 << (PAGE_ALLOC_COSTLY_ORDER - 1)) * PAGE_SIZE) /
- SPL_KMEM_CACHE_OBJ_PER_SLAB;
-module_param(spl_kmem_cache_kmem_limit, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
- "Objects less than N bytes use the kmalloc");
-
-/*
- * The number of threads available to allocate new slabs for caches. This
- * should not need to be tuned but it is available for performance analysis.
- */
-unsigned int spl_kmem_cache_kmem_threads = 4;
-module_param(spl_kmem_cache_kmem_threads, uint, 0444);
-MODULE_PARM_DESC(spl_kmem_cache_kmem_threads,
- "Number of spl_kmem_cache threads");
-/* END CSTYLED */
-
-/*
- * Slab allocation interfaces
- *
- * While the Linux slab implementation was inspired by the Solaris
- * implementation I cannot use it to emulate the Solaris APIs. I
- * require two features which are not provided by the Linux slab.
- *
- * 1) Constructors AND destructors. Recent versions of the Linux
- * kernel have removed support for destructors. This is a deal
- * breaker for the SPL which contains particularly expensive
- * initializers for mutex's, condition variables, etc. We also
- * require a minimal level of cleanup for these data types unlike
- * many Linux data types which do need to be explicitly destroyed.
- *
- * 2) Virtual address space backed slab. Callers of the Solaris slab
- * expect it to work well for both small are very large allocations.
- * Because of memory fragmentation the Linux slab which is backed
- * by kmalloc'ed memory performs very badly when confronted with
- * large numbers of large allocations. Basing the slab on the
- * virtual address space removes the need for contiguous pages
- * and greatly improve performance for large allocations.
- *
- * For these reasons, the SPL has its own slab implementation with
- * the needed features. It is not as highly optimized as either the
- * Solaris or Linux slabs, but it should get me most of what is
- * needed until it can be optimized or obsoleted by another approach.
- *
- * One serious concern I do have about this method is the relatively
- * small virtual address space on 32bit arches. This will seriously
- * constrain the size of the slab caches and their performance.
- */
-
-struct list_head spl_kmem_cache_list; /* List of caches */
-struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
-taskq_t *spl_kmem_cache_taskq; /* Task queue for aging / reclaim */
-
-static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj);
-
-SPL_SHRINKER_CALLBACK_FWD_DECLARE(spl_kmem_cache_generic_shrinker);
-SPL_SHRINKER_DECLARE(spl_kmem_cache_shrinker,
- spl_kmem_cache_generic_shrinker, KMC_DEFAULT_SEEKS);
-
-static void *
-kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
-{
- gfp_t lflags = kmem_flags_convert(flags);
- void *ptr;
-
- if (skc->skc_flags & KMC_KMEM) {
- ASSERT(ISP2(size));
- ptr = (void *)__get_free_pages(lflags, get_order(size));
- } else {
- ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
- }
-
- /* Resulting allocated memory will be page aligned */
- ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
-
- return (ptr);
-}
-
-static void
-kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
-{
- ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
-
- /*
- * The Linux direct reclaim path uses this out of band value to
- * determine if forward progress is being made. Normally this is
- * incremented by kmem_freepages() which is part of the various
- * Linux slab implementations. However, since we are using none
- * of that infrastructure we are responsible for incrementing it.
- */
- if (current->reclaim_state)
- current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
-
- if (skc->skc_flags & KMC_KMEM) {
- ASSERT(ISP2(size));
- free_pages((unsigned long)ptr, get_order(size));
- } else {
- vfree(ptr);
- }
-}
-
-/*
- * Required space for each aligned sks.
- */
-static inline uint32_t
-spl_sks_size(spl_kmem_cache_t *skc)
-{
- return (P2ROUNDUP_TYPED(sizeof (spl_kmem_slab_t),
- skc->skc_obj_align, uint32_t));
-}
-
-/*
- * Required space for each aligned object.
- */
-static inline uint32_t
-spl_obj_size(spl_kmem_cache_t *skc)
-{
- uint32_t align = skc->skc_obj_align;
-
- return (P2ROUNDUP_TYPED(skc->skc_obj_size, align, uint32_t) +
- P2ROUNDUP_TYPED(sizeof (spl_kmem_obj_t), align, uint32_t));
-}
-
-/*
- * Lookup the spl_kmem_object_t for an object given that object.
- */
-static inline spl_kmem_obj_t *
-spl_sko_from_obj(spl_kmem_cache_t *skc, void *obj)
-{
- return (obj + P2ROUNDUP_TYPED(skc->skc_obj_size,
- skc->skc_obj_align, uint32_t));
-}
-
-/*
- * Required space for each offslab object taking in to account alignment
- * restrictions and the power-of-two requirement of kv_alloc().
- */
-static inline uint32_t
-spl_offslab_size(spl_kmem_cache_t *skc)
-{
- return (1UL << (fls64(spl_obj_size(skc)) + 1));
-}
-
-/*
- * It's important that we pack the spl_kmem_obj_t structure and the
- * actual objects in to one large address space to minimize the number
- * of calls to the allocator. It is far better to do a few large
- * allocations and then subdivide it ourselves. Now which allocator
- * we use requires balancing a few trade offs.
- *
- * For small objects we use kmem_alloc() because as long as you are
- * only requesting a small number of pages (ideally just one) its cheap.
- * However, when you start requesting multiple pages with kmem_alloc()
- * it gets increasingly expensive since it requires contiguous pages.
- * For this reason we shift to vmem_alloc() for slabs of large objects
- * which removes the need for contiguous pages. We do not use
- * vmem_alloc() in all cases because there is significant locking
- * overhead in __get_vm_area_node(). This function takes a single
- * global lock when acquiring an available virtual address range which
- * serializes all vmem_alloc()'s for all slab caches. Using slightly
- * different allocation functions for small and large objects should
- * give us the best of both worlds.
- *
- * KMC_ONSLAB KMC_OFFSLAB
- *
- * +------------------------+ +-----------------+
- * | spl_kmem_slab_t --+-+ | | spl_kmem_slab_t |---+-+
- * | skc_obj_size <-+ | | +-----------------+ | |
- * | spl_kmem_obj_t | | | |
- * | skc_obj_size <---+ | +-----------------+ | |
- * | spl_kmem_obj_t | | | skc_obj_size | <-+ |
- * | ... v | | spl_kmem_obj_t | |
- * +------------------------+ +-----------------+ v
- */
-static spl_kmem_slab_t *
-spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
-{
- spl_kmem_slab_t *sks;
- spl_kmem_obj_t *sko, *n;
- void *base, *obj;
- uint32_t obj_size, offslab_size = 0;
- int i, rc = 0;
-
- base = kv_alloc(skc, skc->skc_slab_size, flags);
- if (base == NULL)
- return (NULL);
-
- sks = (spl_kmem_slab_t *)base;
- sks->sks_magic = SKS_MAGIC;
- sks->sks_objs = skc->skc_slab_objs;
- sks->sks_age = jiffies;
- sks->sks_cache = skc;
- INIT_LIST_HEAD(&sks->sks_list);
- INIT_LIST_HEAD(&sks->sks_free_list);
- sks->sks_ref = 0;
- obj_size = spl_obj_size(skc);
-
- if (skc->skc_flags & KMC_OFFSLAB)
- offslab_size = spl_offslab_size(skc);
-
- for (i = 0; i < sks->sks_objs; i++) {
- if (skc->skc_flags & KMC_OFFSLAB) {
- obj = kv_alloc(skc, offslab_size, flags);
- if (!obj) {
- rc = -ENOMEM;
- goto out;
- }
- } else {
- obj = base + spl_sks_size(skc) + (i * obj_size);
- }
-
- ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align));
- sko = spl_sko_from_obj(skc, obj);
- sko->sko_addr = obj;
- sko->sko_magic = SKO_MAGIC;
- sko->sko_slab = sks;
- INIT_LIST_HEAD(&sko->sko_list);
- list_add_tail(&sko->sko_list, &sks->sks_free_list);
- }
-
-out:
- if (rc) {
- if (skc->skc_flags & KMC_OFFSLAB)
- list_for_each_entry_safe(sko,
- n, &sks->sks_free_list, sko_list) {
- kv_free(skc, sko->sko_addr, offslab_size);
- }
-
- kv_free(skc, base, skc->skc_slab_size);
- sks = NULL;
- }
-
- return (sks);
-}
-
-/*
- * Remove a slab from complete or partial list, it must be called with
- * the 'skc->skc_lock' held but the actual free must be performed
- * outside the lock to prevent deadlocking on vmem addresses.
- */
-static void
-spl_slab_free(spl_kmem_slab_t *sks,
- struct list_head *sks_list, struct list_head *sko_list)
-{
- spl_kmem_cache_t *skc;
-
- ASSERT(sks->sks_magic == SKS_MAGIC);
- ASSERT(sks->sks_ref == 0);
-
- skc = sks->sks_cache;
- ASSERT(skc->skc_magic == SKC_MAGIC);
-
- /*
- * Update slab/objects counters in the cache, then remove the
- * slab from the skc->skc_partial_list. Finally add the slab
- * and all its objects in to the private work lists where the
- * destructors will be called and the memory freed to the system.
- */
- skc->skc_obj_total -= sks->sks_objs;
- skc->skc_slab_total--;
- list_del(&sks->sks_list);
- list_add(&sks->sks_list, sks_list);
- list_splice_init(&sks->sks_free_list, sko_list);
-}
-
-/*
- * Reclaim empty slabs at the end of the partial list.
- */
-static void
-spl_slab_reclaim(spl_kmem_cache_t *skc)
-{
- spl_kmem_slab_t *sks, *m;
- spl_kmem_obj_t *sko, *n;
- LIST_HEAD(sks_list);
- LIST_HEAD(sko_list);
- uint32_t size = 0;
-
- /*
- * Empty slabs and objects must be moved to a private list so they
- * can be safely freed outside the spin lock. All empty slabs are
- * at the end of skc->skc_partial_list, therefore once a non-empty
- * slab is found we can stop scanning.
- */
- spin_lock(&skc->skc_lock);
- list_for_each_entry_safe_reverse(sks, m,
- &skc->skc_partial_list, sks_list) {
-
- if (sks->sks_ref > 0)
- break;
-
- spl_slab_free(sks, &sks_list, &sko_list);
- }
- spin_unlock(&skc->skc_lock);
-
- /*
- * The following two loops ensure all the object destructors are
- * run, any offslab objects are freed, and the slabs themselves
- * are freed. This is all done outside the skc->skc_lock since
- * this allows the destructor to sleep, and allows us to perform
- * a conditional reschedule when a freeing a large number of
- * objects and slabs back to the system.
- */
- if (skc->skc_flags & KMC_OFFSLAB)
- size = spl_offslab_size(skc);
-
- list_for_each_entry_safe(sko, n, &sko_list, sko_list) {
- ASSERT(sko->sko_magic == SKO_MAGIC);
-
- if (skc->skc_flags & KMC_OFFSLAB)
- kv_free(skc, sko->sko_addr, size);
- }
-
- list_for_each_entry_safe(sks, m, &sks_list, sks_list) {
- ASSERT(sks->sks_magic == SKS_MAGIC);
- kv_free(skc, sks, skc->skc_slab_size);
- }
-}
-
-static spl_kmem_emergency_t *
-spl_emergency_search(struct rb_root *root, void *obj)
-{
- struct rb_node *node = root->rb_node;
- spl_kmem_emergency_t *ske;
- unsigned long address = (unsigned long)obj;
-
- while (node) {
- ske = container_of(node, spl_kmem_emergency_t, ske_node);
-
- if (address < ske->ske_obj)
- node = node->rb_left;
- else if (address > ske->ske_obj)
- node = node->rb_right;
- else
- return (ske);
- }
-
- return (NULL);
-}
-
-static int
-spl_emergency_insert(struct rb_root *root, spl_kmem_emergency_t *ske)
-{
- struct rb_node **new = &(root->rb_node), *parent = NULL;
- spl_kmem_emergency_t *ske_tmp;
- unsigned long address = ske->ske_obj;
-
- while (*new) {
- ske_tmp = container_of(*new, spl_kmem_emergency_t, ske_node);
-
- parent = *new;
- if (address < ske_tmp->ske_obj)
- new = &((*new)->rb_left);
- else if (address > ske_tmp->ske_obj)
- new = &((*new)->rb_right);
- else
- return (0);
- }
-
- rb_link_node(&ske->ske_node, parent, new);
- rb_insert_color(&ske->ske_node, root);
-
- return (1);
-}
-
-/*
- * Allocate a single emergency object and track it in a red black tree.
- */
-static int
-spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
-{
- gfp_t lflags = kmem_flags_convert(flags);
- spl_kmem_emergency_t *ske;
- int order = get_order(skc->skc_obj_size);
- int empty;
-
- /* Last chance use a partial slab if one now exists */
- spin_lock(&skc->skc_lock);
- empty = list_empty(&skc->skc_partial_list);
- spin_unlock(&skc->skc_lock);
- if (!empty)
- return (-EEXIST);
-
- ske = kmalloc(sizeof (*ske), lflags);
- if (ske == NULL)
- return (-ENOMEM);
-
- ske->ske_obj = __get_free_pages(lflags, order);
- if (ske->ske_obj == 0) {
- kfree(ske);
- return (-ENOMEM);
- }
-
- spin_lock(&skc->skc_lock);
- empty = spl_emergency_insert(&skc->skc_emergency_tree, ske);
- if (likely(empty)) {
- skc->skc_obj_total++;
- skc->skc_obj_emergency++;
- if (skc->skc_obj_emergency > skc->skc_obj_emergency_max)
- skc->skc_obj_emergency_max = skc->skc_obj_emergency;
- }
- spin_unlock(&skc->skc_lock);
-
- if (unlikely(!empty)) {
- free_pages(ske->ske_obj, order);
- kfree(ske);
- return (-EINVAL);
- }
-
- *obj = (void *)ske->ske_obj;
-
- return (0);
-}
-
-/*
- * Locate the passed object in the red black tree and free it.
- */
-static int
-spl_emergency_free(spl_kmem_cache_t *skc, void *obj)
-{
- spl_kmem_emergency_t *ske;
- int order = get_order(skc->skc_obj_size);
-
- spin_lock(&skc->skc_lock);
- ske = spl_emergency_search(&skc->skc_emergency_tree, obj);
- if (ske) {
- rb_erase(&ske->ske_node, &skc->skc_emergency_tree);
- skc->skc_obj_emergency--;
- skc->skc_obj_total--;
- }
- spin_unlock(&skc->skc_lock);
-
- if (ske == NULL)
- return (-ENOENT);
-
- free_pages(ske->ske_obj, order);
- kfree(ske);
-
- return (0);
-}
-
-/*
- * Release objects from the per-cpu magazine back to their slab. The flush
- * argument contains the max number of entries to remove from the magazine.
- */
-static void
-__spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
-{
- int i, count = MIN(flush, skm->skm_avail);
-
- ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT(skm->skm_magic == SKM_MAGIC);
-
- for (i = 0; i < count; i++)
- spl_cache_shrink(skc, skm->skm_objs[i]);
-
- skm->skm_avail -= count;
- memmove(skm->skm_objs, &(skm->skm_objs[count]),
- sizeof (void *) * skm->skm_avail);
-}
-
-static void
-spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
-{
- spin_lock(&skc->skc_lock);
- __spl_cache_flush(skc, skm, flush);
- spin_unlock(&skc->skc_lock);
-}
-
-static void
-spl_magazine_age(void *data)
-{
- spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
- spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
-
- ASSERT(skm->skm_magic == SKM_MAGIC);
- ASSERT(skm->skm_cpu == smp_processor_id());
- ASSERT(irqs_disabled());
-
- /* There are no available objects or they are too young to age out */
- if ((skm->skm_avail == 0) ||
- time_before(jiffies, skm->skm_age + skc->skc_delay * HZ))
- return;
-
- /*
- * Because we're executing in interrupt context we may have
- * interrupted the holder of this lock. To avoid a potential
- * deadlock return if the lock is contended.
- */
- if (!spin_trylock(&skc->skc_lock))
- return;
-
- __spl_cache_flush(skc, skm, skm->skm_refill);
- spin_unlock(&skc->skc_lock);
-}
-
-/*
- * Called regularly to keep a downward pressure on the cache.
- *
- * Objects older than skc->skc_delay seconds in the per-cpu magazines will
- * be returned to the caches. This is done to prevent idle magazines from
- * holding memory which could be better used elsewhere. The delay is
- * present to prevent thrashing the magazine.
- *
- * The newly released objects may result in empty partial slabs. Those
- * slabs should be released to the system. Otherwise moving the objects
- * out of the magazines is just wasted work.
- */
-static void
-spl_cache_age(void *data)
-{
- spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
- taskqid_t id = 0;
-
- ASSERT(skc->skc_magic == SKC_MAGIC);
-
- /* Dynamically disabled at run time */
- if (!(spl_kmem_cache_expire & KMC_EXPIRE_AGE))
- return;
-
- atomic_inc(&skc->skc_ref);
-
- if (!(skc->skc_flags & KMC_NOMAGAZINE))
- on_each_cpu(spl_magazine_age, skc, 1);
-
- spl_slab_reclaim(skc);
-
- while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {
- id = taskq_dispatch_delay(
- spl_kmem_cache_taskq, spl_cache_age, skc, TQ_SLEEP,
- ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
-
- /* Destroy issued after dispatch immediately cancel it */
- if (test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && id)
- taskq_cancel_id(spl_kmem_cache_taskq, id);
- }
-
- spin_lock(&skc->skc_lock);
- skc->skc_taskqid = id;
- spin_unlock(&skc->skc_lock);
-
- atomic_dec(&skc->skc_ref);
-}
-
-/*
- * Size a slab based on the size of each aligned object plus spl_kmem_obj_t.
- * When on-slab we want to target spl_kmem_cache_obj_per_slab. However,
- * for very small objects we may end up with more than this so as not
- * to waste space in the minimal allocation of a single page. Also for
- * very large objects we may use as few as spl_kmem_cache_obj_per_slab_min,
- * lower than this and we will fail.
- */
-static int
-spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size)
-{
- uint32_t sks_size, obj_size, max_size, tgt_size, tgt_objs;
-
- if (skc->skc_flags & KMC_OFFSLAB) {
- tgt_objs = spl_kmem_cache_obj_per_slab;
- tgt_size = P2ROUNDUP(sizeof (spl_kmem_slab_t), PAGE_SIZE);
-
- if ((skc->skc_flags & KMC_KMEM) &&
- (spl_obj_size(skc) > (SPL_MAX_ORDER_NR_PAGES * PAGE_SIZE)))
- return (-ENOSPC);
- } else {
- sks_size = spl_sks_size(skc);
- obj_size = spl_obj_size(skc);
- max_size = (spl_kmem_cache_max_size * 1024 * 1024);
- tgt_size = (spl_kmem_cache_obj_per_slab * obj_size + sks_size);
-
- /*
- * KMC_KMEM slabs are allocated by __get_free_pages() which
- * rounds up to the nearest order. Knowing this the size
- * should be rounded up to the next power of two with a hard
- * maximum defined by the maximum allowed allocation order.
- */
- if (skc->skc_flags & KMC_KMEM) {
- max_size = SPL_MAX_ORDER_NR_PAGES * PAGE_SIZE;
- tgt_size = MIN(max_size,
- PAGE_SIZE * (1 << MAX(get_order(tgt_size) - 1, 1)));
- }
-
- if (tgt_size <= max_size) {
- tgt_objs = (tgt_size - sks_size) / obj_size;
- } else {
- tgt_objs = (max_size - sks_size) / obj_size;
- tgt_size = (tgt_objs * obj_size) + sks_size;
- }
- }
-
- if (tgt_objs == 0)
- return (-ENOSPC);
-
- *objs = tgt_objs;
- *size = tgt_size;
-
- return (0);
-}
-
-/*
- * Make a guess at reasonable per-cpu magazine size based on the size of
- * each object and the cost of caching N of them in each magazine. Long
- * term this should really adapt based on an observed usage heuristic.
- */
-static int
-spl_magazine_size(spl_kmem_cache_t *skc)
-{
- uint32_t obj_size = spl_obj_size(skc);
- int size;
-
- if (spl_kmem_cache_magazine_size > 0)
- return (MAX(MIN(spl_kmem_cache_magazine_size, 256), 2));
-
- /* Per-magazine sizes below assume a 4Kib page size */
- if (obj_size > (PAGE_SIZE * 256))
- size = 4; /* Minimum 4Mib per-magazine */
- else if (obj_size > (PAGE_SIZE * 32))
- size = 16; /* Minimum 2Mib per-magazine */
- else if (obj_size > (PAGE_SIZE))
- size = 64; /* Minimum 256Kib per-magazine */
- else if (obj_size > (PAGE_SIZE / 4))
- size = 128; /* Minimum 128Kib per-magazine */
- else
- size = 256;
-
- return (size);
-}
-
-/*
- * Allocate a per-cpu magazine to associate with a specific core.
- */
-static spl_kmem_magazine_t *
-spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
-{
- spl_kmem_magazine_t *skm;
- int size = sizeof (spl_kmem_magazine_t) +
- sizeof (void *) * skc->skc_mag_size;
-
- skm = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
- if (skm) {
- skm->skm_magic = SKM_MAGIC;
- skm->skm_avail = 0;
- skm->skm_size = skc->skc_mag_size;
- skm->skm_refill = skc->skc_mag_refill;
- skm->skm_cache = skc;
- skm->skm_age = jiffies;
- skm->skm_cpu = cpu;
- }
-
- return (skm);
-}
-
-/*
- * Free a per-cpu magazine associated with a specific core.
- */
-static void
-spl_magazine_free(spl_kmem_magazine_t *skm)
-{
- ASSERT(skm->skm_magic == SKM_MAGIC);
- ASSERT(skm->skm_avail == 0);
- kfree(skm);
-}
-
-/*
- * Create all pre-cpu magazines of reasonable sizes.
- */
-static int
-spl_magazine_create(spl_kmem_cache_t *skc)
-{
- int i;
-
- if (skc->skc_flags & KMC_NOMAGAZINE)
- return (0);
-
- skc->skc_mag = kzalloc(sizeof (spl_kmem_magazine_t *) *
- num_possible_cpus(), kmem_flags_convert(KM_SLEEP));
- skc->skc_mag_size = spl_magazine_size(skc);
- skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
-
- for_each_possible_cpu(i) {
- skc->skc_mag[i] = spl_magazine_alloc(skc, i);
- if (!skc->skc_mag[i]) {
- for (i--; i >= 0; i--)
- spl_magazine_free(skc->skc_mag[i]);
-
- kfree(skc->skc_mag);
- return (-ENOMEM);
- }
- }
-
- return (0);
-}
-
-/*
- * Destroy all pre-cpu magazines.
- */
-static void
-spl_magazine_destroy(spl_kmem_cache_t *skc)
-{
- spl_kmem_magazine_t *skm;
- int i;
-
- if (skc->skc_flags & KMC_NOMAGAZINE)
- return;
-
- for_each_possible_cpu(i) {
- skm = skc->skc_mag[i];
- spl_cache_flush(skc, skm, skm->skm_avail);
- spl_magazine_free(skm);
- }
-
- kfree(skc->skc_mag);
-}
-
-/*
- * Create a object cache based on the following arguments:
- * name cache name
- * size cache object size
- * align cache object alignment
- * ctor cache object constructor
- * dtor cache object destructor
- * reclaim cache object reclaim
- * priv cache private data for ctor/dtor/reclaim
- * vmp unused must be NULL
- * flags
- * KMC_KMEM Force SPL kmem backed cache
- * KMC_VMEM Force SPL vmem backed cache
- * KMC_SLAB Force Linux slab backed cache
- * KMC_OFFSLAB Locate objects off the slab
- * KMC_NOTOUCH unsupported
- * KMC_NODEBUG unsupported
- * KMC_NOHASH unsupported
- * KMC_QCACHE unsupported
- * KMC_NOMAGAZINE unsupported
- */
-spl_kmem_cache_t *
-spl_kmem_cache_create(char *name, size_t size, size_t align,
- spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, spl_kmem_reclaim_t reclaim,
- void *priv, void *vmp, int flags)
-{
- gfp_t lflags = kmem_flags_convert(KM_SLEEP);
- spl_kmem_cache_t *skc;
- int rc;
-
- /*
- * Unsupported flags
- */
- ASSERT0(flags & KMC_NOMAGAZINE);
- ASSERT0(flags & KMC_NOHASH);
- ASSERT0(flags & KMC_QCACHE);
- ASSERT(vmp == NULL);
-
- might_sleep();
-
- skc = kzalloc(sizeof (*skc), lflags);
- if (skc == NULL)
- return (NULL);
-
- skc->skc_magic = SKC_MAGIC;
- skc->skc_name_size = strlen(name) + 1;
- skc->skc_name = (char *)kmalloc(skc->skc_name_size, lflags);
- if (skc->skc_name == NULL) {
- kfree(skc);
- return (NULL);
- }
- strncpy(skc->skc_name, name, skc->skc_name_size);
-
- skc->skc_ctor = ctor;
- skc->skc_dtor = dtor;
- skc->skc_reclaim = reclaim;
- skc->skc_private = priv;
- skc->skc_vmp = vmp;
- skc->skc_linux_cache = NULL;
- skc->skc_flags = flags;
- skc->skc_obj_size = size;
- skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
- skc->skc_delay = SPL_KMEM_CACHE_DELAY;
- skc->skc_reap = SPL_KMEM_CACHE_REAP;
- atomic_set(&skc->skc_ref, 0);
-
- INIT_LIST_HEAD(&skc->skc_list);
- INIT_LIST_HEAD(&skc->skc_complete_list);
- INIT_LIST_HEAD(&skc->skc_partial_list);
- skc->skc_emergency_tree = RB_ROOT;
- spin_lock_init(&skc->skc_lock);
- init_waitqueue_head(&skc->skc_waitq);
- skc->skc_slab_fail = 0;
- skc->skc_slab_create = 0;
- skc->skc_slab_destroy = 0;
- skc->skc_slab_total = 0;
- skc->skc_slab_alloc = 0;
- skc->skc_slab_max = 0;
- skc->skc_obj_total = 0;
- skc->skc_obj_alloc = 0;
- skc->skc_obj_max = 0;
- skc->skc_obj_deadlock = 0;
- skc->skc_obj_emergency = 0;
- skc->skc_obj_emergency_max = 0;
-
- /*
- * Verify the requested alignment restriction is sane.
- */
- if (align) {
- VERIFY(ISP2(align));
- VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN);
- VERIFY3U(align, <=, PAGE_SIZE);
- skc->skc_obj_align = align;
- }
-
- /*
- * When no specific type of slab is requested (kmem, vmem, or
- * linuxslab) then select a cache type based on the object size
- * and default tunables.
- */
- if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB))) {
-
- if (spl_kmem_cache_slab_limit &&
- size <= (size_t)spl_kmem_cache_slab_limit) {
- /*
- * Objects smaller than spl_kmem_cache_slab_limit can
- * use the Linux slab for better space-efficiency.
- */
- skc->skc_flags |= KMC_SLAB;
- } else if (spl_obj_size(skc) <= spl_kmem_cache_kmem_limit) {
- /*
- * Small objects, less than spl_kmem_cache_kmem_limit
- * per object should use kmem because their slabs are
- * small.
- */
- skc->skc_flags |= KMC_KMEM;
- } else {
- /*
- * All other objects are considered large and are
- * placed on vmem backed slabs.
- */
- skc->skc_flags |= KMC_VMEM;
- }
- }
-
- /*
- * Given the type of slab allocate the required resources.
- */
- if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
- rc = spl_slab_size(skc,
- &skc->skc_slab_objs, &skc->skc_slab_size);
- if (rc)
- goto out;
-
- rc = spl_magazine_create(skc);
- if (rc)
- goto out;
- } else {
- unsigned long slabflags = 0;
-
- if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) {
- rc = EINVAL;
- goto out;
- }
-
-#if defined(SLAB_USERCOPY)
- /*
- * Required for PAX-enabled kernels if the slab is to be
- * used for copying between user and kernel space.
- */
- slabflags |= SLAB_USERCOPY;
-#endif
-
-#if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY)
- /*
- * Newer grsec patchset uses kmem_cache_create_usercopy()
- * instead of SLAB_USERCOPY flag
- */
- skc->skc_linux_cache = kmem_cache_create_usercopy(
- skc->skc_name, size, align, slabflags, 0, size, NULL);
-#else
- skc->skc_linux_cache = kmem_cache_create(
- skc->skc_name, size, align, slabflags, NULL);
-#endif
- if (skc->skc_linux_cache == NULL) {
- rc = ENOMEM;
- goto out;
- }
-
-#if defined(HAVE_KMEM_CACHE_ALLOCFLAGS)
- skc->skc_linux_cache->allocflags |= __GFP_COMP;
-#elif defined(HAVE_KMEM_CACHE_GFPFLAGS)
- skc->skc_linux_cache->gfpflags |= __GFP_COMP;
-#endif
- skc->skc_flags |= KMC_NOMAGAZINE;
- }
-
- if (spl_kmem_cache_expire & KMC_EXPIRE_AGE) {
- skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
- spl_cache_age, skc, TQ_SLEEP,
- ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
- }
-
- down_write(&spl_kmem_cache_sem);
- list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
- up_write(&spl_kmem_cache_sem);
-
- return (skc);
-out:
- kfree(skc->skc_name);
- kfree(skc);
- return (NULL);
-}
-EXPORT_SYMBOL(spl_kmem_cache_create);
-
-/*
- * Register a move callback for cache defragmentation.
- * XXX: Unimplemented but harmless to stub out for now.
- */
-void
-spl_kmem_cache_set_move(spl_kmem_cache_t *skc,
- kmem_cbrc_t (move)(void *, void *, size_t, void *))
-{
- ASSERT(move != NULL);
-}
-EXPORT_SYMBOL(spl_kmem_cache_set_move);
-
-/*
- * Destroy a cache and all objects associated with the cache.
- *
- * The cache is first unlinked from spl_kmem_cache_list and flagged with
- * KMC_BIT_DESTROY, the task id recorded in skc_taskqid is cancelled, and
- * only then are the magazines and slabs (KMC_KMEM, KMC_VMEM) or the backing
- * Linux slab (KMC_SLAB) released.  Finally the name string and the cache
- * structure itself are passed to kfree(), so 'skc' must not be used after
- * this function returns.
- */
-void
-spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
-{
- DECLARE_WAIT_QUEUE_HEAD(wq);
- taskqid_t id;
-
- ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB));
-
- down_write(&spl_kmem_cache_sem);
- list_del_init(&skc->skc_list);
- up_write(&spl_kmem_cache_sem);
-
- /*
- * Cancel and wait for any pending delayed tasks.  The VERIFY trips
- * when KMC_BIT_DESTROY was already set, i.e. on a repeated destroy.
- */
- VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-
- spin_lock(&skc->skc_lock);
- id = skc->skc_taskqid;
- spin_unlock(&skc->skc_lock);
-
- taskq_cancel_id(spl_kmem_cache_taskq, id);
-
- /*
- * Wait until all current callers complete, this is mainly
- * to catch the case where a low memory situation triggers a
- * cache reaping action which races with this destroy.
- *
- * NOTE(review): 'wq' is local to this function and nothing visible
- * here wakes it, so this appears to rely on skc_ref already being
- * zero when the condition is first evaluated -- confirm.
- */
- wait_event(wq, atomic_read(&skc->skc_ref) == 0);
-
- if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
- spl_magazine_destroy(skc);
- spl_slab_reclaim(skc);
- } else {
- ASSERT(skc->skc_flags & KMC_SLAB);
- kmem_cache_destroy(skc->skc_linux_cache);
- }
-
- spin_lock(&skc->skc_lock);
-
- /*
- * Validate there are no objects in use and free all the
- * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
- */
- ASSERT3U(skc->skc_slab_alloc, ==, 0);
- ASSERT3U(skc->skc_obj_alloc, ==, 0);
- ASSERT3U(skc->skc_slab_total, ==, 0);
- ASSERT3U(skc->skc_obj_total, ==, 0);
- ASSERT3U(skc->skc_obj_emergency, ==, 0);
- ASSERT(list_empty(&skc->skc_complete_list));
-
- spin_unlock(&skc->skc_lock);
-
- kfree(skc->skc_name);
- kfree(skc);
-}
-EXPORT_SYMBOL(spl_kmem_cache_destroy);
-
-/*
- * Hand out the first free object of slab 'sks', which belongs to cache
- * 'skc'.  The object is unlinked from the slab's free list, the slab age
- * is refreshed and the object/slab usage counters together with their
- * high-water marks are updated.  Used to repopulate the per-cpu magazine
- * caches in batches when they run low.
- *
- * Returns the address of the object buffer (sko_addr).
- */
-static void *
-spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
-{
-	spl_kmem_obj_t *free_obj;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT(sks->sks_magic == SKS_MAGIC);
-
-	/* Take the head of sks_free_list and unlink it */
-	free_obj = list_entry(sks->sks_free_list.next,
-	    spl_kmem_obj_t, sko_list);
-	ASSERT(free_obj->sko_magic == SKO_MAGIC);
-	ASSERT(free_obj->sko_addr != NULL);
-	list_del_init(&free_obj->sko_list);
-
-	sks->sks_age = jiffies;
-
-	/* One more object in use; track max obj usage statistics */
-	skc->skc_obj_alloc += 1;
-	if (skc->skc_obj_max < skc->skc_obj_alloc)
-		skc->skc_obj_max = skc->skc_obj_alloc;
-
-	/* First object from this slab; track max slab usage statistics */
-	sks->sks_ref += 1;
-	if (sks->sks_ref == 1) {
-		skc->skc_slab_alloc += 1;
-		if (skc->skc_slab_max < skc->skc_slab_alloc)
-			skc->skc_slab_max = skc->skc_slab_alloc;
-	}
-
-	return (free_obj->sko_addr);
-}
-
-/*
- * Generic slab allocation function to be run by the global work queues.
- * It is responsible for allocating a new slab, linking it in to the list
- * of partial slabs, and then waking any waiters.
- *
- * Besides the work queue path (spl_cache_grow_work()) it is also called
- * directly by spl_cache_grow().  Returns 0 when a slab was added and
- * -ENOMEM when spl_slab_alloc() returned NULL; in the latter case no
- * counter is changed and nobody is woken.
- */
-static int
-__spl_cache_grow(spl_kmem_cache_t *skc, int flags)
-{
- spl_kmem_slab_t *sks;
-
- fstrans_cookie_t cookie = spl_fstrans_mark(); /* covers only the slab allocation */
- sks = spl_slab_alloc(skc, flags);
- spl_fstrans_unmark(cookie);
-
- spin_lock(&skc->skc_lock);
- if (sks) {
- skc->skc_slab_total++;
- skc->skc_obj_total += sks->sks_objs;
- list_add_tail(&sks->sks_list, &skc->skc_partial_list);
-
- smp_mb__before_atomic();
- clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags); /* a fresh slab is on the partial list */
- smp_mb__after_atomic();
- wake_up_all(&skc->skc_waitq);
- }
- spin_unlock(&skc->skc_lock);
-
- return (sks == NULL ? -ENOMEM : 0);
-}
-
-/*
- * Taskq callback performing the asynchronous slab allocation described by
- * a spl_kmem_alloc_t request.  Once the attempt has finished it drops the
- * cache reference taken for the request, clears KMC_BIT_GROWING and frees
- * the request structure.
- */
-static void
-spl_cache_grow_work(void *data)
-{
-	spl_kmem_alloc_t *request = data;
-	spl_kmem_cache_t *cache = request->ska_cache;
-
-	/* The result is deliberately ignored */
-	(void) __spl_cache_grow(cache, request->ska_flags);
-
-	atomic_dec(&cache->skc_ref);
-	smp_mb__before_atomic();
-	clear_bit(KMC_BIT_GROWING, &cache->skc_flags);
-	smp_mb__after_atomic();
-
-	kfree(request);
-}
-
-/*
- * Wait condition used while a slab allocation is outstanding: evaluates
- * to 1 once KMC_BIT_GROWING is no longer set, and to 0 while it is.
- */
-static int
-spl_cache_grow_wait(spl_kmem_cache_t *skc)
-{
-	if (test_bit(KMC_BIT_GROWING, &skc->skc_flags))
-		return (0);
-
-	return (1);
-}
-
-/*
- * No available objects on any slabs, create a new slab. Note that this
- * functionality is disabled for KMC_SLAB caches which are backed by the
- * Linux slab.
- *
- * *obj is reset to NULL on entry and is only handed to
- * spl_emergency_alloc().  Return values visible in this function:
- *   0         a new slab was linked in by the synchronous attempt;
- *   -EAGAIN   a reap was in progress and has been waited for (a non-zero
- *             result of spl_wait_on_bit() is returned instead);
- *   -ENOMEM   the request structure could not be allocated, or the
- *             asynchronous allocation was waited on -- returned whether
- *             or not that wait timed out;
- *   otherwise the result of spl_emergency_alloc() when the cache is
- *             flagged KMC_BIT_DEADLOCKED.
- */
-static int
-spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
-{
- int remaining, rc = 0;
-
- ASSERT0(flags & ~KM_PUBLIC_MASK);
- ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT((skc->skc_flags & KMC_SLAB) == 0);
- might_sleep();
- *obj = NULL;
-
- /*
- * Before allocating a new slab wait for any reaping to complete and
- * then return so the local magazine can be rechecked for new objects.
- */
- if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
- rc = spl_wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING,
- TASK_UNINTERRUPTIBLE);
- return (rc ? rc : -EAGAIN);
- }
-
- /*
- * To reduce the overhead of context switch and improve NUMA locality,
- * it tries to allocate a new slab in the current process context with
- * KM_NOSLEEP flag. If it fails, it will launch a new taskq to do the
- * allocation.
- *
- * However, this can't be applied to KMC_VMEM due to a bug that
- * __vmalloc() doesn't honor gfp flags in page table allocation.
- */
- if (!(skc->skc_flags & KMC_VMEM)) {
- rc = __spl_cache_grow(skc, flags | KM_NOSLEEP);
- if (rc == 0)
- return (0);
- }
-
- /*
- * This is handled by dispatching a work request to the global work
- * queue. This allows us to asynchronously allocate a new slab while
- * retaining the ability to safely fall back to smaller synchronous
- * allocations to ensure forward progress is always maintained.
- */
- if (test_and_set_bit(KMC_BIT_GROWING, &skc->skc_flags) == 0) {
- spl_kmem_alloc_t *ska;
-
- ska = kmalloc(sizeof (*ska), kmem_flags_convert(flags));
- if (ska == NULL) {
- clear_bit_unlock(KMC_BIT_GROWING, &skc->skc_flags);
- smp_mb__after_atomic();
- wake_up_all(&skc->skc_waitq);
- return (-ENOMEM);
- }
-
- atomic_inc(&skc->skc_ref); /* dropped again in spl_cache_grow_work() */
- ska->ska_cache = skc;
- ska->ska_flags = flags;
- taskq_init_ent(&ska->ska_tqe);
- taskq_dispatch_ent(spl_kmem_cache_taskq,
- spl_cache_grow_work, ska, 0, &ska->ska_tqe);
- }
-
- /*
- * The goal here is to only detect the rare case where a virtual slab
- * allocation has deadlocked. We must be careful to minimize the use
- * of emergency objects which are more expensive to track. Therefore,
- * we set a very long timeout for the asynchronous allocation and if
- * the timeout is reached the cache is flagged as deadlocked. From
- * this point only new emergency objects will be allocated until the
- * asynchronous allocation completes and clears the deadlocked flag.
- *
- * NOTE(review): the timeout actually passed below is HZ / 10, which
- * does not obviously match "very long" -- confirm which is intended.
- */
- if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags)) {
- rc = spl_emergency_alloc(skc, flags, obj);
- } else {
- remaining = wait_event_timeout(skc->skc_waitq,
- spl_cache_grow_wait(skc), HZ / 10);
-
- if (!remaining) {
- spin_lock(&skc->skc_lock);
- if (test_bit(KMC_BIT_GROWING, &skc->skc_flags)) {
- set_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
- skc->skc_obj_deadlock++;
- }
- spin_unlock(&skc->skc_lock);
- }
-
- rc = -ENOMEM; /* also when the wait did not time out */
- }
-
- return (rc);
-}
-
-/*
- * Refill a per-cpu magazine with objects from the slabs for this cache.
- * Ideally the magazine can be repopulated using existing objects which have
- * been released, however if we are unable to locate enough free objects new
- * slabs of objects will be created. On success NULL is returned, otherwise
- * the address of a single emergency object is returned for use by the caller.
- *
- * NULL is also what every failure path returns (see the 'out' label), so a
- * NULL result by itself does not tell the caller that objects were added.
- * Interrupts are enabled only around the call to spl_cache_grow(), and
- * skc_lock is not held on any return path.
- */
-static void *
-spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
-{
- spl_kmem_slab_t *sks;
- int count = 0, rc, refill;
- void *obj = NULL;
-
- ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT(skm->skm_magic == SKM_MAGIC);
-
- refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail); /* never beyond the free magazine slots */
- spin_lock(&skc->skc_lock);
-
- while (refill > 0) {
- /* No slabs available we may need to grow the cache */
- if (list_empty(&skc->skc_partial_list)) {
- spin_unlock(&skc->skc_lock);
-
- local_irq_enable();
- rc = spl_cache_grow(skc, flags, &obj);
- local_irq_disable();
-
- /* Emergency object for immediate use by caller */
- if (rc == 0 && obj != NULL)
- return (obj);
-
- if (rc)
- goto out;
-
- /* Rescheduled to different CPU skm is not local */
- if (skm != skc->skc_mag[smp_processor_id()])
- goto out;
-
- /*
- * Potentially rescheduled to the same CPU but
- * allocations may have occurred from this CPU while
- * we were sleeping so recalculate max refill.
- */
- refill = MIN(refill, skm->skm_size - skm->skm_avail);
-
- spin_lock(&skc->skc_lock);
- continue;
- }
-
- /* Grab the next available slab */
- sks = list_entry((&skc->skc_partial_list)->next,
- spl_kmem_slab_t, sks_list);
- ASSERT(sks->sks_magic == SKS_MAGIC);
- ASSERT(sks->sks_ref < sks->sks_objs);
- ASSERT(!list_empty(&sks->sks_free_list));
-
- /*
- * Consume as many objects as needed to refill the requested
- * cache. We must also be careful not to overfill it.
- */
- while (sks->sks_ref < sks->sks_objs && refill-- > 0 &&
- ++count) {
- ASSERT(skm->skm_avail < skm->skm_size);
- ASSERT(count < skm->skm_size);
- skm->skm_objs[skm->skm_avail++] =
- spl_cache_obj(skc, sks);
- }
-
- /* Move slab to skc_complete_list when full */
- if (sks->sks_ref == sks->sks_objs) {
- list_del(&sks->sks_list);
- list_add(&sks->sks_list, &skc->skc_complete_list);
- }
- }
-
- spin_unlock(&skc->skc_lock);
-out:
- return (NULL);
-}
-
-/*
- * Release an object back to the slab from which it came.
- *
- * The owning slab is looked up through spl_sko_from_obj() and the object's
- * sko_slab back pointer; the slab is then repositioned on the partial list
- * according to how many of its objects remain in use.
- *
- * NOTE(review): no lock is taken here; presumably the caller holds
- * skc_lock while the lists and counters are modified -- confirm.
- */
-static void
-spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
-{
- spl_kmem_slab_t *sks = NULL;
- spl_kmem_obj_t *sko = NULL;
-
- ASSERT(skc->skc_magic == SKC_MAGIC);
-
- sko = spl_sko_from_obj(skc, obj);
- ASSERT(sko->sko_magic == SKO_MAGIC);
- sks = sko->sko_slab;
- ASSERT(sks->sks_magic == SKS_MAGIC);
- ASSERT(sks->sks_cache == skc);
- list_add(&sko->sko_list, &sks->sks_free_list);
-
- sks->sks_age = jiffies;
- sks->sks_ref--;
- skc->skc_obj_alloc--;
-
- /*
- * Move slab to skc_partial_list when no longer full. Slabs
- * are added to the head to keep the partial list in quasi-full
- * sorted order. Fuller at the head, emptier at the tail.
- */
- if (sks->sks_ref == (sks->sks_objs - 1)) {
- list_del(&sks->sks_list);
- list_add(&sks->sks_list, &skc->skc_partial_list);
- }
-
- /*
- * Move empty slabs to the end of the partial list so
- * they can be easily found and freed during reclamation.
- */
- if (sks->sks_ref == 0) {
- list_del(&sks->sks_list);
- list_add_tail(&sks->sks_list, &skc->skc_partial_list);
- skc->skc_slab_alloc--;
- }
-}
-
-/*
- * Allocate an object from the per-cpu magazine, or if the magazine
- * is empty directly allocate from a slab and repopulate the magazine.
- *
- * skc   - cache to allocate from; must not be flagged KMC_BIT_DESTROY.
- * flags - KM_* allocation flags, limited to KM_PUBLIC_MASK.
- *
- * Returns the object, with the cache constructor (when one is registered)
- * already applied.  NULL can only be returned to KM_NOSLEEP callers; every
- * other caller retries until an object has been obtained.
- */
-void *
-spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
-{
-	spl_kmem_magazine_t *skm;
-	void *obj = NULL;
-
-	ASSERT0(flags & ~KM_PUBLIC_MASK);
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-
-	/*
-	 * Allocate directly from a Linux slab.  All optimizations are left
-	 * to the underlying cache we only need to guarantee that KM_SLEEP
-	 * callers will never fail.
-	 */
-	if (skc->skc_flags & KMC_SLAB) {
-		struct kmem_cache *slc = skc->skc_linux_cache;
-		do {
-			obj = kmem_cache_alloc(slc, kmem_flags_convert(flags));
-		} while ((obj == NULL) && !(flags & KM_NOSLEEP));
-
-		goto ret;
-	}
-
-	local_irq_disable();
-
-restart:
-	/*
-	 * Safe to update per-cpu structure without lock, but
-	 * in the restart case we must be careful to reacquire
-	 * the local magazine since this may have changed
-	 * when we need to grow the cache.
-	 */
-	skm = skc->skc_mag[smp_processor_id()];
-	ASSERT(skm->skm_magic == SKM_MAGIC);
-
-	if (likely(skm->skm_avail)) {
-		/* Object available in CPU cache, use it */
-		obj = skm->skm_objs[--skm->skm_avail];
-		skm->skm_age = jiffies;
-	} else {
-		/* NULL here means "retry" unless the caller cannot sleep */
-		obj = spl_cache_refill(skc, skm, flags);
-		if ((obj == NULL) && !(flags & KM_NOSLEEP))
-			goto restart;
-
-		local_irq_enable();
-		goto ret;
-	}
-
-	local_irq_enable();
-	ASSERT(obj);
-	ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align));
-
-ret:
-	/*
-	 * Run the constructor when there is one, otherwise pre-emptively
-	 * migrate the object to the CPU L1 cache.
-	 */
-	if (obj != NULL) {
-		if (skc->skc_ctor)
-			skc->skc_ctor(obj, skc->skc_private, flags);
-		else
-			prefetchw(obj);
-	}
-
-	return (obj);
-}
-EXPORT_SYMBOL(spl_kmem_cache_alloc);
-
-/*
- * Free an object back to the local per-cpu magazine, there is no
- * guarantee that this is the same magazine the object was originally
- * allocated from. We may need to flush objects from the magazine
- * back to the slabs to make space.
- *
- * The cache destructor, when one is registered, runs before anything else.
- * Objects of KMC_SLAB caches go straight back to the Linux slab, and
- * objects accepted by spl_emergency_free() never reach the magazine.
- */
-void
-spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
-{
- spl_kmem_magazine_t *skm;
- unsigned long flags;
- int do_reclaim = 0;
- int do_emergency = 0;
-
- ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-
- /*
- * Run the destructor
- */
- if (skc->skc_dtor)
- skc->skc_dtor(obj, skc->skc_private);
-
- /*
- * Free the object to the underlying Linux slab.
- */
- if (skc->skc_flags & KMC_SLAB) {
- kmem_cache_free(skc->skc_linux_cache, obj);
- return;
- }
-
- /*
- * While a cache has outstanding emergency objects all freed objects
- * must be checked. However, since emergency objects will never use
- * a virtual address these objects can be safely excluded as an
- * optimization.
- */
- if (!is_vmalloc_addr(obj)) {
- spin_lock(&skc->skc_lock);
- do_emergency = (skc->skc_obj_emergency > 0);
- spin_unlock(&skc->skc_lock);
-
- if (do_emergency && (spl_emergency_free(skc, obj) == 0))
- return;
- }
-
- local_irq_save(flags);
-
- /*
- * Safe to update per-cpu structure without lock, but
- * no remote memory allocation tracking is being performed;
- * it is entirely possible to allocate an object from one
- * CPU cache and return it to another.
- */
- skm = skc->skc_mag[smp_processor_id()];
- ASSERT(skm->skm_magic == SKM_MAGIC);
-
- /*
- * Per-CPU cache full, flush it to make space for this object,
- * this may result in an empty slab which can be reclaimed once
- * interrupts are re-enabled.
- */
- if (unlikely(skm->skm_avail >= skm->skm_size)) {
- spl_cache_flush(skc, skm, skm->skm_refill);
- do_reclaim = 1;
- }
-
- /* Available space in cache, use it */
- skm->skm_objs[skm->skm_avail++] = obj;
-
- local_irq_restore(flags);
-
- if (do_reclaim) /* deferred until interrupts are enabled again */
- spl_slab_reclaim(skc);
-}
-EXPORT_SYMBOL(spl_kmem_cache_free);
-
-/*
- * The generic shrinker function for all caches. Under Linux a shrinker
- * may not be tightly coupled with a slab cache. In fact Linux always
- * systematically tries calling all registered shrinker callbacks which
- * report that they contain unused objects. Because of this we only
- * register one shrinker function in the shim layer for all slab caches.
- * We always attempt to shrink all caches when this generic shrinker
- * is called.
- *
- * If sc->nr_to_scan is zero, the caller is requesting a query of the
- * number of objects which can potentially be freed. If it is nonzero,
- * the request is to free that many objects.
- *
- * Linux kernels >= 3.12 have the count_objects and scan_objects callbacks
- * in struct shrinker and also require the shrinker to return the number
- * of objects freed.
- *
- * Older kernels require the shrinker to return the number of freeable
- * objects following the freeing of nr_to_free.
- *
- * Linux semantics differ from those under Solaris, which are to
- * free all available objects which may (and probably will) be more
- * objects than the requested nr_to_scan.
- *
- * SHRINK_STOP is returned, without touching any cache, for a scan request
- * made while spl_fstrans_check() is true, and after a single pass when
- * KMC_RECLAIM_ONCE is set.  The 'shrink' argument is not used in the body.
- */
-static spl_shrinker_t
-__spl_kmem_cache_generic_shrinker(struct shrinker *shrink,
- struct shrink_control *sc)
-{
- spl_kmem_cache_t *skc;
- int alloc = 0; /* running total built from the uint64_t skc_obj_alloc values */
-
- /*
- * No shrinking in a transaction context. Can cause deadlocks.
- */
- if (sc->nr_to_scan && spl_fstrans_check())
- return (SHRINK_STOP);
-
- down_read(&spl_kmem_cache_sem);
- list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
- if (sc->nr_to_scan) {
-#ifdef HAVE_SPLIT_SHRINKER_CALLBACK
- uint64_t oldalloc = skc->skc_obj_alloc;
- spl_kmem_cache_reap_now(skc,
- MAX(sc->nr_to_scan>>fls64(skc->skc_slab_objs), 1));
- if (oldalloc > skc->skc_obj_alloc) /* count only what the reap released */
- alloc += oldalloc - skc->skc_obj_alloc;
-#else
- spl_kmem_cache_reap_now(skc,
- MAX(sc->nr_to_scan>>fls64(skc->skc_slab_objs), 1));
- alloc += skc->skc_obj_alloc;
-#endif /* HAVE_SPLIT_SHRINKER_CALLBACK */
- } else {
- /* Request to query number of freeable objects */
- alloc += skc->skc_obj_alloc;
- }
- }
- up_read(&spl_kmem_cache_sem);
-
- /*
- * When KMC_RECLAIM_ONCE is set allow only a single reclaim pass.
- * This functionality only exists to work around a rare issue where
- * shrink_slabs() is repeatedly invoked by many cores causing the
- * system to thrash.
- */
- if ((spl_kmem_cache_reclaim & KMC_RECLAIM_ONCE) && sc->nr_to_scan)
- return (SHRINK_STOP);
-
- return (MAX(alloc, 0));
-}
-
-SPL_SHRINKER_CALLBACK_WRAPPER(spl_kmem_cache_generic_shrinker);
-
-/*
- * Call the registered reclaim function for a cache. Depending on how
- * many and which objects are released it may simply repopulate the
- * local magazine which will then need to age-out. Objects which cannot
- * fit in the magazine will be released back to their slabs which will
- * also need to age out before being released. This is all just best
- * effort and we do not want to thrash creating and destroying slabs.
- *
- * A reference on the cache (skc_ref) is held for the duration of the call.
- * The 'count' argument is not used anywhere in the body.
- */
-void
-spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count)
-{
- ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-
- atomic_inc(&skc->skc_ref);
-
- /*
- * Execute the registered reclaim callback if it exists.
- */
- if (skc->skc_flags & KMC_SLAB) {
- if (skc->skc_reclaim)
- skc->skc_reclaim(skc->skc_private);
- goto out;
- }
-
- /*
- * Prevent concurrent cache reaping when contended.
- */
- if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags))
- goto out;
-
- /*
- * When a reclaim function is available it may be invoked repeatedly
- * until at least a single slab can be freed. This ensures that we
- * do free memory back to the system. This helps minimize the chance
- * of an OOM event when the bulk of memory is used by the slab.
- *
- * When free slabs are already available the reclaim callback will be
- * skipped. Additionally, if no forward progress is detected despite
- * a reclaim function the cache will be skipped to avoid deadlock.
- *
- * Longer term this would be the correct place to add the code which
- * repacks the slabs in order to minimize fragmentation.
- */
- if (skc->skc_reclaim) {
- uint64_t objects = UINT64_MAX;
- int do_reclaim;
-
- do {
- spin_lock(&skc->skc_lock);
- do_reclaim =
- (skc->skc_slab_total > 0) &&
- ((skc->skc_slab_total-skc->skc_slab_alloc) == 0) &&
- (skc->skc_obj_alloc < objects);
-
- objects = skc->skc_obj_alloc; /* baseline for the next progress check */
- spin_unlock(&skc->skc_lock);
-
- if (do_reclaim)
- skc->skc_reclaim(skc->skc_private);
-
- } while (do_reclaim);
- }
-
- /* Reclaim from the magazine and free all now empty slabs. */
- if (spl_kmem_cache_expire & KMC_EXPIRE_MEM) {
- spl_kmem_magazine_t *skm;
- unsigned long irq_flags;
-
- local_irq_save(irq_flags);
- skm = skc->skc_mag[smp_processor_id()]; /* only the current CPU's magazine */
- spl_cache_flush(skc, skm, skm->skm_avail);
- local_irq_restore(irq_flags);
- }
-
- spl_slab_reclaim(skc);
- clear_bit_unlock(KMC_BIT_REAPING, &skc->skc_flags);
- smp_mb__after_atomic();
- wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING);
-out:
- atomic_dec(&skc->skc_ref);
-}
-EXPORT_SYMBOL(spl_kmem_cache_reap_now);
-
-/*
- * This is stubbed out for code consistency with other platforms. There
- * is existing logic to prevent concurrent reaping so while this is ugly
- * it should do no harm.
- *
- * Takes no arguments and always returns 0.
- */
-int
-spl_kmem_cache_reap_active(void)
-{
-	return (0);
-}
-EXPORT_SYMBOL(spl_kmem_cache_reap_active);
-
-/*
- * Reap all free slabs from all registered caches.
- *
- * Runs one pass of the generic shrinker with a scan request of
- * KMC_REAP_CHUNK objects; the shrinker's return value is ignored.
- */
-void
-spl_kmem_reap(void)
-{
-	/*
-	 * Designated initializers zero every field which is not named, so
-	 * the shrinker never sees stack garbage in the control structure.
-	 */
-	struct shrink_control sc = {
-		.nr_to_scan = KMC_REAP_CHUNK,
-		.gfp_mask = GFP_KERNEL,
-	};
-
-	(void) __spl_kmem_cache_generic_shrinker(NULL, &sc);
-}
-EXPORT_SYMBOL(spl_kmem_reap);
-
-/*
- * Set up the global cache state: the semaphore protecting the cache list,
- * the list itself, the "spl_kmem_cache" taskq and the generic shrinker.
- *
- * Returns 0 on success, or -ENOMEM when the taskq cannot be created; in
- * that case the shrinker is not registered.
- */
-int
-spl_kmem_cache_init(void)
-{
-	init_rwsem(&spl_kmem_cache_sem);
-	INIT_LIST_HEAD(&spl_kmem_cache_list);
-	spl_kmem_cache_taskq = taskq_create("spl_kmem_cache",
-	    spl_kmem_cache_kmem_threads, maxclsyspri,
-	    spl_kmem_cache_kmem_threads * 8, INT_MAX,
-	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
-
-	/* Without the taskq no later dispatch could work; fail early */
-	if (spl_kmem_cache_taskq == NULL)
-		return (-ENOMEM);
-
-	spl_register_shrinker(&spl_kmem_cache_shrinker);
-
-	return (0);
-}
-
-void
-spl_kmem_cache_fini(void)
-{ /* Undo spl_kmem_cache_init(): drop the shrinker first, then the taskq. */
- spl_unregister_shrinker(&spl_kmem_cache_shrinker);
- taskq_destroy(spl_kmem_cache_taskq);
-}
diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c
deleted file mode 100644
index 824b5e89f..000000000
--- a/module/spl/spl-kmem.c
+++ /dev/null
@@ -1,556 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <sys/debug.h>
-#include <sys/sysmacros.h>
-#include <sys/kmem.h>
-#include <sys/vmem.h>
-#include <linux/mm.h>
-
-/*
- * As a general rule kmem_alloc() allocations should be small, preferably
- * just a few pages since they must be physically contiguous. Therefore, a
- * rate limited warning will be printed to the console for any kmem_alloc()
- * which exceeds a reasonable threshold.
- *
- * The default warning threshold is set to sixteen pages but capped at 64K to
- * accommodate systems using large pages. This value was selected to be small
- * enough to ensure the largest allocations are quickly noticed and fixed.
- * But large enough to avoid logging any warnings when an allocation size is
- * larger than optimal but not a serious concern. Since this value is tunable,
- * developers are encouraged to set it lower when testing so any new largish
- * allocations are quickly caught. These warnings may be disabled by setting
- * the threshold to zero.
- */
-/* BEGIN CSTYLED */
-unsigned int spl_kmem_alloc_warn = MIN(16 * PAGE_SIZE, 64 * 1024);
-module_param(spl_kmem_alloc_warn, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_alloc_warn,
- "Warning threshold in bytes for a kmem_alloc()");
-EXPORT_SYMBOL(spl_kmem_alloc_warn);
-
-/*
- * Large kmem_alloc() allocations will fail if they exceed KMALLOC_MAX_SIZE.
- * Allocations which are marginally smaller than this limit may succeed but
- * should still be avoided due to the expense of locating a contiguous range
- * of free pages. Therefore, a maximum kmem size with a reasonable safety
- * margin of 4x is set. Kmem_alloc() allocations larger than this maximum
- * will quickly fail. Vmem_alloc() allocations less than or equal to this
- * value will use kmalloc(), but shift to vmalloc() when exceeding this value.
- */
-unsigned int spl_kmem_alloc_max = (KMALLOC_MAX_SIZE >> 2);
-module_param(spl_kmem_alloc_max, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_alloc_max,
- "Maximum size in bytes for a kmem_alloc()");
-EXPORT_SYMBOL(spl_kmem_alloc_max);
-/* END CSTYLED */
-
-int
-kmem_debugging(void)
-{ /* Unconditionally returns 0. */
- return (0);
-}
-EXPORT_SYMBOL(kmem_debugging);
-
-/*
- * Format 'fmt' with the arguments in 'ap' into a newly allocated string.
- * kvasprintf() is retried until it succeeds, so NULL is never returned.
- * Every attempt works on its own copy of 'ap'.
- */
-char *
-kmem_vasprintf(const char *fmt, va_list ap)
-{
-	char *str;
-
-	for (;;) {
-		va_list attempt;
-
-		va_copy(attempt, ap);
-		str = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, attempt);
-		va_end(attempt);
-
-		if (str != NULL)
-			return (str);
-	}
-}
-EXPORT_SYMBOL(kmem_vasprintf);
-
-/*
- * printf()-style variant of kmem_vasprintf(): format the variable
- * arguments according to 'fmt' into a newly allocated string.  The
- * allocation is retried until it succeeds, so NULL is never returned.
- */
-char *
-kmem_asprintf(const char *fmt, ...)
-{
-	char *str;
-
-	for (;;) {
-		va_list args;
-
-		/* The argument list is restarted for every attempt */
-		va_start(args, fmt);
-		str = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, args);
-		va_end(args);
-
-		if (str != NULL)
-			return (str);
-	}
-}
-EXPORT_SYMBOL(kmem_asprintf);
-
-/*
- * Duplicate the NUL terminated string 'str' into memory obtained from
- * kmalloc() using the KM_* allocation 'flags'.
- *
- * Returns the copy, or NULL when the allocation fails.
- */
-static char *
-__strdup(const char *str, int flags)
-{
-	char *ptr;
-	size_t n;	/* strlen() yields a size_t; an int could truncate */
-
-	n = strlen(str);
-	ptr = kmalloc(n + 1, kmem_flags_convert(flags));
-	if (ptr)
-		memcpy(ptr, str, n + 1);	/* includes the terminator */
-
-	return (ptr);
-}
-
-char *
-strdup(const char *str)
-{ /* Copy 'str' through __strdup() with KM_SLEEP; the copy comes from kmalloc(). */
- return (__strdup(str, KM_SLEEP));
-}
-EXPORT_SYMBOL(strdup);
-
-void
-strfree(char *str)
-{ /* Hand 'str' to kfree(). */
- kfree(str);
-}
-EXPORT_SYMBOL(strfree);
-
-/*
- * General purpose unified implementation of kmem_alloc(). It is an
- * amalgamation of Linux and Illumos allocator design. It should never be
- * exported to ensure that code using kmem_alloc()/kmem_zalloc() remains
- * relatively portable. Consumers may only access this function through
- * wrappers that enforce the common flags to ensure portability.
- *
- * size  - number of bytes requested.
- * flags - KM_* flags; KM_VMEM permits falling back to __vmalloc() and
- *         KM_NOSLEEP makes a failed attempt return NULL instead of retrying.
- * node  - node argument handed to kmalloc_node().
- *
- * Returns the allocation, or NULL for a failed KM_NOSLEEP attempt and for
- * any request larger than spl_kmem_alloc_max made without KM_VMEM.
- */
-inline void *
-spl_kmem_alloc_impl(size_t size, int flags, int node)
-{
- gfp_t lflags = kmem_flags_convert(flags);
- int use_vmem = 0;
- void *ptr;
-
- /*
- * Log abnormally large allocations and rate limit the console output.
- * Allocations larger than spl_kmem_alloc_warn should be performed
- * through the vmem_alloc()/vmem_zalloc() interfaces.
- */
- if ((spl_kmem_alloc_warn > 0) && (size > spl_kmem_alloc_warn) &&
- !(flags & KM_VMEM)) {
- printk(KERN_WARNING
- "Large kmem_alloc(%lu, 0x%x), please file an issue at:\n"
- "https://github.com/zfsonlinux/zfs/issues/new\n",
- (unsigned long)size, flags);
- dump_stack();
- }
-
- /*
- * Use a loop because kmalloc_node() can fail when GFP_KERNEL is used
- * unlike kmem_alloc() with KM_SLEEP on Illumos.
- */
- do {
- /*
- * Calling kmalloc_node() when the size >= spl_kmem_alloc_max
- * is unsafe. This must fail for all kmem_alloc() and
- * kmem_zalloc() callers.
- *
- * NOTE(review): the test below is 'size > spl_kmem_alloc_max',
- * so a request of exactly spl_kmem_alloc_max bytes still goes
- * to kmalloc_node() -- confirm which bound is intended.
- *
- * For vmem_alloc() and vmem_zalloc() callers it is permissible
- * to use __vmalloc(). However, in general use of __vmalloc()
- * is strongly discouraged because a global lock must be
- * acquired. Contention on this lock can significantly
- * impact performance so frequently manipulating the virtual
- * address space is strongly discouraged.
- */
- if ((size > spl_kmem_alloc_max) || use_vmem) {
- if (flags & KM_VMEM) {
- ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
- PAGE_KERNEL);
- } else {
- return (NULL);
- }
- } else {
- ptr = kmalloc_node(size, lflags, node);
- }
-
- if (likely(ptr) || (flags & KM_NOSLEEP))
- return (ptr);
-
- /*
- * For vmem_alloc() and vmem_zalloc() callers retry immediately
- * using __vmalloc() which is unlikely to fail.
- */
- if ((flags & KM_VMEM) && (use_vmem == 0)) {
- use_vmem = 1;
- continue;
- }
-
- /*
- * Use cond_resched() instead of congestion_wait() to avoid
- * deadlocking systems where there are no block devices.
- */
- cond_resched();
- } while (1);
-
- return (NULL); /* not reached: the loop above only exits by returning */
-}
-
-/*
- * Release 'buf' with the routine matching the way it was allocated:
- * vfree() for addresses in the vmalloc range, kfree() for everything else.
- * The 'size' argument is not used.
- */
-inline void
-spl_kmem_free_impl(const void *buf, size_t size)
-{
-	if (!is_vmalloc_addr(buf)) {
-		kfree(buf);
-		return;
-	}
-
-	vfree(buf);
-}
-
-/*
- * Memory allocation and accounting for kmem_* style allocations. When
- * DEBUG_KMEM is enabled the total memory allocated will be tracked and
- * any memory leaked will be reported during module unload.
- *
- * ./configure --enable-debug-kmem
- */
-#ifdef DEBUG_KMEM
-
-/* Shim layer memory accounting */
-#ifdef HAVE_ATOMIC64_T
-atomic64_t kmem_alloc_used = ATOMIC64_INIT(0);
-unsigned long long kmem_alloc_max = 0; /* largest kmem_alloc_used value observed */
-#else /* HAVE_ATOMIC64_T */
-atomic_t kmem_alloc_used = ATOMIC_INIT(0);
-unsigned long long kmem_alloc_max = 0; /* largest kmem_alloc_used value observed */
-#endif /* HAVE_ATOMIC64_T */
-
-EXPORT_SYMBOL(kmem_alloc_used);
-EXPORT_SYMBOL(kmem_alloc_max);
-
-/*
- * Accounting wrapper around spl_kmem_alloc_impl().  A successful
- * allocation adds 'size' to the running total and, when that total
- * exceeds kmem_alloc_max, records it as the new maximum.  Failed
- * allocations leave the accounting untouched.
- *
- * Returns whatever spl_kmem_alloc_impl() returned.
- */
-inline void *
-spl_kmem_alloc_debug(size_t size, int flags, int node)
-{
-	void *buf = spl_kmem_alloc_impl(size, flags, node);
-
-	if (buf == NULL)
-		return (NULL);
-
-	kmem_alloc_used_add(size);
-	if (unlikely(kmem_alloc_used_read() > kmem_alloc_max))
-		kmem_alloc_max = kmem_alloc_used_read();
-
-	return (buf);
-}
-
-inline void
-spl_kmem_free_debug(const void *ptr, size_t size)
-{ /* Subtract 'size' from the running total, then release the buffer. */
- kmem_alloc_used_sub(size);
- spl_kmem_free_impl(ptr, size);
-}
-
-/*
- * When DEBUG_KMEM_TRACKING is enabled not only will total bytes be tracked
- * but also the location of every alloc and free. When the SPL module is
- * unloaded a list of all leaked addresses and where they were allocated
- * will be dumped to the console. Enabling this feature has a significant
- * impact on performance but it makes finding memory leaks straight forward.
- *
- * Not surprisingly with debugging enabled the xmem_locks are very highly
- * contended particularly on xfree(). If we want to run with this detailed
- * debugging enabled for anything other than debugging we need to minimize
- * the contention by moving to a lock per xmem_table entry model.
- *
- * NOTE(review): no xmem_* names are defined in this section; presumably
- * they refer to kmem_lock and kmem_table below -- confirm.
- *
- * ./configure --enable-debug-kmem-tracking
- */
-#ifdef DEBUG_KMEM_TRACKING
-
-#include <linux/hash.h>
-#include <linux/ctype.h>
-
-#define KMEM_HASH_BITS 10
-#define KMEM_TABLE_SIZE (1 << KMEM_HASH_BITS)
-
-typedef struct kmem_debug {
- struct hlist_node kd_hlist; /* Hash node linkage */
- struct list_head kd_list; /* List of all allocations */
- void *kd_addr; /* Allocation pointer */
- size_t kd_size; /* Allocation size */
- const char *kd_func; /* Allocation function */
- int kd_line; /* Allocation line */
-} kmem_debug_t;
-
-static spinlock_t kmem_lock;
-static struct hlist_head kmem_table[KMEM_TABLE_SIZE]; /* buckets indexed by hash_ptr() of the address */
-static struct list_head kmem_list;
-
-/*
- * Look up the tracking record for 'addr' in the hash 'table' (which has
- * 2^bits buckets) and, when found, unlink it from both its hash bucket
- * and the list of all allocations.  The search and the unlinking run
- * under 'lock' with interrupts disabled.
- *
- * Returns the unlinked record, or NULL when 'addr' is not tracked.
- */
-static kmem_debug_t *
-kmem_del_init(spinlock_t *lock, struct hlist_head *table,
-    int bits, const void *addr)
-{
-	struct hlist_head *bucket;
-	struct hlist_node *pos;
-	struct kmem_debug *found = NULL;
-	unsigned long irq_flags;
-
-	spin_lock_irqsave(lock, irq_flags);
-
-	bucket = &table[hash_ptr((void *)addr, bits)];
-	hlist_for_each(pos, bucket) {
-		struct kmem_debug *kd;
-
-		kd = list_entry(pos, struct kmem_debug, kd_hlist);
-		if (kd->kd_addr != addr)
-			continue;
-
-		hlist_del_init(&kd->kd_hlist);
-		list_del_init(&kd->kd_list);
-		found = kd;
-		break;
-	}
-
-	spin_unlock_irqrestore(lock, irq_flags);
-
-	return (found);
-}
-
-/*
- * Tracking wrapper around spl_kmem_alloc_debug().  In addition to the
- * buffer itself a kmem_debug_t record is allocated which remembers the
- * buffer address, its size and the allocating function and line; the
- * record is inserted into kmem_table and appended to kmem_list.
- *
- * Returns the new buffer, or NULL when the record, the copy of 'func' or
- * the buffer could not be allocated.  Nothing is left allocated on failure.
- */
-inline void *
-spl_kmem_alloc_track(size_t size, int flags,
-    const char *func, int line, int node)
-{
-	kmem_debug_t *rec;
-	void *buf;
-	unsigned long irq_flags;
-
-	rec = kmalloc(sizeof (kmem_debug_t), kmem_flags_convert(flags));
-	if (rec == NULL)
-		return (NULL);
-
-	rec->kd_func = __strdup(func, flags);
-	if (rec->kd_func == NULL)
-		goto free_rec;
-
-	buf = spl_kmem_alloc_debug(size, flags, node);
-	if (buf == NULL)
-		goto free_func;
-
-	INIT_HLIST_NODE(&rec->kd_hlist);
-	INIT_LIST_HEAD(&rec->kd_list);
-
-	rec->kd_addr = buf;
-	rec->kd_size = size;
-	rec->kd_line = line;
-
-	/* Publish the record in the hash table and the global list */
-	spin_lock_irqsave(&kmem_lock, irq_flags);
-	hlist_add_head(&rec->kd_hlist,
-	    &kmem_table[hash_ptr(buf, KMEM_HASH_BITS)]);
-	list_add_tail(&rec->kd_list, &kmem_list);
-	spin_unlock_irqrestore(&kmem_lock, irq_flags);
-
-	return (buf);
-
-free_func:
-	kfree(rec->kd_func);
-free_rec:
-	kfree(rec);
-	return (NULL);
-}
-
-/*
- * Tracking wrapper around spl_kmem_free_debug().  The record created by
- * spl_kmem_alloc_track() is removed from the hash table and freed before
- * the buffer itself is released.  A NULL 'ptr' is ignored; for any other
- * pointer a record must exist and its size must equal 'size'.
- */
-inline void
-spl_kmem_free_track(const void *ptr, size_t size)
-{
-	kmem_debug_t *rec;
-
-	/* NULL was never tracked, so there is nothing to undo */
-	if (!ptr)
-		return;
-
-	/* The record must exist in the hash due to kmem_alloc() */
-	rec = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr);
-	ASSERT3P(rec, !=, NULL);
-	ASSERT3S(rec->kd_size, ==, size);
-
-	kfree(rec->kd_func);
-	kfree(rec);
-
-	spl_kmem_free_debug(ptr, size);
-}
-#endif /* DEBUG_KMEM_TRACKING */
-#endif /* DEBUG_KMEM */
-
-/*
- * Public kmem_alloc(), kmem_zalloc() and kmem_free() interfaces.
- *
- * spl_kmem_alloc() asserts that 'flags' stays within KM_PUBLIC_MASK and
- * forwards to the plain, accounting or tracking implementation selected
- * at build time by DEBUG_KMEM / DEBUG_KMEM_TRACKING.  'func' and 'line'
- * are only passed on in the tracking build.  NUMA_NO_NODE is always used
- * as the node.
- */
-void *
-spl_kmem_alloc(size_t size, int flags, const char *func, int line)
-{
- ASSERT0(flags & ~KM_PUBLIC_MASK);
-
-#if !defined(DEBUG_KMEM)
- return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
-#elif !defined(DEBUG_KMEM_TRACKING)
- return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
-#else
- return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
-#endif
-}
-EXPORT_SYMBOL(spl_kmem_alloc);
-
-void *
-spl_kmem_zalloc(size_t size, int flags, const char *func, int line)
-{ /* Same dispatch as spl_kmem_alloc(), with KM_ZERO added to 'flags'. */
- ASSERT0(flags & ~KM_PUBLIC_MASK);
-
- flags |= KM_ZERO; /* added after the public-mask check above */
-
-#if !defined(DEBUG_KMEM)
- return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
-#elif !defined(DEBUG_KMEM_TRACKING)
- return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
-#else
- return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
-#endif
-}
-EXPORT_SYMBOL(spl_kmem_zalloc);
-
-/*
- * Public kmem_free() interface: release 'buf' of 'size' bytes through the
- * plain, accounting or tracking implementation selected at build time by
- * DEBUG_KMEM / DEBUG_KMEM_TRACKING.
- */
-void
-spl_kmem_free(const void *buf, size_t size)
-{
-	/*
-	 * The helpers return void, so they are called as plain statements;
-	 * ISO C does not allow 'return <expression>' in a void function.
-	 */
-#if !defined(DEBUG_KMEM)
-	spl_kmem_free_impl(buf, size);
-#elif !defined(DEBUG_KMEM_TRACKING)
-	spl_kmem_free_debug(buf, size);
-#else
-	spl_kmem_free_track(buf, size);
-#endif
-}
-EXPORT_SYMBOL(spl_kmem_free);
-
-#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
-/*
- * Render the start of a tracked allocation into 'str' for the leak report.
- *
- * kd  - tracking record; kd_addr and kd_size describe the buffer sampled.
- * str - output buffer of 'len' bytes, NUL terminated on return.
- * len - size of 'str'; must be at least 17 (16 characters plus the NUL).
- * min - a non-printable byte found after more than 'min' printable ones
- *       just ends the text; found earlier it selects the hex form.
- *
- * When the sampled bytes are printable they are returned as text,
- * otherwise the bytes at the even offsets 0..14 are returned as hex
- * digits.  Only offsets below kd_size are ever read, so allocations
- * shorter than 15 bytes yield a correspondingly shorter hex string.
- *
- * Returns 'str'.
- */
-static char *
-spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
-{
-	const uint8_t *data = kd->kd_addr;
-	int size, i, pos;
-	int printable = 1;
-
-	ASSERT(str != NULL && len >= 17);
-	memset(str, 0, len);
-
-	size = ((size_t)(len - 1) < kd->kd_size) ? (len - 1) :
-	    (int)kd->kd_size;
-
-	/*
-	 * Check for a fully printable string, and while we are at
-	 * it place the printable characters in the passed buffer.
-	 * The first non-printable byte is never copied, so a text
-	 * result ends cleanly at that position.
-	 */
-	for (i = 0; i < size; i++) {
-		if (isprint(data[i])) {
-			str[i] = (char)data[i];
-			continue;
-		}
-
-		/*
-		 * Minimum number of printable characters found
-		 * to make it worthwhile to print this as ascii.
-		 */
-		if (i <= min)
-			printable = 0;
-		break;
-	}
-
-	if (!printable) {
-		/* Drop the text prefix copied above before writing hex */
-		memset(str, 0, len);
-		pos = 0;
-		for (i = 0; i < 16 && (size_t)i < kd->kd_size; i += 2)
-			pos += snprintf(str + pos, len - pos, "%02x",
-			    data[i]);
-	}
-
-	return (str);
-}
-
-/*
- * Prepare the allocation tracking state: initialise 'lock' and 'list'
- * and empty the first 'size' buckets of kmem_table.  Always returns 0.
- */
-static int
-spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size)
-{
-	int bucket = 0;
-
-	spin_lock_init(lock);
-	INIT_LIST_HEAD(list);
-
-	while (bucket < size) {
-		INIT_HLIST_HEAD(&kmem_table[bucket]);
-		bucket++;
-	}
-
-	return (0);
-}
-
-/*
- * Print one console line for every record still on 'list', preceded by
- * a column header when the list is not empty.  Each line shows the
- * address, the size, a sample of the contents and the allocating
- * function and line.  The list is walked under 'lock' with interrupts
- * disabled; the records themselves are left in place.
- */
-static void
-spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
-{
-	unsigned long irq_flags;
-	kmem_debug_t *entry;
-	char sample[17];
-
-	spin_lock_irqsave(lock, irq_flags);
-
-	if (list_empty(list) == 0) {
-		printk(KERN_WARNING "%-16s %-5s %-16s %s:%s\n", "address",
-		    "size", "data", "func", "line");
-	}
-
-	list_for_each_entry(entry, list, kd_list) {
-		char *data;
-
-		data = spl_sprintf_addr(entry, sample, sizeof (sample), 8);
-		printk(KERN_WARNING "%p %-5d %-16s %s:%d\n", entry->kd_addr,
-		    (int)entry->kd_size, data,
-		    entry->kd_func, entry->kd_line);
-	}
-
-	spin_unlock_irqrestore(lock, irq_flags);
-}
-#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
-
-int
-spl_kmem_init(void)
-{ /* Reset the DEBUG_KMEM accounting and tracking state; always returns 0. */
-
-#ifdef DEBUG_KMEM
- kmem_alloc_used_set(0);
-
-
-
-#ifdef DEBUG_KMEM_TRACKING
- spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE);
-#endif /* DEBUG_KMEM_TRACKING */
-#endif /* DEBUG_KMEM */
-
- return (0);
-}
-
-/*
- * Module unload hook for the kmem layer.  In DEBUG_KMEM builds a warning
- * with the number of bytes still accounted as allocated and the recorded
- * maximum is printed when that number is not zero; in
- * DEBUG_KMEM_TRACKING builds every record still being tracked is listed
- * as well.  Without DEBUG_KMEM this function does nothing.
- */
-void
-spl_kmem_fini(void)
-{
-#ifdef DEBUG_KMEM
-	/*
-	 * Display all unreclaimed memory addresses, including the
-	 * allocation size and the first few bytes of what's located
-	 * at that address to aid in debugging.  Performance is not
-	 * a serious concern here since it is module unload time.
-	 */
-	if (kmem_alloc_used_read() != 0)
-		/* %lu matches the unsigned long cast of the first value */
-		printk(KERN_WARNING "kmem leaked %lu/%llu bytes\n",
-		    (unsigned long)kmem_alloc_used_read(), kmem_alloc_max);
-
-#ifdef DEBUG_KMEM_TRACKING
-	spl_kmem_fini_tracking(&kmem_list, &kmem_lock);
-#endif /* DEBUG_KMEM_TRACKING */
-#endif /* DEBUG_KMEM */
-}
diff --git a/module/spl/spl-kobj.c b/module/spl/spl-kobj.c
deleted file mode 100644
index 7019369bd..000000000
--- a/module/spl/spl-kobj.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Kobj Implementation.
- */
-
-#include <sys/kobj.h>
-
-struct _buf *
-kobj_open_file(const char *name)
-{
- struct _buf *file;
- vnode_t *vp;
- int rc;
-
- file = kmalloc(sizeof (_buf_t), kmem_flags_convert(KM_SLEEP));
- if (file == NULL)
- return ((_buf_t *)-1UL);
-
- if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) {
- kfree(file);
- return ((_buf_t *)-1UL);
- }
-
- file->vp = vp;
-
- return (file);
-} /* kobj_open_file() */
-EXPORT_SYMBOL(kobj_open_file);
-
-void
-kobj_close_file(struct _buf *file)
-{
- VOP_CLOSE(file->vp, 0, 0, 0, 0, 0);
- kfree(file);
-} /* kobj_close_file() */
-EXPORT_SYMBOL(kobj_close_file);
-
-int
-kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
-{
- ssize_t resid;
-
- if (vn_rdwr(UIO_READ, file->vp, buf, size, (offset_t)off,
- UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
- return (-1);
-
- return (size - resid);
-} /* kobj_read_file() */
-EXPORT_SYMBOL(kobj_read_file);
-
-int
-kobj_get_filesize(struct _buf *file, uint64_t *size)
-{
- vattr_t vap;
- int rc;
-
- rc = VOP_GETATTR(file->vp, &vap, 0, 0, NULL);
- if (rc)
- return (rc);
-
- *size = vap.va_size;
-
- return (rc);
-} /* kobj_get_filesize() */
-EXPORT_SYMBOL(kobj_get_filesize);
diff --git a/module/spl/spl-kstat.c b/module/spl/spl-kstat.c
deleted file mode 100644
index 1f67bf157..000000000
--- a/module/spl/spl-kstat.c
+++ /dev/null
@@ -1,770 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Kstat Implementation.
- */
-
-#include <linux/seq_file.h>
-#include <sys/kstat.h>
-#include <sys/vmem.h>
-#include <sys/cmn_err.h>
-#include <sys/sysmacros.h>
-
-static kmutex_t kstat_module_lock;
-static struct list_head kstat_module_list;
-static kid_t kstat_id;
-
-static int
-kstat_resize_raw(kstat_t *ksp)
-{
- if (ksp->ks_raw_bufsize == KSTAT_RAW_MAX)
- return (ENOMEM);
-
- vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);
- ksp->ks_raw_bufsize = MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX);
- ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);
-
- return (0);
-}
-
-void
-kstat_waitq_enter(kstat_io_t *kiop)
-{
- hrtime_t new, delta;
- ulong_t wcnt;
-
- new = gethrtime();
- delta = new - kiop->wlastupdate;
- kiop->wlastupdate = new;
- wcnt = kiop->wcnt++;
- if (wcnt != 0) {
- kiop->wlentime += delta * wcnt;
- kiop->wtime += delta;
- }
-}
-EXPORT_SYMBOL(kstat_waitq_enter);
-
-void
-kstat_waitq_exit(kstat_io_t *kiop)
-{
- hrtime_t new, delta;
- ulong_t wcnt;
-
- new = gethrtime();
- delta = new - kiop->wlastupdate;
- kiop->wlastupdate = new;
- wcnt = kiop->wcnt--;
- ASSERT((int)wcnt > 0);
- kiop->wlentime += delta * wcnt;
- kiop->wtime += delta;
-}
-EXPORT_SYMBOL(kstat_waitq_exit);
-
-void
-kstat_runq_enter(kstat_io_t *kiop)
-{
- hrtime_t new, delta;
- ulong_t rcnt;
-
- new = gethrtime();
- delta = new - kiop->rlastupdate;
- kiop->rlastupdate = new;
- rcnt = kiop->rcnt++;
- if (rcnt != 0) {
- kiop->rlentime += delta * rcnt;
- kiop->rtime += delta;
- }
-}
-EXPORT_SYMBOL(kstat_runq_enter);
-
-void
-kstat_runq_exit(kstat_io_t *kiop)
-{
- hrtime_t new, delta;
- ulong_t rcnt;
-
- new = gethrtime();
- delta = new - kiop->rlastupdate;
- kiop->rlastupdate = new;
- rcnt = kiop->rcnt--;
- ASSERT((int)rcnt > 0);
- kiop->rlentime += delta * rcnt;
- kiop->rtime += delta;
-}
-EXPORT_SYMBOL(kstat_runq_exit);
-
-static int
-kstat_seq_show_headers(struct seq_file *f)
-{
- kstat_t *ksp = (kstat_t *)f->private;
- int rc = 0;
-
- ASSERT(ksp->ks_magic == KS_MAGIC);
-
- seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n",
- ksp->ks_kid, ksp->ks_type, ksp->ks_flags,
- ksp->ks_ndata, (int)ksp->ks_data_size,
- ksp->ks_crtime, ksp->ks_snaptime);
-
- switch (ksp->ks_type) {
- case KSTAT_TYPE_RAW:
-restart:
- if (ksp->ks_raw_ops.headers) {
- rc = ksp->ks_raw_ops.headers(
- ksp->ks_raw_buf, ksp->ks_raw_bufsize);
- if (rc == ENOMEM && !kstat_resize_raw(ksp))
- goto restart;
- if (!rc)
- seq_puts(f, ksp->ks_raw_buf);
- } else {
- seq_printf(f, "raw data\n");
- }
- break;
- case KSTAT_TYPE_NAMED:
- seq_printf(f, "%-31s %-4s %s\n",
- "name", "type", "data");
- break;
- case KSTAT_TYPE_INTR:
- seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n",
- "hard", "soft", "watchdog",
- "spurious", "multsvc");
- break;
- case KSTAT_TYPE_IO:
- seq_printf(f,
- "%-8s %-8s %-8s %-8s %-8s %-8s "
- "%-8s %-8s %-8s %-8s %-8s %-8s\n",
- "nread", "nwritten", "reads", "writes",
- "wtime", "wlentime", "wupdate",
- "rtime", "rlentime", "rupdate",
- "wcnt", "rcnt");
- break;
- case KSTAT_TYPE_TIMER:
- seq_printf(f,
- "%-31s %-8s "
- "%-8s %-8s %-8s %-8s %-8s\n",
- "name", "events", "elapsed",
- "min", "max", "start", "stop");
- break;
- default:
- PANIC("Undefined kstat type %d\n", ksp->ks_type);
- }
-
- return (-rc);
-}
-
-static int
-kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l)
-{
- int i, j;
-
- for (i = 0; ; i++) {
- seq_printf(f, "%03x:", i);
-
- for (j = 0; j < 16; j++) {
- if (i * 16 + j >= l) {
- seq_printf(f, "\n");
- goto out;
- }
-
- seq_printf(f, " %02x", (unsigned char)p[i * 16 + j]);
- }
- seq_printf(f, "\n");
- }
-out:
- return (0);
-}
-
-static int
-kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp)
-{
- seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type);
-
- switch (knp->data_type) {
- case KSTAT_DATA_CHAR:
- knp->value.c[15] = '\0'; /* NULL terminate */
- seq_printf(f, "%-16s", knp->value.c);
- break;
- /*
- * NOTE - We need to be more careful able what tokens are
- * used for each arch, for now this is correct for x86_64.
- */
- case KSTAT_DATA_INT32:
- seq_printf(f, "%d", knp->value.i32);
- break;
- case KSTAT_DATA_UINT32:
- seq_printf(f, "%u", knp->value.ui32);
- break;
- case KSTAT_DATA_INT64:
- seq_printf(f, "%lld", (signed long long)knp->value.i64);
- break;
- case KSTAT_DATA_UINT64:
- seq_printf(f, "%llu",
- (unsigned long long)knp->value.ui64);
- break;
- case KSTAT_DATA_LONG:
- seq_printf(f, "%ld", knp->value.l);
- break;
- case KSTAT_DATA_ULONG:
- seq_printf(f, "%lu", knp->value.ul);
- break;
- case KSTAT_DATA_STRING:
- KSTAT_NAMED_STR_PTR(knp)
- [KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0';
- seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp));
- break;
- default:
- PANIC("Undefined kstat data type %d\n", knp->data_type);
- }
-
- seq_printf(f, "\n");
-
- return (0);
-}
-
-static int
-kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip)
-{
- seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n",
- kip->intrs[KSTAT_INTR_HARD],
- kip->intrs[KSTAT_INTR_SOFT],
- kip->intrs[KSTAT_INTR_WATCHDOG],
- kip->intrs[KSTAT_INTR_SPURIOUS],
- kip->intrs[KSTAT_INTR_MULTSVC]);
-
- return (0);
-}
-
-static int
-kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip)
-{
- /* though wlentime & friends are signed, they will never be negative */
- seq_printf(f,
- "%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
- "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
- kip->nread, kip->nwritten,
- kip->reads, kip->writes,
- kip->wtime, kip->wlentime, kip->wlastupdate,
- kip->rtime, kip->rlentime, kip->rlastupdate,
- kip->wcnt, kip->rcnt);
-
- return (0);
-}
-
-static int
-kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp)
-{
- seq_printf(f,
- "%-31s %-8llu %-8llu %-8llu %-8llu %-8llu %-8llu\n",
- ktp->name, ktp->num_events, ktp->elapsed_time,
- ktp->min_time, ktp->max_time,
- ktp->start_time, ktp->stop_time);
-
- return (0);
-}
-
-static int
-kstat_seq_show(struct seq_file *f, void *p)
-{
- kstat_t *ksp = (kstat_t *)f->private;
- int rc = 0;
-
- ASSERT(ksp->ks_magic == KS_MAGIC);
-
- switch (ksp->ks_type) {
- case KSTAT_TYPE_RAW:
-restart:
- if (ksp->ks_raw_ops.data) {
- rc = ksp->ks_raw_ops.data(
- ksp->ks_raw_buf, ksp->ks_raw_bufsize, p);
- if (rc == ENOMEM && !kstat_resize_raw(ksp))
- goto restart;
- if (!rc)
- seq_puts(f, ksp->ks_raw_buf);
- } else {
- ASSERT(ksp->ks_ndata == 1);
- rc = kstat_seq_show_raw(f, ksp->ks_data,
- ksp->ks_data_size);
- }
- break;
- case KSTAT_TYPE_NAMED:
- rc = kstat_seq_show_named(f, (kstat_named_t *)p);
- break;
- case KSTAT_TYPE_INTR:
- rc = kstat_seq_show_intr(f, (kstat_intr_t *)p);
- break;
- case KSTAT_TYPE_IO:
- rc = kstat_seq_show_io(f, (kstat_io_t *)p);
- break;
- case KSTAT_TYPE_TIMER:
- rc = kstat_seq_show_timer(f, (kstat_timer_t *)p);
- break;
- default:
- PANIC("Undefined kstat type %d\n", ksp->ks_type);
- }
-
- return (-rc);
-}
-
-static int
-kstat_default_update(kstat_t *ksp, int rw)
-{
- ASSERT(ksp != NULL);
-
- if (rw == KSTAT_WRITE)
- return (EACCES);
-
- return (0);
-}
-
-static void *
-kstat_seq_data_addr(kstat_t *ksp, loff_t n)
-{
- void *rc = NULL;
-
- switch (ksp->ks_type) {
- case KSTAT_TYPE_RAW:
- if (ksp->ks_raw_ops.addr)
- rc = ksp->ks_raw_ops.addr(ksp, n);
- else
- rc = ksp->ks_data;
- break;
- case KSTAT_TYPE_NAMED:
- rc = ksp->ks_data + n * sizeof (kstat_named_t);
- break;
- case KSTAT_TYPE_INTR:
- rc = ksp->ks_data + n * sizeof (kstat_intr_t);
- break;
- case KSTAT_TYPE_IO:
- rc = ksp->ks_data + n * sizeof (kstat_io_t);
- break;
- case KSTAT_TYPE_TIMER:
- rc = ksp->ks_data + n * sizeof (kstat_timer_t);
- break;
- default:
- PANIC("Undefined kstat type %d\n", ksp->ks_type);
- }
-
- return (rc);
-}
-
-static void *
-kstat_seq_start(struct seq_file *f, loff_t *pos)
-{
- loff_t n = *pos;
- kstat_t *ksp = (kstat_t *)f->private;
- ASSERT(ksp->ks_magic == KS_MAGIC);
-
- mutex_enter(ksp->ks_lock);
-
- if (ksp->ks_type == KSTAT_TYPE_RAW) {
- ksp->ks_raw_bufsize = PAGE_SIZE;
- ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);
- }
-
- /* Dynamically update kstat, on error existing kstats are used */
- (void) ksp->ks_update(ksp, KSTAT_READ);
-
- ksp->ks_snaptime = gethrtime();
-
- if (!(ksp->ks_flags & KSTAT_FLAG_NO_HEADERS) && !n &&
- kstat_seq_show_headers(f))
- return (NULL);
-
- if (n >= ksp->ks_ndata)
- return (NULL);
-
- return (kstat_seq_data_addr(ksp, n));
-}
-
-static void *
-kstat_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
- kstat_t *ksp = (kstat_t *)f->private;
- ASSERT(ksp->ks_magic == KS_MAGIC);
-
- ++*pos;
- if (*pos >= ksp->ks_ndata)
- return (NULL);
-
- return (kstat_seq_data_addr(ksp, *pos));
-}
-
-static void
-kstat_seq_stop(struct seq_file *f, void *v)
-{
- kstat_t *ksp = (kstat_t *)f->private;
- ASSERT(ksp->ks_magic == KS_MAGIC);
-
- if (ksp->ks_type == KSTAT_TYPE_RAW)
- vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);
-
- mutex_exit(ksp->ks_lock);
-}
-
-static struct seq_operations kstat_seq_ops = {
- .show = kstat_seq_show,
- .start = kstat_seq_start,
- .next = kstat_seq_next,
- .stop = kstat_seq_stop,
-};
-
-static kstat_module_t *
-kstat_find_module(char *name)
-{
- kstat_module_t *module;
-
- list_for_each_entry(module, &kstat_module_list, ksm_module_list) {
- if (strncmp(name, module->ksm_name, KSTAT_STRLEN) == 0)
- return (module);
- }
-
- return (NULL);
-}
-
-static kstat_module_t *
-kstat_create_module(char *name)
-{
- kstat_module_t *module;
- struct proc_dir_entry *pde;
-
- pde = proc_mkdir(name, proc_spl_kstat);
- if (pde == NULL)
- return (NULL);
-
- module = kmem_alloc(sizeof (kstat_module_t), KM_SLEEP);
- module->ksm_proc = pde;
- strlcpy(module->ksm_name, name, KSTAT_STRLEN+1);
- INIT_LIST_HEAD(&module->ksm_kstat_list);
- list_add_tail(&module->ksm_module_list, &kstat_module_list);
-
- return (module);
-
-}
-
-static void
-kstat_delete_module(kstat_module_t *module)
-{
- ASSERT(list_empty(&module->ksm_kstat_list));
- remove_proc_entry(module->ksm_name, proc_spl_kstat);
- list_del(&module->ksm_module_list);
- kmem_free(module, sizeof (kstat_module_t));
-}
-
-static int
-proc_kstat_open(struct inode *inode, struct file *filp)
-{
- struct seq_file *f;
- int rc;
-
- rc = seq_open(filp, &kstat_seq_ops);
- if (rc)
- return (rc);
-
- f = filp->private_data;
- f->private = PDE_DATA(inode);
-
- return (rc);
-}
-
-static ssize_t
-proc_kstat_write(struct file *filp, const char __user *buf, size_t len,
- loff_t *ppos)
-{
- struct seq_file *f = filp->private_data;
- kstat_t *ksp = f->private;
- int rc;
-
- ASSERT(ksp->ks_magic == KS_MAGIC);
-
- mutex_enter(ksp->ks_lock);
- rc = ksp->ks_update(ksp, KSTAT_WRITE);
- mutex_exit(ksp->ks_lock);
-
- if (rc)
- return (-rc);
-
- *ppos += len;
- return (len);
-}
-
-static struct file_operations proc_kstat_operations = {
- .open = proc_kstat_open,
- .write = proc_kstat_write,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-void
-__kstat_set_raw_ops(kstat_t *ksp,
- int (*headers)(char *buf, size_t size),
- int (*data)(char *buf, size_t size, void *data),
- void *(*addr)(kstat_t *ksp, loff_t index))
-{
- ksp->ks_raw_ops.headers = headers;
- ksp->ks_raw_ops.data = data;
- ksp->ks_raw_ops.addr = addr;
-}
-EXPORT_SYMBOL(__kstat_set_raw_ops);
-
-void
-kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module,
- const char *name)
-{
- kpep->kpe_owner = NULL;
- kpep->kpe_proc = NULL;
- INIT_LIST_HEAD(&kpep->kpe_list);
- strncpy(kpep->kpe_module, module, KSTAT_STRLEN);
- strncpy(kpep->kpe_name, name, KSTAT_STRLEN);
-}
-EXPORT_SYMBOL(kstat_proc_entry_init);
-
-kstat_t *
-__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
- const char *ks_class, uchar_t ks_type, uint_t ks_ndata,
- uchar_t ks_flags)
-{
- kstat_t *ksp;
-
- ASSERT(ks_module);
- ASSERT(ks_instance == 0);
- ASSERT(ks_name);
-
- if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
- ASSERT(ks_ndata == 1);
-
- ksp = kmem_zalloc(sizeof (*ksp), KM_SLEEP);
- if (ksp == NULL)
- return (ksp);
-
- mutex_enter(&kstat_module_lock);
- ksp->ks_kid = kstat_id;
- kstat_id++;
- mutex_exit(&kstat_module_lock);
-
- ksp->ks_magic = KS_MAGIC;
- mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
- ksp->ks_lock = &ksp->ks_private_lock;
-
- ksp->ks_crtime = gethrtime();
- ksp->ks_snaptime = ksp->ks_crtime;
- ksp->ks_instance = ks_instance;
- strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);
- ksp->ks_type = ks_type;
- ksp->ks_flags = ks_flags;
- ksp->ks_update = kstat_default_update;
- ksp->ks_private = NULL;
- ksp->ks_raw_ops.headers = NULL;
- ksp->ks_raw_ops.data = NULL;
- ksp->ks_raw_ops.addr = NULL;
- ksp->ks_raw_buf = NULL;
- ksp->ks_raw_bufsize = 0;
- kstat_proc_entry_init(&ksp->ks_proc, ks_module, ks_name);
-
- switch (ksp->ks_type) {
- case KSTAT_TYPE_RAW:
- ksp->ks_ndata = 1;
- ksp->ks_data_size = ks_ndata;
- break;
- case KSTAT_TYPE_NAMED:
- ksp->ks_ndata = ks_ndata;
- ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
- break;
- case KSTAT_TYPE_INTR:
- ksp->ks_ndata = ks_ndata;
- ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
- break;
- case KSTAT_TYPE_IO:
- ksp->ks_ndata = ks_ndata;
- ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
- break;
- case KSTAT_TYPE_TIMER:
- ksp->ks_ndata = ks_ndata;
- ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
- break;
- default:
- PANIC("Undefined kstat type %d\n", ksp->ks_type);
- }
-
- if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
- ksp->ks_data = NULL;
- } else {
- ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
- if (ksp->ks_data == NULL) {
- kmem_free(ksp, sizeof (*ksp));
- ksp = NULL;
- }
- }
-
- return (ksp);
-}
-EXPORT_SYMBOL(__kstat_create);
-
-static int
-kstat_detect_collision(kstat_proc_entry_t *kpep)
-{
- kstat_module_t *module;
- kstat_proc_entry_t *tmp;
- char *parent;
- char *cp;
-
- parent = kmem_asprintf("%s", kpep->kpe_module);
-
- if ((cp = strrchr(parent, '/')) == NULL) {
- strfree(parent);
- return (0);
- }
-
- cp[0] = '\0';
- if ((module = kstat_find_module(parent)) != NULL) {
- list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
- if (strncmp(tmp->kpe_name, cp+1, KSTAT_STRLEN) == 0) {
- strfree(parent);
- return (EEXIST);
- }
- }
- }
-
- strfree(parent);
- return (0);
-}
-
-/*
- * Add a file to the proc filesystem under the kstat namespace (i.e.
- * /proc/spl/kstat/). The file need not necessarily be implemented as a
- * kstat.
- */
-void
-kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode,
- const struct file_operations *file_ops, void *data)
-{
- kstat_module_t *module;
- kstat_proc_entry_t *tmp;
-
- ASSERT(kpep);
-
- mutex_enter(&kstat_module_lock);
-
- module = kstat_find_module(kpep->kpe_module);
- if (module == NULL) {
- if (kstat_detect_collision(kpep) != 0) {
- cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \
- " collision", kpep->kpe_module, kpep->kpe_name);
- goto out;
- }
- module = kstat_create_module(kpep->kpe_module);
- if (module == NULL)
- goto out;
- }
-
- /*
- * Only one entry by this name per-module, on failure the module
- * shouldn't be deleted because we know it has at least one entry.
- */
- list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
- if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0)
- goto out;
- }
-
- list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list);
-
- kpep->kpe_owner = module;
- kpep->kpe_proc = proc_create_data(kpep->kpe_name, mode,
- module->ksm_proc, file_ops, data);
- if (kpep->kpe_proc == NULL) {
- list_del_init(&kpep->kpe_list);
- if (list_empty(&module->ksm_kstat_list))
- kstat_delete_module(module);
- }
-out:
- mutex_exit(&kstat_module_lock);
-
-}
-EXPORT_SYMBOL(kstat_proc_entry_install);
-
-void
-__kstat_install(kstat_t *ksp)
-{
- ASSERT(ksp);
- mode_t mode;
- /* Specify permission modes for different kstats */
- if (strncmp(ksp->ks_proc.kpe_name, "dbufs", KSTAT_STRLEN) == 0) {
- mode = 0600;
- } else {
- mode = 0644;
- }
- kstat_proc_entry_install(
- &ksp->ks_proc, mode, &proc_kstat_operations, ksp);
-}
-EXPORT_SYMBOL(__kstat_install);
-
-void
-kstat_proc_entry_delete(kstat_proc_entry_t *kpep)
-{
- kstat_module_t *module = kpep->kpe_owner;
- if (kpep->kpe_proc)
- remove_proc_entry(kpep->kpe_name, module->ksm_proc);
-
- mutex_enter(&kstat_module_lock);
- list_del_init(&kpep->kpe_list);
-
- /*
- * Remove top level module directory if it wasn't empty before, but now
- * is.
- */
- if (kpep->kpe_proc && list_empty(&module->ksm_kstat_list))
- kstat_delete_module(module);
- mutex_exit(&kstat_module_lock);
-
-}
-EXPORT_SYMBOL(kstat_proc_entry_delete);
-
-void
-__kstat_delete(kstat_t *ksp)
-{
- kstat_proc_entry_delete(&ksp->ks_proc);
-
- if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
- kmem_free(ksp->ks_data, ksp->ks_data_size);
-
- ksp->ks_lock = NULL;
- mutex_destroy(&ksp->ks_private_lock);
- kmem_free(ksp, sizeof (*ksp));
-}
-EXPORT_SYMBOL(__kstat_delete);
-
-int
-spl_kstat_init(void)
-{
- mutex_init(&kstat_module_lock, NULL, MUTEX_DEFAULT, NULL);
- INIT_LIST_HEAD(&kstat_module_list);
- kstat_id = 0;
- return (0);
-}
-
-void
-spl_kstat_fini(void)
-{
- ASSERT(list_empty(&kstat_module_list));
- mutex_destroy(&kstat_module_lock);
-}
diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c
deleted file mode 100644
index a75bcc214..000000000
--- a/module/spl/spl-proc.c
+++ /dev/null
@@ -1,782 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Proc Implementation.
- */
-
-#include <sys/systeminfo.h>
-#include <sys/kstat.h>
-#include <sys/kmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/vmem.h>
-#include <sys/taskq.h>
-#include <sys/proc.h>
-#include <linux/ctype.h>
-#include <linux/kmod.h>
-#include <linux/seq_file.h>
-#include <linux/uaccess.h>
-#include <linux/version.h>
-
-#if defined(CONSTIFY_PLUGIN) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
-typedef struct ctl_table __no_const spl_ctl_table;
-#else
-typedef struct ctl_table spl_ctl_table;
-#endif
-
-static unsigned long table_min = 0;
-static unsigned long table_max = ~0;
-
-static struct ctl_table_header *spl_header = NULL;
-static struct proc_dir_entry *proc_spl = NULL;
-static struct proc_dir_entry *proc_spl_kmem = NULL;
-static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
-static struct proc_dir_entry *proc_spl_taskq_all = NULL;
-static struct proc_dir_entry *proc_spl_taskq = NULL;
-struct proc_dir_entry *proc_spl_kstat = NULL;
-
-static int
-proc_copyin_string(char *kbuffer, int kbuffer_size, const char *ubuffer,
- int ubuffer_size)
-{
- int size;
-
- if (ubuffer_size > kbuffer_size)
- return (-EOVERFLOW);
-
- if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size))
- return (-EFAULT);
-
- /* strip trailing whitespace */
- size = strnlen(kbuffer, ubuffer_size);
- while (size-- >= 0)
- if (!isspace(kbuffer[size]))
- break;
-
- /* empty string */
- if (size < 0)
- return (-EINVAL);
-
- /* no space to terminate */
- if (size == kbuffer_size)
- return (-EOVERFLOW);
-
- kbuffer[size + 1] = 0;
- return (0);
-}
-
-static int
-proc_copyout_string(char *ubuffer, int ubuffer_size, const char *kbuffer,
- char *append)
-{
- /*
- * NB if 'append' != NULL, it's a single character to append to the
- * copied out string - usually "\n", for /proc entries and
- * (i.e. a terminating zero byte) for sysctl entries
- */
- int size = MIN(strlen(kbuffer), ubuffer_size);
-
- if (copy_to_user(ubuffer, kbuffer, size))
- return (-EFAULT);
-
- if (append != NULL && size < ubuffer_size) {
- if (copy_to_user(ubuffer + size, append, 1))
- return (-EFAULT);
-
- size++;
- }
-
- return (size);
-}
-
-#ifdef DEBUG_KMEM
-static int
-proc_domemused(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int rc = 0;
- unsigned long min = 0, max = ~0, val;
- spl_ctl_table dummy = *table;
-
- dummy.data = &val;
- dummy.proc_handler = &proc_dointvec;
- dummy.extra1 = &min;
- dummy.extra2 = &max;
-
- if (write) {
- *ppos += *lenp;
- } else {
-#ifdef HAVE_ATOMIC64_T
- val = atomic64_read((atomic64_t *)table->data);
-#else
- val = atomic_read((atomic_t *)table->data);
-#endif /* HAVE_ATOMIC64_T */
- rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
- }
-
- return (rc);
-}
-#endif /* DEBUG_KMEM */
-
-static int
-proc_doslab(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int rc = 0;
- unsigned long min = 0, max = ~0, val = 0, mask;
- spl_ctl_table dummy = *table;
- spl_kmem_cache_t *skc;
-
- dummy.data = &val;
- dummy.proc_handler = &proc_dointvec;
- dummy.extra1 = &min;
- dummy.extra2 = &max;
-
- if (write) {
- *ppos += *lenp;
- } else {
- down_read(&spl_kmem_cache_sem);
- mask = (unsigned long)table->data;
-
- list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
-
- /* Only use slabs of the correct kmem/vmem type */
- if (!(skc->skc_flags & mask))
- continue;
-
- /* Sum the specified field for selected slabs */
- switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
- case KMC_TOTAL:
- val += skc->skc_slab_size * skc->skc_slab_total;
- break;
- case KMC_ALLOC:
- val += skc->skc_obj_size * skc->skc_obj_alloc;
- break;
- case KMC_MAX:
- val += skc->skc_obj_size * skc->skc_obj_max;
- break;
- }
- }
-
- up_read(&spl_kmem_cache_sem);
- rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
- }
-
- return (rc);
-}
-
-static int
-proc_dohostid(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int len, rc = 0;
- char *end, str[32];
-
- if (write) {
- /*
- * We can't use proc_doulongvec_minmax() in the write
- * case here because hostid while a hex value has no
- * leading 0x which confuses the helper function.
- */
- rc = proc_copyin_string(str, sizeof (str), buffer, *lenp);
- if (rc < 0)
- return (rc);
-
- spl_hostid = simple_strtoul(str, &end, 16);
- if (str == end)
- return (-EINVAL);
-
- } else {
- len = snprintf(str, sizeof (str), "%lx",
- (unsigned long) zone_get_hostid(NULL));
- if (*ppos >= len)
- rc = 0;
- else
- rc = proc_copyout_string(buffer,
- *lenp, str + *ppos, "\n");
-
- if (rc >= 0) {
- *lenp = rc;
- *ppos += rc;
- }
- }
-
- return (rc);
-}
-
-static void
-taskq_seq_show_headers(struct seq_file *f)
-{
- seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
- "taskq", "act", "nthr", "spwn", "maxt", "pri",
- "mina", "maxa", "cura", "flags");
-}
-
-/* indices into the lheads array below */
-#define LHEAD_PEND 0
-#define LHEAD_PRIO 1
-#define LHEAD_DELAY 2
-#define LHEAD_WAIT 3
-#define LHEAD_ACTIVE 4
-#define LHEAD_SIZE 5
-
-/* BEGIN CSTYLED */
-static unsigned int spl_max_show_tasks = 512;
-module_param(spl_max_show_tasks, uint, 0644);
-MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc");
-/* END CSTYLED */
-
-static int
-taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
-{
- taskq_t *tq = p;
- taskq_thread_t *tqt;
- spl_wait_queue_entry_t *wq;
- struct task_struct *tsk;
- taskq_ent_t *tqe;
- char name[100];
- struct list_head *lheads[LHEAD_SIZE], *lh;
- static char *list_names[LHEAD_SIZE] =
- {"pend", "prio", "delay", "wait", "active" };
- int i, j, have_lheads = 0;
- unsigned long wflags, flags;
-
- spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
- spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);
-
- /* get the various lists and check whether they're empty */
- lheads[LHEAD_PEND] = &tq->tq_pend_list;
- lheads[LHEAD_PRIO] = &tq->tq_prio_list;
- lheads[LHEAD_DELAY] = &tq->tq_delay_list;
-#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
- lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head;
-#else
- lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
-#endif
- lheads[LHEAD_ACTIVE] = &tq->tq_active_list;
-
- for (i = 0; i < LHEAD_SIZE; ++i) {
- if (list_empty(lheads[i]))
- lheads[i] = NULL;
- else
- ++have_lheads;
- }
-
- /* early return in non-"all" mode if lists are all empty */
- if (!allflag && !have_lheads) {
- spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
- spin_unlock_irqrestore(&tq->tq_lock, flags);
- return (0);
- }
-
- /* unlock the waitq quickly */
- if (!lheads[LHEAD_WAIT])
- spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
-
- /* show the base taskq contents */
- snprintf(name, sizeof (name), "%s/%d", tq->tq_name, tq->tq_instance);
- seq_printf(f, "%-25s ", name);
- seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
- tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
- tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
- tq->tq_nalloc, tq->tq_flags);
-
- /* show the active list */
- if (lheads[LHEAD_ACTIVE]) {
- j = 0;
- list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
- if (j == 0)
- seq_printf(f, "\t%s:",
- list_names[LHEAD_ACTIVE]);
- else if (j == 2) {
- seq_printf(f, "\n\t ");
- j = 0;
- }
- seq_printf(f, " [%d]%pf(%ps)",
- tqt->tqt_thread->pid,
- tqt->tqt_task->tqent_func,
- tqt->tqt_task->tqent_arg);
- ++j;
- }
- seq_printf(f, "\n");
- }
-
- for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
- if (lheads[i]) {
- j = 0;
- list_for_each(lh, lheads[i]) {
- if (spl_max_show_tasks != 0 &&
- j >= spl_max_show_tasks) {
- seq_printf(f, "\n\t(truncated)");
- break;
- }
- /* show the wait waitq list */
- if (i == LHEAD_WAIT) {
-#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
- wq = list_entry(lh,
- spl_wait_queue_entry_t, entry);
-#else
- wq = list_entry(lh,
- spl_wait_queue_entry_t, task_list);
-#endif
- if (j == 0)
- seq_printf(f, "\t%s:",
- list_names[i]);
- else if (j % 8 == 0)
- seq_printf(f, "\n\t ");
-
- tsk = wq->private;
- seq_printf(f, " %d", tsk->pid);
- /* pend, prio and delay lists */
- } else {
- tqe = list_entry(lh, taskq_ent_t,
- tqent_list);
- if (j == 0)
- seq_printf(f, "\t%s:",
- list_names[i]);
- else if (j % 2 == 0)
- seq_printf(f, "\n\t ");
-
- seq_printf(f, " %pf(%ps)",
- tqe->tqent_func,
- tqe->tqent_arg);
- }
- ++j;
- }
- seq_printf(f, "\n");
- }
- if (lheads[LHEAD_WAIT])
- spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
- spin_unlock_irqrestore(&tq->tq_lock, flags);
-
- return (0);
-}
-
-static int
-taskq_all_seq_show(struct seq_file *f, void *p)
-{
- return (taskq_seq_show_impl(f, p, B_TRUE));
-}
-
-static int
-taskq_seq_show(struct seq_file *f, void *p)
-{
- return (taskq_seq_show_impl(f, p, B_FALSE));
-}
-
-static void *
-taskq_seq_start(struct seq_file *f, loff_t *pos)
-{
- struct list_head *p;
- loff_t n = *pos;
-
- down_read(&tq_list_sem);
- if (!n)
- taskq_seq_show_headers(f);
-
- p = tq_list.next;
- while (n--) {
- p = p->next;
- if (p == &tq_list)
- return (NULL);
- }
-
- return (list_entry(p, taskq_t, tq_taskqs));
-}
-
-static void *
-taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
- taskq_t *tq = p;
-
- ++*pos;
- return ((tq->tq_taskqs.next == &tq_list) ?
- NULL : list_entry(tq->tq_taskqs.next, taskq_t, tq_taskqs));
-}
-
-static void
-slab_seq_show_headers(struct seq_file *f)
-{
- seq_printf(f,
- "--------------------- cache ----------"
- "--------------------------------------------- "
- "----- slab ------ "
- "---- object ----- "
- "--- emergency ---\n");
- seq_printf(f,
- "name "
- " flags size alloc slabsize objsize "
- "total alloc max "
- "total alloc max "
- "dlock alloc max\n");
-}
-
-/*
- * seq_file ->show callback for the slab listing: print one line of
- * statistics for the spl_kmem_cache_t passed in p.  Caches flagged
- * KMC_SLAB produce no output.  All counters are read and printed while
- * holding skc_lock.
- */
-static int
-slab_seq_show(struct seq_file *f, void *p)
-{
-	spl_kmem_cache_t *skc = p;
-	unsigned long slab_bytes, obj_bytes;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-
-	/* Caches backed by the Linux slab are listed in /proc/slabinfo. */
-	if (skc->skc_flags & KMC_SLAB)
-		return (0);
-
-	spin_lock(&skc->skc_lock);
-
-	slab_bytes =
-	    (unsigned long)(skc->skc_slab_size * skc->skc_slab_total);
-	obj_bytes =
-	    (unsigned long)(skc->skc_obj_size * skc->skc_obj_alloc);
-
-	seq_printf(f, "%-36s ", skc->skc_name);
-	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u "
-	    "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
-	    (unsigned long)skc->skc_flags,
-	    slab_bytes,
-	    obj_bytes,
-	    (unsigned)skc->skc_slab_size,
-	    (unsigned)skc->skc_obj_size,
-	    (unsigned long)skc->skc_slab_total,
-	    (unsigned long)skc->skc_slab_alloc,
-	    (unsigned long)skc->skc_slab_max,
-	    (unsigned long)skc->skc_obj_total,
-	    (unsigned long)skc->skc_obj_alloc,
-	    (unsigned long)skc->skc_obj_max,
-	    (unsigned long)skc->skc_obj_deadlock,
-	    (unsigned long)skc->skc_obj_emergency,
-	    (unsigned long)skc->skc_obj_emergency_max);
-
-	spin_unlock(&skc->skc_lock);
-
-	return (0);
-}
-
-/*
- * seq_file ->start callback for the slab listing.
- *
- * Takes spl_kmem_cache_sem for reading (released in slab_seq_stop()),
- * prints the headers when iteration begins at offset zero, then walks
- * *pos entries into spl_kmem_cache_list.
- *
- * Returns the spl_kmem_cache_t at that position, or NULL when the list
- * has no entry there.  An empty list is handled explicitly: applying
- * list_entry() to the list head itself would produce a pointer that is
- * not a spl_kmem_cache_t.
- */
-static void *
-slab_seq_start(struct seq_file *f, loff_t *pos)
-{
-	struct list_head *p;
-	loff_t n = *pos;
-
-	down_read(&spl_kmem_cache_sem);
-	if (!n)
-		slab_seq_show_headers(f);
-
-	p = spl_kmem_cache_list.next;
-	/* An empty list has no entry at any position, not even the first. */
-	if (p == &spl_kmem_cache_list)
-		return (NULL);
-
-	while (n--) {
-		p = p->next;
-		if (p == &spl_kmem_cache_list)
-			return (NULL);
-	}
-
-	return (list_entry(p, spl_kmem_cache_t, skc_list));
-}
-
-/*
- * seq_file ->next callback for the slab listing: advance *pos and return
- * the cache linked after p on spl_kmem_cache_list, or NULL once the list
- * head is reached.
- */
-static void *
-slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
-	spl_kmem_cache_t *cur = p;
-	struct list_head *following = cur->skc_list.next;
-
-	++*pos;
-	if (following == &spl_kmem_cache_list)
-		return (NULL);
-
-	return (list_entry(following, spl_kmem_cache_t, skc_list));
-}
-
-/*
- * seq_file ->stop callback for the slab listing: releases the read hold
- * on spl_kmem_cache_sem taken in slab_seq_start().
- */
-static void
-slab_seq_stop(struct seq_file *f, void *v)
-{
- up_read(&spl_kmem_cache_sem);
-}
-
-/* Iterator callbacks for the slab listing, in the order they are invoked. */
-static struct seq_operations slab_seq_ops = {
-	.start	= slab_seq_start,
-	.show	= slab_seq_show,
-	.next	= slab_seq_next,
-	.stop	= slab_seq_stop,
-};
-
-/* ->open handler for the slab file: attach slab_seq_ops to filp. */
-static int
-proc_slab_open(struct inode *inode, struct file *filp)
-{
-	int rc = seq_open(filp, &slab_seq_ops);
-
-	return (rc);
-}
-
-/* File operations registered for the "slab" proc entry. */
-static struct file_operations proc_slab_operations = {
-	.open		= proc_slab_open,
-	.release	= seq_release,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-};
-
-/*
- * seq_file ->stop callback for the taskq files: releases the read hold on
- * tq_list_sem taken in taskq_seq_start().
- */
-static void
-taskq_seq_stop(struct seq_file *f, void *v)
-{
- up_read(&tq_list_sem);
-}
-
-/* Iterator callbacks for "taskq-all"; differs from taskq_seq_ops in .show. */
-static struct seq_operations taskq_all_seq_ops = {
-	.start	= taskq_seq_start,
-	.show	= taskq_all_seq_show,
-	.next	= taskq_seq_next,
-	.stop	= taskq_seq_stop,
-};
-
-/* Iterator callbacks for "taskq"; differs from taskq_all_seq_ops in .show. */
-static struct seq_operations taskq_seq_ops = {
-	.start	= taskq_seq_start,
-	.show	= taskq_seq_show,
-	.next	= taskq_seq_next,
-	.stop	= taskq_seq_stop,
-};
-
-/* ->open handler for "taskq-all": attach taskq_all_seq_ops to filp. */
-static int
-proc_taskq_all_open(struct inode *inode, struct file *filp)
-{
-	int rc = seq_open(filp, &taskq_all_seq_ops);
-
-	return (rc);
-}
-
-/* ->open handler for "taskq": attach taskq_seq_ops to filp. */
-static int
-proc_taskq_open(struct inode *inode, struct file *filp)
-{
-	int rc = seq_open(filp, &taskq_seq_ops);
-
-	return (rc);
-}
-
-/* File operations registered for the "taskq-all" proc entry. */
-static struct file_operations proc_taskq_all_operations = {
-	.open		= proc_taskq_all_open,
-	.release	= seq_release,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-};
-
-/* File operations registered for the "taskq" proc entry. */
-static struct file_operations proc_taskq_operations = {
-	.open		= proc_taskq_open,
-	.release	= seq_release,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-};
-
-/*
- * Read-only (0444) entries of the "kmem" sysctl directory.
- *
- * The six slab_* entries differ only in name and in the KMC_* flag
- * combination stored in .data; note that .data carries the flag value
- * itself, cast to a pointer, rather than the address of a variable.
- * All of them are served by proc_doslab().
- */
-#define	SPL_SLAB_CTL(name, flags)					\
-	{								\
-		.procname	= (name),				\
-		.data		= (void *)(flags),			\
-		.maxlen		= sizeof (unsigned long),		\
-		.extra1		= &table_min,				\
-		.extra2		= &table_max,				\
-		.mode		= 0444,					\
-		.proc_handler	= &proc_doslab,				\
-	}
-
-static struct ctl_table spl_kmem_table[] = {
-#ifdef DEBUG_KMEM
-	{
-		.procname	= "kmem_used",
-		.data		= &kmem_alloc_used,
-#ifdef HAVE_ATOMIC64_T
-		.maxlen		= sizeof (atomic64_t),
-#else
-		.maxlen		= sizeof (atomic_t),
-#endif /* HAVE_ATOMIC64_T */
-		.mode		= 0444,
-		.proc_handler	= &proc_domemused,
-	},
-	{
-		.procname	= "kmem_max",
-		.data		= &kmem_alloc_max,
-		.maxlen		= sizeof (unsigned long),
-		.extra1		= &table_min,
-		.extra2		= &table_max,
-		.mode		= 0444,
-		.proc_handler	= &proc_doulongvec_minmax,
-	},
-#endif /* DEBUG_KMEM */
-	SPL_SLAB_CTL("slab_kmem_total", KMC_KMEM | KMC_TOTAL),
-	SPL_SLAB_CTL("slab_kmem_alloc", KMC_KMEM | KMC_ALLOC),
-	SPL_SLAB_CTL("slab_kmem_max", KMC_KMEM | KMC_MAX),
-	SPL_SLAB_CTL("slab_vmem_total", KMC_VMEM | KMC_TOTAL),
-	SPL_SLAB_CTL("slab_vmem_alloc", KMC_VMEM | KMC_ALLOC),
-	SPL_SLAB_CTL("slab_vmem_max", KMC_VMEM | KMC_MAX),
-	{},
-};
-
-#undef	SPL_SLAB_CTL
-
-/*
- * Child table of the "kstat" sysctl directory.  It holds only the empty
- * terminating entry, so the directory has no sysctl entries of its own.
- */
-static struct ctl_table spl_kstat_table[] = {
- {},
-};
-
-/*
- * Contents of the "spl" sysctl directory: two leaf values (gitrev, hostid)
- * and two sub-directories (kmem, kstat).
- */
-static struct ctl_table spl_table[] = {
-	/*
-	 * NB No .strategy entries have been provided since
-	 * sysctl(8) prefers to go via /proc for portability.
-	 */
-	{
-		.procname	= "gitrev",
-		.mode		= 0444,
-		.data		= spl_gitrev,
-		.maxlen		= sizeof (spl_gitrev),
-		.proc_handler	= &proc_dostring,
-	},
-	{
-		.procname	= "hostid",
-		.mode		= 0644,
-		.data		= &spl_hostid,
-		.maxlen		= sizeof (unsigned long),
-		.proc_handler	= &proc_dohostid,
-	},
-	{
-		.procname	= "kmem",
-		.child		= spl_kmem_table,
-		.mode		= 0555,
-	},
-	{
-		.procname	= "kstat",
-		.child		= spl_kstat_table,
-		.mode		= 0555,
-	},
-	{},
-};
-
-/* The "spl" directory entry; its children are listed in spl_table. */
-static struct ctl_table spl_dir[] = {
-	{
-		.procname	= "spl",
-		.child		= spl_table,
-		.mode		= 0555,
-	},
-	{}
-};
-
-/*
- * Top of the table handed to register_sysctl_table(): a "kernel"
- * directory whose only child is spl_dir.
- */
-static struct ctl_table spl_root[] = {
-	{
-#ifdef HAVE_CTL_NAME
-		.ctl_name	= CTL_KERN,
-#endif
-		.procname	= "kernel",
-		.child		= spl_dir,
-		.mode		= 0555,
-	},
-	{}
-};
-
-/*
- * Register the SPL sysctl table and create the proc entries
- * spl/, spl/taskq-all, spl/taskq, spl/kmem/, spl/kmem/slab and spl/kstat/.
- *
- * Returns 0 on success or -EUNATCH if any step fails.  On failure only the
- * entries that were actually created are removed, in reverse order of
- * creation.  Removing an entry that was never created, or passing a NULL
- * parent (which makes remove_proc_entry() look the name up in the proc
- * root), is thereby avoided.
- */
-int
-spl_proc_init(void)
-{
-	spl_header = register_sysctl_table(spl_root);
-	if (spl_header == NULL)
-		return (-EUNATCH);
-
-	proc_spl = proc_mkdir("spl", NULL);
-	if (proc_spl == NULL)
-		goto out_sysctl;
-
-	proc_spl_taskq_all = proc_create_data("taskq-all", 0444, proc_spl,
-	    &proc_taskq_all_operations, NULL);
-	if (proc_spl_taskq_all == NULL)
-		goto out_spl;
-
-	proc_spl_taskq = proc_create_data("taskq", 0444, proc_spl,
-	    &proc_taskq_operations, NULL);
-	if (proc_spl_taskq == NULL)
-		goto out_taskq_all;
-
-	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
-	if (proc_spl_kmem == NULL)
-		goto out_taskq;
-
-	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
-	    &proc_slab_operations, NULL);
-	if (proc_spl_kmem_slab == NULL)
-		goto out_kmem;
-
-	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
-	if (proc_spl_kstat == NULL)
-		goto out_slab;
-
-	return (0);
-
-	/* Each label undoes the step that succeeded just before the jump. */
-out_slab:
-	remove_proc_entry("slab", proc_spl_kmem);
-out_kmem:
-	remove_proc_entry("kmem", proc_spl);
-out_taskq:
-	remove_proc_entry("taskq", proc_spl);
-out_taskq_all:
-	remove_proc_entry("taskq-all", proc_spl);
-out_spl:
-	remove_proc_entry("spl", NULL);
-out_sysctl:
-	unregister_sysctl_table(spl_header);
-
-	return (-EUNATCH);
-}
-
-/*
- * Remove the proc entries created by spl_proc_init() and unregister the
- * sysctl table.  Entries are removed child-first: "slab" before its
- * parent "kmem", and everything under "spl" before "spl" itself.
- */
-void
-spl_proc_fini(void)
-{
- remove_proc_entry("kstat", proc_spl);
- remove_proc_entry("slab", proc_spl_kmem);
- remove_proc_entry("kmem", proc_spl);
- remove_proc_entry("taskq-all", proc_spl);
- remove_proc_entry("taskq", proc_spl);
- remove_proc_entry("spl", NULL);
-
- ASSERT(spl_header != NULL);
- unregister_sysctl_table(spl_header);
-}
diff --git a/module/spl/spl-procfs-list.c b/module/spl/spl-procfs-list.c
deleted file mode 100644
index f6a00da5c..000000000
--- a/module/spl/spl-procfs-list.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2018 by Delphix. All rights reserved.
- */
-
-#include <sys/list.h>
-#include <sys/mutex.h>
-#include <sys/procfs_list.h>
-#include <linux/proc_fs.h>
-
-/*
- * A procfs_list is a wrapper around a linked list which implements the seq_file
- * interface, allowing the contents of the list to be exposed through procfs.
- * The kernel already has some utilities to help implement the seq_file
- * interface for linked lists (seq_list_*), but they aren't appropriate for use
- * with lists that have many entries, because seq_list_start walks the list at
- * the start of each read syscall to find where it left off, so reading a file
- * ends up being quadratic in the number of entries in the list.
- *
- * This implementation avoids this penalty by maintaining a separate cursor into
- * the list per instance of the file that is open. It also maintains some extra
- * information in each node of the list to prevent reads of entries that have
- * been dropped from the list.
- *
- * Callers should only add elements to the list using procfs_list_add, which
- * adds an element to the tail of the list. Other operations can be performed
- * directly on the wrapped list using the normal list manipulation functions,
- * but elements should only be removed from the head of the list.
- */
-
-/*
- * Lvalue for the pln_id of the procfs_list_node_t embedded in obj, which
- * is located pl_node_offset bytes into the object.  Both arguments are
- * parenthesized so that expression arguments (e.g. "base + 1") bind as a
- * unit before the cast to char * is applied.
- */
-#define	NODE_ID(procfs_list, obj)					\
-	(((procfs_list_node_t *)(((char *)(obj)) +			\
-	(procfs_list)->pl_node_offset))->pln_id)
-
-/*
- * Per-open-file iteration state; allocated by seq_open_private() in
- * procfs_list_open() and reachable through seq_file->private.
- */
-typedef struct procfs_list_cursor {
-	/* List into which this cursor points */
-	procfs_list_t	*procfs_list;
-	/* Most recently accessed node */
-	void		*cached_node;
-	/* Position of cached_node */
-	loff_t		cached_pos;
-} procfs_list_cursor_t;
-
-/*
- * seq_file ->show callback.  SEQ_START_TOKEN stands for the header row,
- * rendered by the optional pl_show_header hook (nothing is printed and 0
- * is returned when the hook is NULL); any other p is handed to pl_show.
- * Expects pl_lock to be held.
- */
-static int
-procfs_list_seq_show(struct seq_file *f, void *p)
-{
-	procfs_list_cursor_t *cursor = f->private;
-	procfs_list_t *pl = cursor->procfs_list;
-
-	ASSERT(MUTEX_HELD(&pl->pl_lock));
-
-	if (p != SEQ_START_TOKEN)
-		return (pl->pl_show(f, p));
-
-	if (pl->pl_show_header == NULL)
-		return (0);
-
-	return (pl->pl_show_header(f));
-}
-
-/*
- * Step the cursor to the node following its cached node (the list head
- * when the cursor still sits on SEQ_START_TOKEN).  On success the cursor
- * and *pos are updated to the new node and its id; at the end of the
- * list NULL is returned and the cursor and *pos are left untouched.
- */
-static void *
-procfs_list_next_node(procfs_list_cursor_t *cursor, loff_t *pos)
-{
-	procfs_list_t *pl = cursor->procfs_list;
-	void *node;
-
-	node = (cursor->cached_node == SEQ_START_TOKEN) ?
-	    list_head(&pl->pl_list) :
-	    list_next(&pl->pl_list, cursor->cached_node);
-
-	if (node == NULL)
-		return (NULL);
-
-	cursor->cached_node = node;
-	cursor->cached_pos = NODE_ID(pl, node);
-	*pos = cursor->cached_pos;
-
-	return (node);
-}
-
-/*
- * seq_file ->start callback.  Acquires pl_lock (released in
- * procfs_list_seq_stop()) and resumes iteration at *pos using the cursor
- * cached for this open file.  Returns SEQ_START_TOKEN for position 0,
- * ERR_PTR(-EIO) if the cached node has since been dropped from the list,
- * otherwise the node to show next (NULL at the end of the list).
- */
-static void *
-procfs_list_seq_start(struct seq_file *f, loff_t *pos)
-{
-	procfs_list_cursor_t *cursor = f->private;
-	procfs_list_t *pl = cursor->procfs_list;
-
-	mutex_enter(&pl->pl_lock);
-
-	if (*pos == 0) {
-		cursor->cached_node = SEQ_START_TOKEN;
-		cursor->cached_pos = 0;
-		return (SEQ_START_TOKEN);
-	}
-
-	/*
-	 * Check if our cached pointer has become stale, which happens if
-	 * the message where we left off has been dropped from the list
-	 * since the last read syscall completed.
-	 */
-	void *head = list_head(&pl->pl_list);
-	if (cursor->cached_node != SEQ_START_TOKEN &&
-	    (head == NULL || NODE_ID(pl, head) > cursor->cached_pos))
-		return (ERR_PTR(-EIO));
-
-	/*
-	 * If it isn't starting from the beginning of the file, the seq_file
-	 * code will either pick up at the same position it visited last or
-	 * the following one.
-	 */
-	if (*pos == cursor->cached_pos)
-		return (cursor->cached_node);
-
-	ASSERT3U(*pos, ==, cursor->cached_pos + 1);
-	return (procfs_list_next_node(cursor, pos));
-}
-
-/*
- * seq_file ->next callback: advance the per-file cursor by one node.
- * Expects pl_lock to be held.
- */
-static void *
-procfs_list_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
-	procfs_list_cursor_t *cursor = f->private;
-	void *node;
-
-	ASSERT(MUTEX_HELD(&cursor->procfs_list->pl_lock));
-
-	node = procfs_list_next_node(cursor, pos);
-	return (node);
-}
-
-/* seq_file ->stop callback: drop pl_lock taken in procfs_list_seq_start(). */
-static void
-procfs_list_seq_stop(struct seq_file *f, void *p)
-{
-	procfs_list_cursor_t *cursor = f->private;
-
-	mutex_exit(&cursor->procfs_list->pl_lock);
-}
-
-/* Iterator callbacks installed by procfs_list_open(). */
-static struct seq_operations procfs_list_seq_ops = {
-	.start	= procfs_list_seq_start,
-	.show	= procfs_list_seq_show,
-	.next	= procfs_list_seq_next,
-	.stop	= procfs_list_seq_stop,
-};
-
-/*
- * ->open handler: set up a seq_file with a private procfs_list_cursor_t
- * and point the cursor at the procfs_list_t stored as the proc entry's
- * data.  Returns 0 or the error from seq_open_private().
- */
-static int
-procfs_list_open(struct inode *inode, struct file *filp)
-{
-	struct seq_file *f;
-	procfs_list_cursor_t *cursor;
-	int rc;
-
-	rc = seq_open_private(filp, &procfs_list_seq_ops,
-	    sizeof (procfs_list_cursor_t));
-	if (rc != 0)
-		return (rc);
-
-	f = filp->private_data;
-	cursor = f->private;
-
-	cursor->cached_pos = 0;
-	cursor->cached_node = NULL;
-	cursor->procfs_list = PDE_DATA(inode);
-
-	return (0);
-}
-
-/*
- * ->write handler.  The written data itself is ignored: any write invokes
- * the list's optional pl_clear hook.  Returns len when there is no hook
- * or the hook returns 0; otherwise returns the hook's result negated.
- */
-static ssize_t
-procfs_list_write(struct file *filp, const char __user *buf, size_t len,
-    loff_t *ppos)
-{
-	struct seq_file *f = filp->private_data;
-	procfs_list_cursor_t *cursor = f->private;
-	procfs_list_t *pl = cursor->procfs_list;
-	int rc;
-
-	if (pl->pl_clear == NULL)
-		return (len);
-
-	rc = pl->pl_clear(pl);
-	if (rc != 0)
-		return (-rc);
-
-	return (len);
-}
-
-/* File operations passed to kstat_proc_entry_install() for each list. */
-static struct file_operations procfs_list_operations = {
-	.owner		= THIS_MODULE,
-	.open		= procfs_list_open,
-	.release	= seq_release_private,
-	.read		= seq_read,
-	.write		= procfs_list_write,
-	.llseek		= seq_lseek,
-};
-
-/*
- * Initialize a procfs_list and create a file for it in the proc filesystem
- * under the kstat namespace.
- *
- * module, name and mode are passed on to the kstat proc entry helpers.
- * show is stored as the per-node output hook; show_header and clear are
- * optional hooks (the callers in this file check them against NULL).
- * procfs_list_node_off is the byte offset of the embedded
- * procfs_list_node_t within each list element.
- */
-void
-procfs_list_install(const char *module,
-    const char *name,
-    mode_t mode,
-    procfs_list_t *procfs_list,
-    int (*show)(struct seq_file *f, void *p),
-    int (*show_header)(struct seq_file *f),
-    int (*clear)(procfs_list_t *procfs_list),
-    size_t procfs_list_node_off)
-{
-	size_t node_end = procfs_list_node_off + sizeof (procfs_list_node_t);
-	size_t link_off = procfs_list_node_off +
-	    offsetof(procfs_list_node_t, pln_link);
-
-	mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
-	list_create(&procfs_list->pl_list, node_end, link_off);
-
-	procfs_list->pl_node_offset = procfs_list_node_off;
-	procfs_list->pl_show = show;
-	procfs_list->pl_show_header = show_header;
-	procfs_list->pl_clear = clear;
-	/* Save id 0 for SEQ_START_TOKEN */
-	procfs_list->pl_next_id = 1;
-
-	kstat_proc_entry_init(&procfs_list->pl_kstat_entry, module, name);
-	kstat_proc_entry_install(&procfs_list->pl_kstat_entry, mode,
-	    &procfs_list_operations, procfs_list);
-}
-EXPORT_SYMBOL(procfs_list_install);
-
-/*
- * Remove the proc filesystem file corresponding to the given list.
- * Only the kstat proc entry is deleted here; the list and its lock are
- * torn down separately by procfs_list_destroy().
- */
-void
-procfs_list_uninstall(procfs_list_t *procfs_list)
-{
- kstat_proc_entry_delete(&procfs_list->pl_kstat_entry);
-}
-EXPORT_SYMBOL(procfs_list_uninstall);
-
-/*
- * Destroy the list and mutex set up by procfs_list_install().  The list
- * is asserted to be empty, so elements must be removed beforehand.
- */
-void
-procfs_list_destroy(procfs_list_t *procfs_list)
-{
- ASSERT(list_is_empty(&procfs_list->pl_list));
- list_destroy(&procfs_list->pl_list);
- mutex_destroy(&procfs_list->pl_lock);
-}
-EXPORT_SYMBOL(procfs_list_destroy);
-
-/*
- * Add a new node to the tail of the list.  While the standard list
- * manipulation functions can be used for all other operations, elements
- * must only be added through this helper so that the id of the new node
- * is set correctly.  Expects pl_lock to be held.
- */
-void
-procfs_list_add(procfs_list_t *procfs_list, void *p)
-{
-	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
-
-	/* Hand out the next id, then advance the counter. */
-	NODE_ID(procfs_list, p) = procfs_list->pl_next_id;
-	procfs_list->pl_next_id++;
-
-	list_insert_tail(&procfs_list->pl_list, p);
-}
-EXPORT_SYMBOL(procfs_list_add);
diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c
deleted file mode 100644
index 90e1d0a4d..000000000
--- a/module/spl/spl-taskq.c
+++ /dev/null
@@ -1,1292 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Task Queue Implementation.
- */
-
-#include <sys/timer.h>
-#include <sys/taskq.h>
-#include <sys/kmem.h>
-#include <sys/tsd.h>
-#include <sys/simd.h>
-
-/*
- * Module parameters.  All four are ints exported with mode 0644; the
- * initializers below are the defaults and the MODULE_PARM_DESC strings
- * give each one's stated purpose.
- */
-int spl_taskq_thread_bind = 0;
-module_param(spl_taskq_thread_bind, int, 0644);
-MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
-
-
-int spl_taskq_thread_dynamic = 1;
-module_param(spl_taskq_thread_dynamic, int, 0644);
-MODULE_PARM_DESC(spl_taskq_thread_dynamic, "Allow dynamic taskq threads");
-
-int spl_taskq_thread_priority = 1;
-module_param(spl_taskq_thread_priority, int, 0644);
-MODULE_PARM_DESC(spl_taskq_thread_priority,
- "Allow non-default priority for taskq threads");
-
-int spl_taskq_thread_sequential = 4;
-module_param(spl_taskq_thread_sequential, int, 0644);
-MODULE_PARM_DESC(spl_taskq_thread_sequential,
- "Create new taskq threads after N sequential tasks");
-
-/* Global system-wide dynamic task queue available for all consumers */
-taskq_t *system_taskq;
-EXPORT_SYMBOL(system_taskq);
-/* Global dynamic task queue for long delay */
-taskq_t *system_delay_taskq;
-EXPORT_SYMBOL(system_delay_taskq);
-
-/* Private dedicated taskq for creating new taskq threads on demand. */
-static taskq_t *dynamic_taskq;
-static taskq_thread_t *taskq_thread_create(taskq_t *);
-
-/* List of all taskqs */
-LIST_HEAD(tq_list);
-struct rw_semaphore tq_list_sem;
-/* Key passed to tsd_get_by_thread() by taskq_member() */
-static uint_t taskq_tsd;
-
-/*
- * Translate taskq dispatch flags into the matching kmem allocation flag.
- * TQ_NOSLEEP takes precedence over TQ_PUSHPAGE; with neither set the
- * result is KM_SLEEP.
- */
-static int
-task_km_flags(uint_t flags)
-{
-	int km = KM_SLEEP;
-
-	if (flags & TQ_NOSLEEP)
-		km = KM_NOSLEEP;
-	else if (flags & TQ_PUSHPAGE)
-		km = KM_PUSHPAGE;
-
-	return (km);
-}
-
-/*
- * taskq_find_by_name - Find the largest instance number of a named taskq.
- *
- * tq_list is scanned from its tail towards its head and the tq_instance
- * of the first taskq whose name matches is returned; -1 means no taskq
- * with that name is on the list.
- */
-static int
-taskq_find_by_name(const char *name)
-{
-	struct list_head *pos;
-	taskq_t *cand;
-
-	list_for_each_prev(pos, &tq_list) {
-		cand = list_entry(pos, taskq_t, tq_taskqs);
-		if (strcmp(name, cand->tq_name) != 0)
-			continue;
-
-		return (cand->tq_instance);
-	}
-
-	return (-1);
-}
-
-/*
- * NOTE: Must be called with tq->tq_lock held, returns a taskq_ent_t which
- * is not attached to the free, work, or pending taskq lists.
- *
- * tq->tq_lock may be dropped and re-taken while this function sleeps or
- * allocates; *irqflags is the caller's saved interrupt state and is
- * updated whenever the lock is re-acquired.
- *
- * Returns NULL when no entry can be provided: TQ_NOALLOC with an empty
- * free list, TQ_NOSLEEP with the pool at tq_maxalloc, or a failed
- * kmem_alloc().
- */
-static taskq_ent_t *
-task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags)
-{
-	taskq_ent_t *t;
-	int count = 0;
-
-	ASSERT(tq);
-retry:
-	/* Acquire taskq_ent_t's from free list if available */
-	if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
-		t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
-
-		ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
-		ASSERT(!(t->tqent_flags & TQENT_FLAG_CANCEL));
-		ASSERT(!timer_pending(&t->tqent_timer));
-
-		list_del_init(&t->tqent_list);
-		return (t);
-	}
-
-	/* Free list is empty and memory allocations are prohibited */
-	if (flags & TQ_NOALLOC)
-		return (NULL);
-
-	/* Hit maximum taskq_ent_t pool size */
-	if (tq->tq_nalloc >= tq->tq_maxalloc) {
-		if (flags & TQ_NOSLEEP)
-			return (NULL);
-
-		/*
-		 * Sleep periodically polling the free list for an available
-		 * taskq_ent_t. Dispatching with TQ_SLEEP should always succeed
-		 * but we cannot block forever waiting for an taskq_ent_t to
-		 * show up in the free list, otherwise a deadlock can happen.
-		 *
-		 * Therefore, we need to allocate a new task even if the number
-		 * of allocated tasks is above tq->tq_maxalloc, but we still
-		 * end up delaying the task allocation by one second, thereby
-		 * throttling the task dispatch rate.
-		 *
-		 * The _interruptible variant sets the task state before
-		 * scheduling.  A bare schedule_timeout() leaves the caller in
-		 * TASK_RUNNING, so it would return without sleeping for the
-		 * requested interval and the throttle would not take effect.
-		 */
-		spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
-		schedule_timeout_interruptible(HZ / 100);
-		spin_lock_irqsave_nested(&tq->tq_lock, *irqflags,
-		    tq->tq_lock_class);
-		if (count < 100) {
-			count++;
-			goto retry;
-		}
-	}
-
-	/* The allocation runs without tq_lock held. */
-	spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
-	t = kmem_alloc(sizeof (taskq_ent_t), task_km_flags(flags));
-	spin_lock_irqsave_nested(&tq->tq_lock, *irqflags, tq->tq_lock_class);
-
-	if (t) {
-		taskq_init_ent(t);
-		tq->tq_nalloc++;
-	}
-
-	return (t);
-}
-
-/*
- * NOTE: Must be called with tq->tq_lock held, expects the taskq_ent_t
- * to already be removed from the free, work, or pending taskq lists.
- *
- * Releases the entry's memory and lowers the taskq's allocation count.
- */
-static void
-task_free(taskq_t *tq, taskq_ent_t *t)
-{
-	ASSERT(tq);
-	ASSERT(t);
-	ASSERT(list_empty(&t->tqent_list));
-	ASSERT(!timer_pending(&t->tqent_timer));
-
-	tq->tq_nalloc--;
-	kmem_free(t, sizeof (taskq_ent_t));
-}
-
-/*
- * NOTE: Must be called with tq->tq_lock held, either destroys the
- * taskq_ent_t if too many exist or moves it to the free list for later use.
- *
- * "Too many" means tq_nalloc is above tq_minalloc.  An entry that is kept
- * has its id, function, argument and flags reset before being appended
- * to the free list.
- */
-static void
-task_done(taskq_t *tq, taskq_ent_t *t)
-{
-	ASSERT(tq);
-	ASSERT(t);
-
-	/* Wake tasks blocked in taskq_wait_id() */
-	wake_up_all(&t->tqent_waitq);
-
-	list_del_init(&t->tqent_list);
-
-	if (tq->tq_nalloc > tq->tq_minalloc) {
-		task_free(tq, t);
-		return;
-	}
-
-	t->tqent_id = TASKQID_INVALID;
-	t->tqent_func = NULL;
-	t->tqent_arg = NULL;
-	t->tqent_flags = 0;
-
-	list_add_tail(&t->tqent_list, &tq->tq_free_list);
-}
-
-/*
- * When a delayed task timer expires remove it from the delay list and
- * add it to the priority list in order for immediate processing.
- *
- * Nothing is done for an entry already flagged TQENT_FLAG_CANCEL.
- * Otherwise the entry's birth time is refreshed and a waiter on
- * tq_work_waitq is woken once tq_lock has been released.
- */
-static void
-task_expire_impl(taskq_ent_t *t)
-{
-	taskq_t *tq = t->tqent_taskq;
-	struct list_head *pos;
-	taskq_ent_t *cur;
-	unsigned long irqflags;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
-
-	if (t->tqent_flags & TQENT_FLAG_CANCEL) {
-		ASSERT(list_empty(&t->tqent_list));
-		spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-		return;
-	}
-
-	t->tqent_birth = jiffies;
-
-	/*
-	 * The priority list must be maintained in strict task id order
-	 * from lowest to highest for lowest_id to be easily calculable.
-	 * Scan from the tail for the last entry with a smaller id; if there
-	 * is none the scan stops on the list head itself.
-	 */
-	list_del(&t->tqent_list);
-	list_for_each_prev(pos, &tq->tq_prio_list) {
-		cur = list_entry(pos, taskq_ent_t, tqent_list);
-		if (cur->tqent_id < t->tqent_id)
-			break;
-	}
-
-	/* Insert right after the entry (or list head) the scan stopped on. */
-	list_add(&t->tqent_list, pos);
-
-	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-
-	wake_up(&tq->tq_work_waitq);
-}
-
-/*
- * Timer callback installed by taskq_dispatch_delay(): recover the
- * taskq_ent_t that embeds the expired timer and hand it to
- * task_expire_impl().
- */
-static void
-task_expire(spl_timer_list_t tl)
-{
-	struct timer_list *timer = (struct timer_list *)tl;
-	taskq_ent_t *ent = from_timer(ent, timer, tqent_timer);
-
-	task_expire_impl(ent);
-}
-
-/*
- * Returns the lowest incomplete taskqid_t.  The taskqid_t may
- * be queued on the pending list, on the priority list, on the
- * delay list, or on the work list currently being handled, but
- * it is not 100% complete yet.
- *
- * Only the first element of each list is examined, which relies on the
- * lists being ordered by increasing id.  When all four lists are empty
- * the result is tq->tq_next_id.
- */
-static taskqid_t
-taskq_lowest_id(taskq_t *tq)
-{
-	taskqid_t lowest_id;
-	taskq_ent_t *t;
-	taskq_thread_t *tqt;
-
-	/* Check the pointer before, not after, it is first dereferenced. */
-	ASSERT(tq);
-
-	lowest_id = tq->tq_next_id;
-
-	if (!list_empty(&tq->tq_pend_list)) {
-		t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list);
-		lowest_id = MIN(lowest_id, t->tqent_id);
-	}
-
-	if (!list_empty(&tq->tq_prio_list)) {
-		t = list_entry(tq->tq_prio_list.next, taskq_ent_t, tqent_list);
-		lowest_id = MIN(lowest_id, t->tqent_id);
-	}
-
-	if (!list_empty(&tq->tq_delay_list)) {
-		t = list_entry(tq->tq_delay_list.next, taskq_ent_t, tqent_list);
-		lowest_id = MIN(lowest_id, t->tqent_id);
-	}
-
-	if (!list_empty(&tq->tq_active_list)) {
-		tqt = list_entry(tq->tq_active_list.next, taskq_thread_t,
-		    tqt_active_list);
-		ASSERT(tqt->tqt_id != TASKQID_INVALID);
-		lowest_id = MIN(lowest_id, tqt->tqt_id);
-	}
-
-	return (lowest_id);
-}
-
-/*
- * Insert a task into a list keeping the list sorted by increasing taskqid.
- *
- * Concretely, the thread tqt is linked into tq->tq_active_list, keyed by
- * its tqt_id.
- */
-static void
-taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt)
-{
-	struct list_head *pos;
-	taskq_thread_t *cur;
-
-	ASSERT(tq);
-	ASSERT(tqt);
-
-	/*
-	 * Scan from the tail for the last thread with a smaller id; if
-	 * there is none the scan stops on the list head itself.
-	 */
-	list_for_each_prev(pos, &tq->tq_active_list) {
-		cur = list_entry(pos, taskq_thread_t, tqt_active_list);
-		if (cur->tqt_id < tqt->tqt_id)
-			break;
-	}
-
-	/* Insert right after the entry (or list head) the scan stopped on. */
-	list_add(&tqt->tqt_active_list, pos);
-}
-
-/*
- * Find and return a task from the given list if it exists.  The list
- * must be in lowest to highest task id order, which lets the scan stop
- * at the first entry whose id exceeds the one sought.  Returns NULL when
- * the id is not on the list.
- */
-static taskq_ent_t *
-taskq_find_list(taskq_t *tq, struct list_head *lh, taskqid_t id)
-{
-	struct list_head *pos;
-	taskq_ent_t *ent;
-
-	list_for_each(pos, lh) {
-		ent = list_entry(pos, taskq_ent_t, tqent_list);
-
-		/* Past the slot where id would be: it is not here. */
-		if (ent->tqent_id > id)
-			break;
-
-		if (ent->tqent_id == id)
-			return (ent);
-	}
-
-	return (NULL);
-}
-
-/*
- * Find an already dispatched task given the task id regardless of what
- * state it is in.  If a task is still pending it will be returned.
- * If a task is executing, then -EBUSY will be returned instead.
- * If the task has already been run then NULL is returned.
- *
- * The delay, priority and pending lists are searched in that order,
- * followed by the threads on the active list.
- */
-static taskq_ent_t *
-taskq_find(taskq_t *tq, taskqid_t id)
-{
-	taskq_thread_t *tqt;
-	struct list_head *pos;
-	taskq_ent_t *t;
-
-	t = taskq_find_list(tq, &tq->tq_delay_list, id);
-	if (t == NULL)
-		t = taskq_find_list(tq, &tq->tq_prio_list, id);
-	if (t == NULL)
-		t = taskq_find_list(tq, &tq->tq_pend_list, id);
-	if (t != NULL)
-		return (t);
-
-	list_for_each(pos, &tq->tq_active_list) {
-		tqt = list_entry(pos, taskq_thread_t, tqt_active_list);
-		if (tqt->tqt_id != id)
-			continue;
-
-		/*
-		 * Instead of returning tqt_task, we just return a non
-		 * NULL value to prevent misuse, since tqt_task only
-		 * has two valid fields.
-		 */
-		return (ERR_PTR(-EBUSY));
-	}
-
-	return (NULL);
-}
-
-/*
- * Theory for the taskq_wait_id(), taskq_wait_outstanding(), and
- * taskq_wait() functions below.
- *
- * Taskq waiting is accomplished by tracking the lowest outstanding task
- * id and the next available task id. As tasks are dispatched they are
- * added to the tail of the pending, priority, or delay lists. As worker
- * threads become available the tasks are removed from the heads of these
- * lists and linked to the worker threads. This ensures the lists are
- * kept sorted by lowest to highest task id.
- *
- * Therefore the lowest outstanding task id can be quickly determined by
- * checking the head item from all of these lists. This value is stored
- * with the taskq as the lowest id. It only needs to be recalculated when
- * either the task with the current lowest id completes or is canceled.
- *
- * By blocking until the lowest task id exceeds the passed task id the
- * taskq_wait_outstanding() function can be easily implemented. Similarly,
- * by blocking until the lowest task id matches the next task id taskq_wait()
- * can be implemented.
- *
- * Callers should be aware that when there are multiple worker threads it
- * is possible for larger task ids to complete before smaller ones. Also
- * when the taskq contains delay tasks with small task ids callers may
- * block for a considerable length of time waiting for them to expire and
- * execute.
- */
-/*
- * Wait condition for taskq_wait_id(): returns non-zero once taskq_find()
- * no longer locates the given id.  The lookup is done under tq_lock.
- */
-static int
-taskq_wait_id_check(taskq_t *tq, taskqid_t id)
-{
-	taskq_ent_t *found;
-	unsigned long irqflags;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
-	found = taskq_find(tq, id);
-	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-
-	return (found == NULL);
-}
-
-/*
- * The taskq_wait_id() function blocks until the passed task id completes.
- * This does not guarantee that all lower task ids have completed.
- *
- * The caller sleeps on tq_wait_waitq until taskq_wait_id_check() reports
- * that the id can no longer be found.
- */
-void
-taskq_wait_id(taskq_t *tq, taskqid_t id)
-{
- wait_event(tq->tq_wait_waitq, taskq_wait_id_check(tq, id));
-}
-EXPORT_SYMBOL(taskq_wait_id);
-
-/*
- * Wait condition for taskq_wait_outstanding(): returns non-zero once the
- * taskq's lowest outstanding id has moved past the given id.  The
- * comparison is done under tq_lock.
- */
-static int
-taskq_wait_outstanding_check(taskq_t *tq, taskqid_t id)
-{
-	unsigned long irqflags;
-	int passed;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
-	passed = (id < tq->tq_lowest_id);
-	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-
-	return (passed);
-}
-
-/*
- * The taskq_wait_outstanding() function will block until all tasks with a
- * lower taskqid than the passed 'id' have been completed.  Note that all
- * task id's are assigned monotonically at dispatch time.  Zero may be
- * passed for the id to indicate all tasks dispatch up to this point,
- * but not after, should be waited for.
- */
-void
-taskq_wait_outstanding(taskq_t *tq, taskqid_t id)
-{
-	/* Zero means "everything dispatched so far". */
-	if (id == 0)
-		id = tq->tq_next_id - 1;
-
-	wait_event(tq->tq_wait_waitq, taskq_wait_outstanding_check(tq, id));
-}
-EXPORT_SYMBOL(taskq_wait_outstanding);
-
-/*
- * Wait condition for taskq_wait(): returns non-zero when the lowest
- * outstanding id equals the next id to be assigned.  The comparison is
- * done under tq_lock.
- */
-static int
-taskq_wait_check(taskq_t *tq)
-{
-	unsigned long irqflags;
-	int drained;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
-	drained = (tq->tq_lowest_id == tq->tq_next_id);
-	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-
-	return (drained);
-}
-
-/*
- * The taskq_wait() function will block until the taskq is empty.
- * This means that if a taskq re-dispatches work to itself taskq_wait()
- * callers will block indefinitely.
- *
- * The caller sleeps on tq_wait_waitq until taskq_wait_check() succeeds.
- */
-void
-taskq_wait(taskq_t *tq)
-{
- wait_event(tq->tq_wait_waitq, taskq_wait_check(tq));
-}
-EXPORT_SYMBOL(taskq_wait);
-
-/*
- * Returns non-zero when the taskq recorded for thread t under the
- * taskq_tsd key is tq, and zero otherwise.
- */
-int
-taskq_member(taskq_t *tq, kthread_t *t)
-{
-	taskq_t *owner = (taskq_t *)tsd_get_by_thread(taskq_tsd, t);
-
-	return (tq == owner);
-}
-EXPORT_SYMBOL(taskq_member);
-
-/*
- * Cancel an already dispatched task given the task id.  Still pending tasks
- * will be immediately canceled, and if the task is active the function will
- * block until it completes.  Preallocated tasks which are canceled must be
- * freed by the caller.
- *
- * Returns 0 when a pending task was canceled, EBUSY when the task was
- * running (after waiting for it to finish), and ENOENT when the id was
- * not found.
- */
-int
-taskq_cancel_id(taskq_t *tq, taskqid_t id)
-{
-	taskq_ent_t *t;
-	int rc = ENOENT;
-	unsigned long flags;
-
-	ASSERT(tq);
-
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	t = taskq_find(tq, id);
-
-	/* The task is executing: wait for it outside the lock. */
-	if (t == ERR_PTR(-EBUSY)) {
-		spin_unlock_irqrestore(&tq->tq_lock, flags);
-		taskq_wait_id(tq, id);
-		return (EBUSY);
-	}
-
-	if (t != NULL) {
-		list_del_init(&t->tqent_list);
-		t->tqent_flags |= TQENT_FLAG_CANCEL;
-
-		/*
-		 * When canceling the lowest outstanding task id we
-		 * must recalculate the new lowest outstanding id.
-		 */
-		if (tq->tq_lowest_id == t->tqent_id) {
-			tq->tq_lowest_id = taskq_lowest_id(tq);
-			ASSERT3S(tq->tq_lowest_id, >, t->tqent_id);
-		}
-
-		/*
-		 * The task_expire() function takes the tq->tq_lock so
-		 * drop the lock before synchronously cancelling the timer.
-		 */
-		if (timer_pending(&t->tqent_timer)) {
-			spin_unlock_irqrestore(&tq->tq_lock, flags);
-			del_timer_sync(&t->tqent_timer);
-			spin_lock_irqsave_nested(&tq->tq_lock, flags,
-			    tq->tq_lock_class);
-		}
-
-		if (!(t->tqent_flags & TQENT_FLAG_PREALLOC))
-			task_done(tq, t);
-
-		rc = 0;
-	}
-
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	return (rc);
-}
-EXPORT_SYMBOL(taskq_cancel_id);
-
-static int taskq_thread_spawn(taskq_t *tq);
-
-/*
- * Queue func(arg) for execution on tq using an entry from task_alloc().
- *
- * Placement depends on flags: TQ_NOQUEUE puts the entry at the head of
- * the priority list, TQ_FRONT at the tail of the priority list, and
- * anything else at the tail of the pending list.
- *
- * Returns the id assigned to the task (taken from tq_next_id), or
- * TASKQID_INVALID when the taskq is not TASKQ_ACTIVE, when TQ_NOQUEUE
- * cannot be honored, or when task_alloc() returns NULL.
- */
-taskqid_t
-taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
-{
- taskq_ent_t *t;
- taskqid_t rc = TASKQID_INVALID;
- unsigned long irqflags;
-
- ASSERT(tq);
- ASSERT(func);
-
- spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
-
- /* Taskq being destroyed and all tasks drained */
- if (!(tq->tq_flags & TASKQ_ACTIVE))
- goto out;
-
- /* Do not queue the task unless there is idle thread for it */
- ASSERT(tq->tq_nactive <= tq->tq_nthreads);
- if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
- /* Dynamic taskq may be able to spawn another thread */
- if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
- taskq_thread_spawn(tq) == 0)
- goto out;
- }
-
- /* task_alloc() may drop and re-take tq_lock; irqflags is updated */
- if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
- goto out;
-
- spin_lock(&t->tqent_lock);
-
- /* Queue to the front of the list to enforce TQ_NOQUEUE semantics */
- if (flags & TQ_NOQUEUE)
- list_add(&t->tqent_list, &tq->tq_prio_list);
- /* Queue to the priority list instead of the pending list */
- else if (flags & TQ_FRONT)
- list_add_tail(&t->tqent_list, &tq->tq_prio_list);
- else
- list_add_tail(&t->tqent_list, &tq->tq_pend_list);
-
- t->tqent_id = rc = tq->tq_next_id;
- tq->tq_next_id++;
- t->tqent_func = func;
- t->tqent_arg = arg;
- t->tqent_taskq = tq;
- t->tqent_timer.function = NULL;
- t->tqent_timer.expires = 0;
- t->tqent_birth = jiffies;
-
- ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
-
- spin_unlock(&t->tqent_lock);
-
- wake_up(&tq->tq_work_waitq);
-out:
- /* Spawn additional taskq threads if required. */
- if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
- (void) taskq_thread_spawn(tq);
-
- spin_unlock_irqrestore(&tq->tq_lock, irqflags);
- return (rc);
-}
-EXPORT_SYMBOL(taskq_dispatch);
-
-/*
- * Queue func(arg) on tq's delay list and arm the entry's timer so that
- * task_expire() runs when it fires.
- *
- * expire_time is stored unchanged in the timer's 'expires' field.
- * Returns the id assigned to the entry, or TASKQID_INVALID when the taskq
- * is no longer TASKQ_ACTIVE or task_alloc() returns NULL.
- */
-taskqid_t
-taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
- uint_t flags, clock_t expire_time)
-{
- taskqid_t rc = TASKQID_INVALID;
- taskq_ent_t *t;
- unsigned long irqflags;
-
- ASSERT(tq);
- ASSERT(func);
-
- spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
-
- /* Taskq being destroyed and all tasks drained */
- if (!(tq->tq_flags & TASKQ_ACTIVE))
- goto out;
-
- if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
- goto out;
-
- spin_lock(&t->tqent_lock);
-
- /* Queue to the delay list for subsequent execution */
- list_add_tail(&t->tqent_list, &tq->tq_delay_list);
-
- t->tqent_id = rc = tq->tq_next_id;
- tq->tq_next_id++;
- t->tqent_func = func;
- t->tqent_arg = arg;
- t->tqent_taskq = tq;
- /* Arm the timer while both tq_lock and tqent_lock are held. */
- t->tqent_timer.function = task_expire;
- t->tqent_timer.expires = (unsigned long)expire_time;
- add_timer(&t->tqent_timer);
-
- ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
-
- spin_unlock(&t->tqent_lock);
-out:
- /* Spawn additional taskq threads if required. */
- if (tq->tq_nactive == tq->tq_nthreads)
- (void) taskq_thread_spawn(tq);
- spin_unlock_irqrestore(&tq->tq_lock, irqflags);
- return (rc);
-}
-EXPORT_SYMBOL(taskq_dispatch_delay);
-
-/*
- * Queue func(arg) on tq using the caller-provided entry t instead of
- * allocating one.  The entry is flagged TQENT_FLAG_PREALLOC so the taskq
- * never frees it.
- *
- * Nothing is returned.  When the taskq is no longer TASKQ_ACTIVE the entry
- * is not queued and t->tqent_id is set to TASKQID_INVALID.
- */
-void
-taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
- taskq_ent_t *t)
-{
- unsigned long irqflags;
- ASSERT(tq);
- ASSERT(func);
-
- spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
- tq->tq_lock_class);
-
- /* Taskq being destroyed and all tasks drained */
- if (!(tq->tq_flags & TASKQ_ACTIVE)) {
- t->tqent_id = TASKQID_INVALID;
- goto out;
- }
-
- if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
- /* Dynamic taskq may be able to spawn another thread */
- /*
- * NOTE(review): this refusal path leaves t untouched, so
- * t->tqent_id is not set to TASKQID_INVALID here -- confirm
- * that callers do not rely on it.
- */
- if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
- taskq_thread_spawn(tq) == 0)
- goto out2;
- flags |= TQ_FRONT;
- }
-
- spin_lock(&t->tqent_lock);
-
- /*
- * Make sure the entry is not on some other taskq; it is important to
- * ASSERT() under lock
- */
- ASSERT(taskq_empty_ent(t));
-
- /*
- * Mark it as a prealloc'd task. This is important
- * to ensure that we don't free it later.
- */
- t->tqent_flags |= TQENT_FLAG_PREALLOC;
-
- /* Queue to the priority list instead of the pending list */
- if (flags & TQ_FRONT)
- list_add_tail(&t->tqent_list, &tq->tq_prio_list);
- else
- list_add_tail(&t->tqent_list, &tq->tq_pend_list);
-
- t->tqent_id = tq->tq_next_id;
- tq->tq_next_id++;
- t->tqent_func = func;
- t->tqent_arg = arg;
- t->tqent_taskq = tq;
- t->tqent_birth = jiffies;
-
- spin_unlock(&t->tqent_lock);
-
- wake_up(&tq->tq_work_waitq);
-out:
- /* Spawn additional taskq threads if required. */
- if (tq->tq_nactive == tq->tq_nthreads)
- (void) taskq_thread_spawn(tq);
-out2:
- /* out2 skips the spawn attempt above; it was already tried. */
- spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-}
-EXPORT_SYMBOL(taskq_dispatch_ent);
-
-/*
- * Report whether the entry is currently unlinked, i.e. its tqent_list
- * node is not on any list.  Returns the result of list_empty().
- */
-int
-taskq_empty_ent(taskq_ent_t *t)
-{
-	int unlinked = list_empty(&t->tqent_list);
-
-	return (unlinked);
-}
-EXPORT_SYMBOL(taskq_empty_ent);
-
-/*
- * Put a caller-owned taskq entry into its pristine state: no function,
- * argument, flags or owning taskq, an unlinked list node, and freshly
- * initialized lock, wait queue and timer.
- */
-void
-taskq_init_ent(taskq_ent_t *t)
-{
-	/* Plain data fields. */
-	t->tqent_taskq = NULL;
-	t->tqent_flags = 0;
-	t->tqent_arg = NULL;
-	t->tqent_func = NULL;
-	t->tqent_id = 0;
-
-	/* Embedded kernel objects. */
-	INIT_LIST_HEAD(&t->tqent_list);
-	timer_setup(&t->tqent_timer, NULL, 0);
-	init_waitqueue_head(&t->tqent_waitq);
-	spin_lock_init(&t->tqent_lock);
-}
-EXPORT_SYMBOL(taskq_init_ent);
-
-/*
- * Peek at the task that should run next without unlinking it.  The
- * priority list (TQ_FRONT dispatches) is consulted before the pending
- * list; NULL is returned when both are empty.
- */
-static taskq_ent_t *
-taskq_next_ent(taskq_t *tq)
-{
-	struct list_head *src = NULL;
-
-	if (!list_empty(&tq->tq_prio_list))
-		src = &tq->tq_prio_list;
-	else if (!list_empty(&tq->tq_pend_list))
-		src = &tq->tq_pend_list;
-
-	if (src == NULL)
-		return (NULL);
-
-	return (list_first_entry(src, taskq_ent_t, tqent_list));
-}
-
-/*
- * Task body queued by taskq_thread_spawn(): create one more worker thread
- * for the taskq passed in arg.
- */
-static void
-taskq_thread_spawn_task(void *arg)
-{
-	taskq_t *owner = (taskq_t *)arg;
-	unsigned long irqsave;
-
-	if (taskq_thread_create(owner) != NULL)
-		return;
-
-	/* Creation failed: give back the slot counted in tq_nspawn. */
-	spin_lock_irqsave_nested(&owner->tq_lock, irqsave,
-	    owner->tq_lock_class);
-	owner->tq_nspawn--;
-	spin_unlock_irqrestore(&owner->tq_lock, irqsave);
-}
-
-/*
- * Spawn additional threads for dynamic taskqs (TASKQ_DYNAMIC) when the
- * current number of threads is insufficient to handle the pending tasks.
- * These new threads must be created by the dedicated dynamic_taskq to avoid
- * deadlocks between thread creation and memory reclaim.  The system_taskq
- * which is also a dynamic taskq cannot be safely used for this.
- *
- * Every caller in this file invokes this function with tq->tq_lock held.
- *
- * Returns the updated tq_nspawn (non-zero) when a spawn request was queued
- * on dynamic_taskq, or 0 when the taskq is not dynamic, is not active, is
- * already at its thread limit, or the request could not be queued.
- */
-static int
-taskq_thread_spawn(taskq_t *tq)
-{
-	int spawning = 0;
-
-	if (!(tq->tq_flags & TASKQ_DYNAMIC))
-		return (0);
-
-	if ((tq->tq_nthreads + tq->tq_nspawn < tq->tq_maxthreads) &&
-	    (tq->tq_flags & TASKQ_ACTIVE)) {
-		spawning = (++tq->tq_nspawn);
-
-		/*
-		 * If the request cannot be queued, nothing will ever run
-		 * taskq_thread_spawn_task() for it, so the count taken above
-		 * must be returned here.  Otherwise tq_nspawn stays raised
-		 * forever, which keeps idle threads from exiting and makes
-		 * taskq_destroy() wait indefinitely for tq_nspawn to drain.
-		 */
-		if (taskq_dispatch(dynamic_taskq, taskq_thread_spawn_task,
-		    tq, TQ_NOSLEEP) == TASKQID_INVALID) {
-			tq->tq_nspawn--;
-			spawning = 0;
-		}
-	}
-
-	return (spawning);
-}
-
-/*
- * Decide whether the worker tqt of a dynamic taskq may exit.  A thread
- * should only leave once the taskq has been completely drained and no
- * other thread is servicing tasks, which keeps threads from being created
- * and destroyed more often than required.
- *
- * The first thread in the thread list is treated as the primary thread.
- * There is nothing special about it, but it is kept long running so that
- * not all of the taskq pids change.
- *
- * Returns 1 when the thread should stop and 0 otherwise.
- */
-static int
-taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt)
-{
-	taskq_thread_t *primary;
-
-	/* Only dynamic taskqs ever shed threads. */
-	if (!(tq->tq_flags & TASKQ_DYNAMIC))
-		return (0);
-
-	primary = list_first_entry(&(tq->tq_thread_list), taskq_thread_t,
-	    tqt_thread_list);
-	if (primary == tqt)
-		return (0);
-
-	/* Threads are still being spawned */
-	if (tq->tq_nspawn != 0)
-		return (0);
-
-	/* Some thread is handling a task */
-	if (tq->tq_nactive != 0)
-		return (0);
-
-	/* This is the only thread left */
-	if (tq->tq_nthreads <= 1)
-		return (0);
-
-	/* There are pending tasks */
-	if (taskq_next_ent(tq))
-		return (0);
-
-	/* Dynamic taskqs must be allowed */
-	return (spl_taskq_thread_dynamic ? 1 : 0);
-}
-
-/*
- * Main loop of a taskq worker thread.
- *
- * args is the taskq_thread_t set up by taskq_thread_create().  The thread
- * registers itself on tq_thread_list, then repeatedly takes the next entry
- * (priority list first) and runs its function with tq_lock dropped.  It
- * leaves the loop when kthread_should_stop() is true or when
- * taskq_thread_should_stop() says an idle dynamic thread may exit.  The
- * thread frees its own taskq_thread_t before returning.  Always returns 0.
- */
-static int
-taskq_thread(void *args)
-{
- DECLARE_WAITQUEUE(wait, current);
- sigset_t blocked;
- taskq_thread_t *tqt = args;
- taskq_t *tq;
- taskq_ent_t *t;
- int seq_tasks = 0;
- unsigned long flags;
- taskq_ent_t dup_task = {};
-
- ASSERT(tqt);
- ASSERT(tqt->tqt_tq);
- tq = tqt->tqt_tq;
- current->flags |= PF_NOFREEZE;
-
- (void) spl_fstrans_mark();
-
- /* Block every signal and discard any that are already pending. */
- sigfillset(&blocked);
- sigprocmask(SIG_BLOCK, &blocked, NULL);
- flush_signals(current);
- kfpu_initialize();
-
- tsd_set(taskq_tsd, tq);
- spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
- /*
- * If we are dynamically spawned, decrease spawning count. Note that
- * we could be created during taskq_create, in which case we shouldn't
- * do the decrement. But it's fine because taskq_create will reset
- * tq_nspawn later.
- */
- if (tq->tq_flags & TASKQ_DYNAMIC)
- tq->tq_nspawn--;
-
- /* Immediately exit if more threads than allowed were created. */
- if (tq->tq_nthreads >= tq->tq_maxthreads)
- goto error;
-
- tq->tq_nthreads++;
- list_add_tail(&tqt->tqt_thread_list, &tq->tq_thread_list);
- wake_up(&tq->tq_wait_waitq);
- set_current_state(TASK_INTERRUPTIBLE);
-
- while (!kthread_should_stop()) {
-
- if (list_empty(&tq->tq_pend_list) &&
- list_empty(&tq->tq_prio_list)) {
-
- if (taskq_thread_should_stop(tq, tqt)) {
- wake_up_all(&tq->tq_wait_waitq);
- break;
- }
-
- /* Nothing to do: sleep on the work queue without tq_lock. */
- add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
- spin_unlock_irqrestore(&tq->tq_lock, flags);
-
- schedule();
- seq_tasks = 0;
-
- spin_lock_irqsave_nested(&tq->tq_lock, flags,
- tq->tq_lock_class);
- remove_wait_queue(&tq->tq_work_waitq, &wait);
- } else {
- __set_current_state(TASK_RUNNING);
- }
-
- if ((t = taskq_next_ent(tq)) != NULL) {
- list_del_init(&t->tqent_list);
-
- /*
- * A TQENT_FLAG_PREALLOC task may be reused or freed
- * during the task function call. Store tqent_id and
- * tqent_flags here.
- *
- * Also use an on stack taskq_ent_t for tqt_task
- * assignment in this case. We only populate the two
- * fields used by the only user in taskq proc file.
- */
- tqt->tqt_id = t->tqent_id;
- tqt->tqt_flags = t->tqent_flags;
-
- if (t->tqent_flags & TQENT_FLAG_PREALLOC) {
- dup_task.tqent_func = t->tqent_func;
- dup_task.tqent_arg = t->tqent_arg;
- t = &dup_task;
- }
- tqt->tqt_task = t;
-
- taskq_insert_in_order(tq, tqt);
- tq->tq_nactive++;
- spin_unlock_irqrestore(&tq->tq_lock, flags);
-
- /* Perform the requested task */
- t->tqent_func(t->tqent_arg);
-
- spin_lock_irqsave_nested(&tq->tq_lock, flags,
- tq->tq_lock_class);
- tq->tq_nactive--;
- list_del_init(&tqt->tqt_active_list);
- tqt->tqt_task = NULL;
-
- /* For prealloc'd tasks, we don't free anything. */
- if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
- task_done(tq, t);
-
- /*
- * When the current lowest outstanding taskqid is
- * done calculate the new lowest outstanding id
- */
- if (tq->tq_lowest_id == tqt->tqt_id) {
- tq->tq_lowest_id = taskq_lowest_id(tq);
- ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
- }
-
- /* Spawn additional taskq threads if required. */
- if ((++seq_tasks) > spl_taskq_thread_sequential &&
- taskq_thread_spawn(tq))
- seq_tasks = 0;
-
- tqt->tqt_id = TASKQID_INVALID;
- tqt->tqt_flags = 0;
- wake_up_all(&tq->tq_wait_waitq);
- } else {
- if (taskq_thread_should_stop(tq, tqt))
- break;
- }
-
- set_current_state(TASK_INTERRUPTIBLE);
-
- }
-
- __set_current_state(TASK_RUNNING);
- tq->tq_nthreads--;
- list_del_init(&tqt->tqt_thread_list);
-error:
- /*
- * Also reached directly when the thread limit was already met; in that
- * case the thread was never counted in tq_nthreads nor linked on
- * tq_thread_list, so only the descriptor has to be released.
- */
- kmem_free(tqt, sizeof (taskq_thread_t));
- spin_unlock_irqrestore(&tq->tq_lock, flags);
-
- tsd_set(taskq_tsd, NULL);
-
- return (0);
-}
-
-/*
- * Allocate the descriptor for one worker of tq, create the kernel thread
- * that runs taskq_thread() on it, optionally bind it to a CPU and set its
- * nice value, then wake it.  Returns the descriptor, or NULL when
- * spl_kthread_create() fails.
- */
-static taskq_thread_t *
-taskq_thread_create(taskq_t *tq)
-{
-	static int last_used_cpu = 0;
-	taskq_thread_t *worker;
-	struct task_struct *task;
-
-	worker = kmem_alloc(sizeof (*worker), KM_PUSHPAGE);
-	INIT_LIST_HEAD(&worker->tqt_thread_list);
-	INIT_LIST_HEAD(&worker->tqt_active_list);
-	worker->tqt_tq = tq;
-	worker->tqt_id = TASKQID_INVALID;
-
-	worker->tqt_thread = spl_kthread_create(taskq_thread, worker,
-	    "%s", tq->tq_name);
-	task = worker->tqt_thread;
-	if (!task) {
-		kmem_free(worker, sizeof (taskq_thread_t));
-		return (NULL);
-	}
-
-	/* Round-robin over the online CPUs when binding is enabled. */
-	if (spl_taskq_thread_bind) {
-		last_used_cpu = (last_used_cpu + 1) % num_online_cpus();
-		kthread_bind(task, last_used_cpu);
-	}
-
-	if (spl_taskq_thread_priority)
-		set_user_nice(task, PRIO_TO_NICE(tq->tq_pri));
-
-	wake_up_process(task);
-
-	return (worker);
-}
-
-/*
- * Create a taskq called name and start its worker threads.
- *
- * nthreads is the maximum thread count, or a percentage of the online
- * CPUs when TASKQ_THREADS_CPU_PCT is set.  pri, minalloc and maxalloc are
- * recorded in the taskq; with TASKQ_PREPOPULATE, minalloc entries are
- * allocated up front.  A TASKQ_DYNAMIC taskq starts with a single thread
- * when spl_taskq_thread_dynamic is set.
- *
- * Returns the new taskq, or NULL when the taskq structure cannot be
- * allocated or any worker thread could not be created (in which case the
- * partially built taskq is destroyed).
- */
-taskq_t *
-taskq_create(const char *name, int nthreads, pri_t pri,
- int minalloc, int maxalloc, uint_t flags)
-{
- taskq_t *tq;
- taskq_thread_t *tqt;
- int count = 0, rc = 0, i;
- unsigned long irqflags;
-
- ASSERT(name != NULL);
- ASSERT(minalloc >= 0);
- ASSERT(maxalloc <= INT_MAX);
- ASSERT(!(flags & (TASKQ_CPR_SAFE))); /* Unsupported */
-
- /* Scale the number of threads using nthreads as a percentage */
- if (flags & TASKQ_THREADS_CPU_PCT) {
- ASSERT(nthreads <= 100);
- ASSERT(nthreads >= 0);
- nthreads = MIN(nthreads, 100);
- nthreads = MAX(nthreads, 0);
- nthreads = MAX((num_online_cpus() * nthreads) / 100, 1);
- }
-
- tq = kmem_alloc(sizeof (*tq), KM_PUSHPAGE);
- if (tq == NULL)
- return (NULL);
-
- spin_lock_init(&tq->tq_lock);
- INIT_LIST_HEAD(&tq->tq_thread_list);
- INIT_LIST_HEAD(&tq->tq_active_list);
- /* NOTE(review): the strdup() result is not checked here. */
- tq->tq_name = strdup(name);
- tq->tq_nactive = 0;
- tq->tq_nthreads = 0;
- tq->tq_nspawn = 0;
- tq->tq_maxthreads = nthreads;
- tq->tq_pri = pri;
- tq->tq_minalloc = minalloc;
- tq->tq_maxalloc = maxalloc;
- tq->tq_nalloc = 0;
- tq->tq_flags = (flags | TASKQ_ACTIVE);
- tq->tq_next_id = TASKQID_INITIAL;
- tq->tq_lowest_id = TASKQID_INITIAL;
- INIT_LIST_HEAD(&tq->tq_free_list);
- INIT_LIST_HEAD(&tq->tq_pend_list);
- INIT_LIST_HEAD(&tq->tq_prio_list);
- INIT_LIST_HEAD(&tq->tq_delay_list);
- init_waitqueue_head(&tq->tq_work_waitq);
- init_waitqueue_head(&tq->tq_wait_waitq);
- tq->tq_lock_class = TQ_LOCK_GENERAL;
- INIT_LIST_HEAD(&tq->tq_taskqs);
-
- if (flags & TASKQ_PREPOPULATE) {
- spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
- tq->tq_lock_class);
-
- /* Allocate each entry and hand it straight to task_done(). */
- for (i = 0; i < minalloc; i++)
- task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW,
- &irqflags));
-
- spin_unlock_irqrestore(&tq->tq_lock, irqflags);
- }
-
- if ((flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic)
- nthreads = 1;
-
- /* count tracks successes; rc records that at least one failed. */
- for (i = 0; i < nthreads; i++) {
- tqt = taskq_thread_create(tq);
- if (tqt == NULL)
- rc = 1;
- else
- count++;
- }
-
- /* Wait for all threads to be started before potential destroy */
- wait_event(tq->tq_wait_waitq, tq->tq_nthreads == count);
- /*
- * taskq_thread might have touched nspawn, but we don't want them to
- * because they're not dynamically spawned. So we reset it to 0
- */
- tq->tq_nspawn = 0;
-
- if (rc) {
- taskq_destroy(tq);
- tq = NULL;
- } else {
- /* Publish the taskq on the global list under tq_list_sem. */
- down_write(&tq_list_sem);
- tq->tq_instance = taskq_find_by_name(name) + 1;
- list_add_tail(&tq->tq_taskqs, &tq_list);
- up_write(&tq_list_sem);
- }
-
- return (tq);
-}
-EXPORT_SYMBOL(taskq_create);
-
-/*
- * Shut down and free a taskq.
- *
- * Clears TASKQ_ACTIVE so no new work or threads are accepted, waits for
- * outstanding work, unlinks the taskq from the global list, stops every
- * worker thread, frees the cached entries on the free list and finally
- * releases the name and the taskq itself.
- */
-void
-taskq_destroy(taskq_t *tq)
-{
- struct task_struct *thread;
- taskq_thread_t *tqt;
- taskq_ent_t *t;
- unsigned long flags;
-
- ASSERT(tq);
- spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
- tq->tq_flags &= ~TASKQ_ACTIVE;
- spin_unlock_irqrestore(&tq->tq_lock, flags);
-
- /*
- * When TASKQ_ACTIVE is clear new tasks may not be added nor may
- * new worker threads be spawned for dynamic taskq.
- */
- if (dynamic_taskq != NULL)
- taskq_wait_outstanding(dynamic_taskq, 0);
-
- taskq_wait(tq);
-
- /* remove taskq from global list used by the kstats */
- down_write(&tq_list_sem);
- list_del(&tq->tq_taskqs);
- up_write(&tq_list_sem);
-
- spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
- /* wait for spawning threads to insert themselves to the list */
- while (tq->tq_nspawn) {
- spin_unlock_irqrestore(&tq->tq_lock, flags);
- schedule_timeout_interruptible(1);
- spin_lock_irqsave_nested(&tq->tq_lock, flags,
- tq->tq_lock_class);
- }
-
- /*
- * Signal each thread to exit and block until it does. Each thread
- * is responsible for removing itself from the list and freeing its
- * taskq_thread_t. This allows for idle threads to opt to remove
- * themselves from the taskq. They can be recreated as needed.
- */
- while (!list_empty(&tq->tq_thread_list)) {
- tqt = list_entry(tq->tq_thread_list.next,
- taskq_thread_t, tqt_thread_list);
- thread = tqt->tqt_thread;
- /* tq_lock must be dropped: the exiting thread takes it itself. */
- spin_unlock_irqrestore(&tq->tq_lock, flags);
-
- kthread_stop(thread);
-
- spin_lock_irqsave_nested(&tq->tq_lock, flags,
- tq->tq_lock_class);
- }
-
- /* Release every entry cached on the free list. */
- while (!list_empty(&tq->tq_free_list)) {
- t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
-
- ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
-
- list_del_init(&t->tqent_list);
- task_free(tq, t);
- }
-
- ASSERT0(tq->tq_nthreads);
- ASSERT0(tq->tq_nalloc);
- ASSERT0(tq->tq_nspawn);
- ASSERT(list_empty(&tq->tq_thread_list));
- ASSERT(list_empty(&tq->tq_active_list));
- ASSERT(list_empty(&tq->tq_free_list));
- ASSERT(list_empty(&tq->tq_pend_list));
- ASSERT(list_empty(&tq->tq_prio_list));
- ASSERT(list_empty(&tq->tq_delay_list));
-
- spin_unlock_irqrestore(&tq->tq_lock, flags);
-
- strfree(tq->tq_name);
- kmem_free(tq, sizeof (taskq_t));
-}
-EXPORT_SYMBOL(taskq_destroy);
-
-
-/* Backing storage for the spl_taskq_kick parameter; reset to 0 after use. */
-static unsigned int spl_taskq_kick = 0;
-
-/*
- * Setter for the spl_taskq_kick module parameter.
- *
- * Writing a non-zero value walks every registered taskq and, for each one
- * whose next pending task was born more than 5 seconds ago, asks
- * taskq_thread_spawn() for another thread and logs the taskq.  The stored
- * value is reset to 0 before the walk.  Returns the result of
- * param_set_uint().
- *
- * 2.6.36 API Change
- * module_param_cb is introduced to take kernel_param_ops and
- * module_param_call is marked as obsolete. Also set and get operations
- * were changed to take a 'const struct kernel_param *'.
- */
-static int
-#ifdef module_param_cb
-param_set_taskq_kick(const char *val, const struct kernel_param *kp)
-#else
-param_set_taskq_kick(const char *val, struct kernel_param *kp)
-#endif
-{
- int ret;
- taskq_t *tq;
- taskq_ent_t *t;
- unsigned long flags;
-
- /* Nothing to do on a parse error or when zero was written. */
- ret = param_set_uint(val, kp);
- if (ret < 0 || !spl_taskq_kick)
- return (ret);
- /* reset value */
- spl_taskq_kick = 0;
-
- down_read(&tq_list_sem);
- list_for_each_entry(tq, &tq_list, tq_taskqs) {
- spin_lock_irqsave_nested(&tq->tq_lock, flags,
- tq->tq_lock_class);
- /* Check if the first pending is older than 5 seconds */
- t = taskq_next_ent(tq);
- if (t && time_after(jiffies, t->tqent_birth + 5*HZ)) {
- (void) taskq_thread_spawn(tq);
- printk(KERN_INFO "spl: Kicked taskq %s/%d\n",
- tq->tq_name, tq->tq_instance);
- }
- spin_unlock_irqrestore(&tq->tq_lock, flags);
- }
- up_read(&tq_list_sem);
- return (ret);
-}
-
-/* Register the parameter with whichever interface this kernel provides. */
-#ifdef module_param_cb
-static const struct kernel_param_ops param_ops_taskq_kick = {
- .set = param_set_taskq_kick,
- .get = param_get_uint,
-};
-module_param_cb(spl_taskq_kick, &param_ops_taskq_kick, &spl_taskq_kick, 0644);
-#else
-module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
- &spl_taskq_kick, 0644);
-#endif
-MODULE_PARM_DESC(spl_taskq_kick,
- "Write nonzero to kick stuck taskqs to spawn more threads");
-
-/*
- * Set up the taskq subsystem: the global list semaphore, the taskq TSD key
- * and the three global taskqs (system, delay and dynamic).  Returns 0 on
- * success and 1 when any taskq cannot be created; taskqs created before
- * the failure are destroyed again.
- */
-int
-spl_taskq_init(void)
-{
-	const uint_t dyn_flags = TASKQ_PREPOPULATE|TASKQ_DYNAMIC;
-
-	init_rwsem(&tq_list_sem);
-	tsd_create(&taskq_tsd, NULL);
-
-	system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
-	    maxclsyspri, boot_ncpus, INT_MAX, dyn_flags);
-	if (!system_taskq)
-		return (1);
-
-	system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4),
-	    maxclsyspri, boot_ncpus, INT_MAX, dyn_flags);
-	if (!system_delay_taskq) {
-		taskq_destroy(system_taskq);
-		return (1);
-	}
-
-	/* The taskq that creates threads for the dynamic taskqs above. */
-	dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
-	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE);
-	if (!dynamic_taskq) {
-		taskq_destroy(system_taskq);
-		taskq_destroy(system_delay_taskq);
-		return (1);
-	}
-
-	/*
-	 * Give dynamic_taskq its own lock class so that the nesting
-	 * taskq_dispatch -> taskq_thread_spawn -> taskq_dispatch
-	 * is not reported by lockdep as possible recursive locking.
-	 */
-	dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC;
-
-	return (0);
-}
-
-/*
- * Destroy the global taskqs, dynamic_taskq first, then the delay taskq,
- * then the system taskq, clearing each global pointer once its taskq is
- * gone, and finally release the taskq TSD key.
- */
-void
-spl_taskq_fini(void)
-{
-	taskq_t **teardown[] = {
-		&dynamic_taskq,
-		&system_delay_taskq,
-		&system_taskq,
-	};
-	int i;
-
-	for (i = 0; i < (int)(sizeof (teardown) / sizeof (teardown[0])); i++) {
-		taskq_destroy(*teardown[i]);
-		*teardown[i] = NULL;
-	}
-
-	tsd_destroy(&taskq_tsd);
-}
diff --git a/module/spl/spl-thread.c b/module/spl/spl-thread.c
deleted file mode 100644
index 29de9252a..000000000
--- a/module/spl/spl-thread.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Thread Implementation.
- */
-
-#include <sys/thread.h>
-#include <sys/kmem.h>
-#include <sys/tsd.h>
-#include <sys/simd.h>
-
-/*
- * Thread interfaces
- *
- * thread_priv_t carries the start-up parameters from __thread_create() to
- * thread_generic_wrapper(), which frees it before running the function.
- */
-typedef struct thread_priv_s {
- unsigned long tp_magic; /* Magic */
- int tp_name_size; /* Size of the tp_name allocation */
- char *tp_name; /* Name (without _thread suffix) */
- void (*tp_func)(void *); /* Registered function */
- void *tp_args; /* Args to be passed to function */
- size_t tp_len; /* Len to be passed to function */
- int tp_state; /* State to start thread at */
- pri_t tp_pri; /* Priority to start thread at */
-} thread_priv_t;
-
-/*
- * Entry point of every thread started through __thread_create().  Applies
- * the requested state and priority, releases the thread_priv_t hand-off
- * structure and then runs the registered function, if any.  Returns 0.
- */
-static int
-thread_generic_wrapper(void *arg)
-{
-	thread_priv_t *priv = (thread_priv_t *)arg;
-	void (*entry)(void *);
-	void *entry_arg;
-
-	ASSERT(priv->tp_magic == TP_MAGIC);
-
-	/* Copy out what is needed after priv has been freed. */
-	entry = priv->tp_func;
-	entry_arg = priv->tp_args;
-
-	set_current_state(priv->tp_state);
-	set_user_nice((kthread_t *)current, PRIO_TO_NICE(priv->tp_pri));
-	kfpu_initialize();
-
-	kmem_free(priv->tp_name, priv->tp_name_size);
-	kmem_free(priv, sizeof (thread_priv_t));
-
-	if (entry != NULL)
-		entry(entry_arg);
-
-	return (0);
-}
-
-/*
- * Terminate the calling thread: run tsd_exit() for it first, then leave
- * through complete_and_exit() with exit code 0.
- */
-void
-__thread_exit(void)
-{
- tsd_exit();
- complete_and_exit(NULL, 0);
- /* Unreachable */
-}
-EXPORT_SYMBOL(__thread_exit);
-
-/*
- * Create and start a kernel thread that runs func(args).
- *
- * stk must be NULL (variable stack sizes are unsupported) and pp is
- * ignored.  name is copied and truncated at the first "_thread"; state and
- * pri are applied by the new thread itself before it calls func.
- *
- * Returns the new thread, or NULL when the bookkeeping cannot be allocated
- * or the thread cannot be created.  Solaris style callers rarely check for
- * NULL, so spl_kthread_create() retries the recoverable failures itself.
- */
-kthread_t *
-__thread_create(caddr_t stk, size_t stksize, thread_func_t func,
-    const char *name, void *args, size_t len, proc_t *pp, int state, pri_t pri)
-{
-	thread_priv_t *tp;
-	struct task_struct *tsk;
-	char *p;
-
-	/* Option pp is simply ignored */
-	/* Variable stack size unsupported */
-	ASSERT(stk == NULL);
-
-	tp = kmem_alloc(sizeof (thread_priv_t), KM_PUSHPAGE);
-	if (tp == NULL)
-		return (NULL);
-
-	tp->tp_magic = TP_MAGIC;
-	tp->tp_name_size = strlen(name) + 1;
-
-	tp->tp_name = kmem_alloc(tp->tp_name_size, KM_PUSHPAGE);
-	if (tp->tp_name == NULL) {
-		kmem_free(tp, sizeof (thread_priv_t));
-		return (NULL);
-	}
-
-	/* tp_name_size includes the terminator, so the copy is terminated. */
-	strncpy(tp->tp_name, name, tp->tp_name_size);
-
-	/*
-	 * Strip trailing "_thread" from passed name which will be the func
-	 * name since the exposed API has no parameter for passing a name.
-	 */
-	p = strstr(tp->tp_name, "_thread");
-	if (p)
-		p[0] = '\0';
-
-	tp->tp_func = func;
-	tp->tp_args = args;
-	tp->tp_len = len;
-	tp->tp_state = state;
-	tp->tp_pri = pri;
-
-	tsk = spl_kthread_create(thread_generic_wrapper, (void *)tp,
-	    "%s", tp->tp_name);
-
-	/*
-	 * spl_kthread_create() reports failure with NULL rather than an
-	 * ERR_PTR, so both must be rejected before the task is woken.  The
-	 * wrapper never ran in that case, so tp is still owned here and has
-	 * to be released.
-	 */
-	if (tsk == NULL || IS_ERR(tsk)) {
-		kmem_free(tp->tp_name, tp->tp_name_size);
-		kmem_free(tp, sizeof (thread_priv_t));
-		return (NULL);
-	}
-
-	wake_up_process(tsk);
-	return ((kthread_t *)tsk);
-}
-EXPORT_SYMBOL(__thread_create);
-
-/*
- * spl_kthread_create - kthread_create() wrapper providing pre-3.13
- * semantics: creation is retried when it was interrupted by a pending
- * signal (which is cleared) or failed with -ENOMEM.  Any other error makes
- * the function return NULL; on success the new task is returned.
- */
-struct task_struct *
-spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...)
-{
-	char name[TASK_COMM_LEN];
-	struct task_struct *tsk;
-	va_list ap;
-
-	/* Format the name once; every attempt reuses it. */
-	va_start(ap, namefmt);
-	vsnprintf(name, sizeof (name), namefmt, ap);
-	va_end(ap);
-
-	for (;;) {
-		tsk = kthread_create(func, data, "%s", name);
-		if (!IS_ERR(tsk))
-			return (tsk);
-
-		if (signal_pending(current)) {
-			clear_thread_flag(TIF_SIGPENDING);
-			continue;
-		}
-
-		if (PTR_ERR(tsk) != -ENOMEM)
-			return (NULL);
-	}
-}
-EXPORT_SYMBOL(spl_kthread_create);
diff --git a/module/spl/spl-tsd.c b/module/spl/spl-tsd.c
deleted file mode 100644
index 14342d5a6..000000000
--- a/module/spl/spl-tsd.c
+++ /dev/null
@@ -1,720 +0,0 @@
-/*
- * Copyright (C) 2010 Lawrence Livermore National Security, LLC.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- *
- * Solaris Porting Layer (SPL) Thread Specific Data Implementation.
- *
- * Thread specific data is implemented using a hash table; this avoids
- * the need to add a member to the task structure and allows maximum
- * portability between kernels. This implementation has been optimized
- * to keep the tsd_set() and tsd_get() times as small as possible.
- *
- * The majority of the entries in the hash table are for specific tsd
- * entries. These entries are hashed by the product of their key and
- * pid because by design the key and pid are guaranteed to be unique.
- * Their product also has the desirable property that it will be uniformly
- * distributed over the hash bins provided neither the pid nor key is zero.
- * Under linux the zero pid is always the init process and thus won't be
- * used, and this implementation is careful never to assign a zero key.
- * By default the hash table is sized to 512 bins which is expected to
- * be sufficient for light to moderate usage of thread specific data.
- *
- * The hash table contains two additional types of entries. The first
- * type of entry is called a 'key' entry and it is added to the hash during
- * tsd_create(). It is used to store the address of the destructor function
- * and it is used as an anchor point. All tsd entries which use the same
- * key will be linked to this entry. This is used during tsd_destroy() to
- * quickly call the destructor function for all tsd associated with the key.
- * The 'key' entry may be looked up with tsd_hash_search() by passing the
- * key you wish to lookup and DTOR_PID constant as the pid.
- *
- * The second type of entry is called a 'pid' entry and it is added to the
- * hash the first time a process set a key. The 'pid' entry is also used
- * as an anchor and all tsd for the process will be linked to it. This
- * list is used during tsd_exit() to ensure all registered destructors
- * are run for the process. The 'pid' entry may be looked up with
- * tsd_hash_search() by passing the PID_KEY constant as the key, and
- * the process pid. Note that tsd_exit() is called by thread_exit()
- * so if you're using the Solaris thread API you should not need to call
- * tsd_exit() directly.
- *
- */
-
-#include <sys/kmem.h>
-#include <sys/thread.h>
-#include <sys/tsd.h>
-#include <linux/hash.h>
-
-/* One hash bin: a chain of entries and the lock taken while using it. */
-typedef struct tsd_hash_bin {
- spinlock_t hb_lock; /* Held while walking or changing hb_head */
- struct hlist_head hb_head; /* Entries hashing to this bin */
-} tsd_hash_bin_t;
-
-/* The hash table itself. */
-typedef struct tsd_hash_table {
- spinlock_t ht_lock; /* Table lock taken by the add functions */
- uint_t ht_bits; /* Bit count handed to hash_long() */
- uint_t ht_key; /* Key most recently handed out */
- tsd_hash_bin_t *ht_bins; /* Array of hash bins */
-} tsd_hash_table_t;
-
-/* A single hash entry: a tsd value, a 'key' anchor or a 'pid' anchor. */
-typedef struct tsd_hash_entry {
- uint_t he_key; /* Key, or PID_KEY for a pid anchor */
- pid_t he_pid; /* Pid, or DTOR_PID for a key anchor */
- dtor_func_t he_dtor; /* Destructor registered for the key */
- void *he_value; /* Stored value */
- struct hlist_node he_list; /* Link on the hash bin chain */
- struct list_head he_key_list; /* Link to entries of the same key */
- struct list_head he_pid_list; /* Link to entries of the same pid */
-} tsd_hash_entry_t;
-
-/* The single table instance used by this file. */
-static tsd_hash_table_t *tsd_hash_table = NULL;
-
-
-/*
- * tsd_hash_search - look up the entry stored for a key/pid pair
- * @table: hash table
- * @key: search key
- * @pid: search pid
- *
- * The bin lock is held only while the chain is walked.  Returns the
- * matching entry, or NULL when the pair is not in the table.
- */
-static tsd_hash_entry_t *
-tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
-{
-	tsd_hash_entry_t *cur, *found = NULL;
-	struct hlist_node *pos;
-	tsd_hash_bin_t *bucket;
-	ulong_t idx;
-
-	idx = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
-	bucket = &table->ht_bins[idx];
-
-	spin_lock(&bucket->hb_lock);
-	hlist_for_each(pos, &bucket->hb_head) {
-		cur = hlist_entry(pos, tsd_hash_entry_t, he_list);
-		if (cur->he_key != key || cur->he_pid != pid)
-			continue;
-
-		found = cur;
-		break;
-	}
-	spin_unlock(&bucket->hb_lock);
-
-	return (found);
-}
-
-/*
- * tsd_hash_dtor - run destructors for, and free, a list of entries
- * @work: list of hash entries
- *
- * Every entry on @work must already have been removed from the hash.  The
- * registered destructor is called on the value of each entry except the
- * 'key' anchors (he_pid == DTOR_PID), then the entry memory is freed.
- */
-static void
-tsd_hash_dtor(struct hlist_head *work)
-{
-	struct hlist_node *head;
-	tsd_hash_entry_t *victim;
-
-	while ((head = work->first) != NULL) {
-		victim = hlist_entry(head, tsd_hash_entry_t, he_list);
-		hlist_del(head);
-
-		if (victim->he_pid != DTOR_PID && victim->he_dtor)
-			victim->he_dtor(victim->he_value);
-
-		kmem_free(victim, sizeof (tsd_hash_entry_t));
-	}
-}
-
-/*
- * tsd_hash_add - adds an entry to hash table
- * @table: hash table
- * @key: search key
- * @pid: search pid
- * @value: value to store for the key/pid pair
- *
- * The caller is responsible for ensuring the unique key/pid do not
- * already exist in the hash table.  This is possible because all entries
- * are thread specific, thus a concurrent thread will never attempt to
- * add this key/pid.  Because multiple bins must be checked to add
- * links to the dtor and pid entries the entire table is locked.
- *
- * Returns 0 on success or ENOMEM when the entry cannot be allocated.
- */
-static int
-tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
-{
-	tsd_hash_entry_t *fresh, *key_anchor, *pid_anchor;
-	tsd_hash_bin_t *bucket;
-	ulong_t idx;
-
-	ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);
-
-	/* Build the new entry before any lock is taken. */
-	fresh = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
-	if (fresh == NULL)
-		return (ENOMEM);
-
-	INIT_HLIST_NODE(&fresh->he_list);
-	INIT_LIST_HEAD(&fresh->he_key_list);
-	INIT_LIST_HEAD(&fresh->he_pid_list);
-	fresh->he_value = value;
-	fresh->he_pid = pid;
-	fresh->he_key = key;
-
-	spin_lock(&table->ht_lock);
-
-	/* Destructor entry must exist for all valid keys */
-	key_anchor = tsd_hash_search(table, key, DTOR_PID);
-	ASSERT3P(key_anchor, !=, NULL);
-	fresh->he_dtor = key_anchor->he_dtor;
-
-	/* Process entry must exist for all valid processes */
-	pid_anchor = tsd_hash_search(table, PID_KEY, pid);
-	ASSERT3P(pid_anchor, !=, NULL);
-
-	idx = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
-	bucket = &table->ht_bins[idx];
-
-	/* Link into the hash chain and onto both anchor lists. */
-	spin_lock(&bucket->hb_lock);
-	hlist_add_head(&fresh->he_list, &bucket->hb_head);
-	list_add(&fresh->he_key_list, &key_anchor->he_key_list);
-	list_add(&fresh->he_pid_list, &pid_anchor->he_pid_list);
-	spin_unlock(&bucket->hb_lock);
-
-	spin_unlock(&table->ht_lock);
-
-	return (0);
-}
-
-/*
- * tsd_hash_add_key - adds a destructor entry to the hash table
- * @table: hash table
- * @keyp: set to the newly assigned key on success
- * @dtor: key destructor
- *
- * For every unique key there is a single entry in the hash which is used
- * as anchor.  All other thread specific entries for this key are linked
- * to this anchor via the 'he_key_list' list head.  On return the keyp
- * will be set to the next available key for the hash table.
- *
- * Returns 0 on success, ENOMEM when the anchor entry cannot be allocated,
- * or ENOENT when all TSD_KEYS_MAX keys are already in use.
- */
-static int
-tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
-{
-	tsd_hash_entry_t *tmp_entry, *entry;
-	tsd_hash_bin_t *bin;
-	ulong_t hash;
-	int keys_checked = 0;
-
-	ASSERT3P(table, !=, NULL);
-
-	/* Allocate entry to be used as a destructor for this key */
-	entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
-	if (entry == NULL)
-		return (ENOMEM);
-
-	/* Determine next available key value */
-	spin_lock(&table->ht_lock);
-	do {
-		/* Limited to TSD_KEYS_MAX concurrent unique keys */
-		if (table->ht_key++ > TSD_KEYS_MAX)
-			table->ht_key = 1;
-
-		/* Ensure failure when all TSD_KEYS_MAX keys are in use */
-		if (keys_checked++ >= TSD_KEYS_MAX) {
-			spin_unlock(&table->ht_lock);
-			/* The entry was never published; do not leak it. */
-			kmem_free(entry, sizeof (tsd_hash_entry_t));
-			return (ENOENT);
-		}
-
-		tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
-	} while (tmp_entry);
-
-	/* Add destructor entry in to hash table */
-	entry->he_key = *keyp = table->ht_key;
-	entry->he_pid = DTOR_PID;
-	entry->he_dtor = dtor;
-	entry->he_value = NULL;
-	INIT_HLIST_NODE(&entry->he_list);
-	INIT_LIST_HEAD(&entry->he_key_list);
-	INIT_LIST_HEAD(&entry->he_pid_list);
-
-	hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
-	bin = &table->ht_bins[hash];
-	spin_lock(&bin->hb_lock);
-
-	hlist_add_head(&entry->he_list, &bin->hb_head);
-
-	spin_unlock(&bin->hb_lock);
-	spin_unlock(&table->ht_lock);
-
-	return (0);
-}
-
-/*
- * tsd_hash_add_pid - adds a process entry to the hash table
- * @table: hash table
- * @pid: search pid
- *
- * For every process there is a single entry in the hash which is used
- * as anchor. All other thread specific entries for this process are
- * linked to this anchor via the 'he_pid_list' list head.
- */
-static int
-tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
-{
-	tsd_hash_entry_t *anchor;
-	tsd_hash_bin_t *bucket;
-	ulong_t idx;
-
-	/* The per-process anchor is allocated before any lock is taken */
-	anchor = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
-	if (anchor == NULL)
-		return (ENOMEM);
-
-	spin_lock(&table->ht_lock);
-
-	/* A PID anchor carries neither a value nor a destructor */
-	anchor->he_pid = pid;
-	anchor->he_key = PID_KEY;
-	anchor->he_value = NULL;
-	anchor->he_dtor = NULL;
-	INIT_LIST_HEAD(&anchor->he_pid_list);
-	INIT_LIST_HEAD(&anchor->he_key_list);
-	INIT_HLIST_NODE(&anchor->he_list);
-
-	/* Pick the bin from the (PID_KEY, pid) pair */
-	idx = hash_long((ulong_t)PID_KEY * (ulong_t)pid, table->ht_bits);
-	bucket = &table->ht_bins[idx];
-
-	/* The bin lock nests inside the table lock */
-	spin_lock(&bucket->hb_lock);
-	hlist_add_head(&anchor->he_list, &bucket->hb_head);
-	spin_unlock(&bucket->hb_lock);
-
-	spin_unlock(&table->ht_lock);
-
-	return (0);
-}
-
-/*
- * tsd_hash_del - delete an entry from hash table, key, and pid lists
- * @table: hash table (not referenced by the function body)
- * @entry: entry to unlink
- *
- * Unlinks @entry from its hash bin chain and from the per-key and
- * per-pid lists. The entry itself is not freed and no lock is taken
- * here; the callers visible in this file hold the table lock and the
- * relevant bin lock around the call.
- */
-static void
-tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
-{
- hlist_del(&entry->he_list);
- list_del_init(&entry->he_key_list);
- list_del_init(&entry->he_pid_list);
-}
-
-/*
- * tsd_hash_table_init - allocate a hash table
- * @bits: hash table size
- *
- * A hash table with 2^bits bins will be created, it may not be resized
- * after the fact and must be free'd with tsd_hash_table_fini().
- */
-static tsd_hash_table_t *
-tsd_hash_table_init(uint_t bits)
-{
-	tsd_hash_table_t *tbl;
-	int nbins = (1 << bits);
-	int i;
-
-	tbl = kmem_zalloc(sizeof (tsd_hash_table_t), KM_SLEEP);
-	if (tbl == NULL)
-		return (NULL);
-
-	tbl->ht_bins = kmem_zalloc(sizeof (tsd_hash_bin_t) * nbins, KM_SLEEP);
-	if (tbl->ht_bins == NULL) {
-		/* Undo the table allocation before reporting failure */
-		kmem_free(tbl, sizeof (tsd_hash_table_t));
-		return (NULL);
-	}
-
-	/* Table-wide state: lock, size and the first candidate key */
-	spin_lock_init(&tbl->ht_lock);
-	tbl->ht_bits = bits;
-	tbl->ht_key = 1;
-
-	/* Every bin gets its own lock and an empty chain */
-	for (i = 0; i < nbins; i++) {
-		tsd_hash_bin_t *bin = &tbl->ht_bins[i];
-
-		spin_lock_init(&bin->hb_lock);
-		INIT_HLIST_HEAD(&bin->hb_head);
-	}
-
-	return (tbl);
-}
-
-/*
- * tsd_hash_table_fini - free a hash table
- * @table: hash table
- *
- * Free a hash table allocated by tsd_hash_table_init(). If the hash
- * table is not empty this function will call the proper destructor for
- * all remaining entries before freeing the memory used by those entries.
- */
-static void
-tsd_hash_table_fini(tsd_hash_table_t *table)
-{
- /* Private list collecting every entry unlinked from the table */
- HLIST_HEAD(work);
- tsd_hash_bin_t *bin;
- tsd_hash_entry_t *entry;
- int size, i;
-
- ASSERT3P(table, !=, NULL);
- /* Table lock is taken first, each bin lock nests inside it */
- spin_lock(&table->ht_lock);
- for (i = 0, size = (1 << table->ht_bits); i < size; i++) {
- bin = &table->ht_bins[i];
- spin_lock(&bin->hb_lock);
- /* Drain the bin: always take the first entry until it is empty */
- while (!hlist_empty(&bin->hb_head)) {
- entry = hlist_entry(bin->hb_head.first,
- tsd_hash_entry_t, he_list);
- tsd_hash_del(table, entry);
- hlist_add_head(&entry->he_list, &work);
- }
- spin_unlock(&bin->hb_lock);
- }
- spin_unlock(&table->ht_lock);
-
- /*
- * The collected entries are handed to tsd_hash_dtor() only after
- * all locks are dropped (tsd_hash_dtor() is defined outside this
- * view; presumably it runs the destructors and frees the entries).
- */
- tsd_hash_dtor(&work);
- kmem_free(table->ht_bins, sizeof (tsd_hash_bin_t)*(1<<table->ht_bits));
- kmem_free(table, sizeof (tsd_hash_table_t));
-}
-
-/*
- * tsd_remove_entry - remove a tsd entry for this thread
- * @entry: entry to remove
- *
- * Remove the thread specific data @entry for this thread.
- * If this is the last entry for this thread, also remove the PID entry.
- */
-static void
-tsd_remove_entry(tsd_hash_entry_t *entry)
-{
- /* Private list of unlinked entries, processed after unlocking */
- HLIST_HEAD(work);
- tsd_hash_table_t *table;
- tsd_hash_entry_t *pid_entry;
- tsd_hash_bin_t *pid_entry_bin, *entry_bin;
- ulong_t hash;
-
- table = tsd_hash_table;
- ASSERT3P(table, !=, NULL);
- ASSERT3P(entry, !=, NULL);
-
- spin_lock(&table->ht_lock);
-
- hash = hash_long((ulong_t)entry->he_key *
- (ulong_t)entry->he_pid, table->ht_bits);
- entry_bin = &table->ht_bins[hash];
-
- /*
- * save the possible pid_entry: the neighbour on the pid list must be
- * read before tsd_hash_del() re-initialises entry->he_pid_list
- */
- pid_entry = list_entry(entry->he_pid_list.next, tsd_hash_entry_t,
- he_pid_list);
-
- /* remove entry */
- spin_lock(&entry_bin->hb_lock);
- tsd_hash_del(table, entry);
- hlist_add_head(&entry->he_list, &work);
- spin_unlock(&entry_bin->hb_lock);
-
- /* if pid_entry is indeed pid_entry, then remove it if it's empty */
- if (pid_entry->he_key == PID_KEY &&
- list_empty(&pid_entry->he_pid_list)) {
- hash = hash_long((ulong_t)pid_entry->he_key *
- (ulong_t)pid_entry->he_pid, table->ht_bits);
- pid_entry_bin = &table->ht_bins[hash];
-
- spin_lock(&pid_entry_bin->hb_lock);
- tsd_hash_del(table, pid_entry);
- hlist_add_head(&pid_entry->he_list, &work);
- spin_unlock(&pid_entry_bin->hb_lock);
- }
-
- spin_unlock(&table->ht_lock);
-
- /* Entries on the work list are handed over with no lock held */
- tsd_hash_dtor(&work);
-}
-
-/*
- * tsd_set - set thread specific data
- * @key: lookup key
- * @value: value to set
- *
- * Caller must prevent racing tsd_create() or tsd_destroy(), protected
- * from racing tsd_get() or tsd_set() because it is thread specific.
- * This function has been optimized to be fast for the update case.
- * When setting the tsd initially it will be slower due to additional
- * required locking and potential memory allocations.
- */
-int
-tsd_set(uint_t key, void *value)
-{
-	tsd_hash_table_t *table = tsd_hash_table;
-	tsd_hash_entry_t *found;
-	pid_t pid = curthread->pid;
-	/* a NULL value is a request to drop the entry */
-	boolean_t clearing = (value == NULL);
-	int error;
-
-	ASSERT3P(table, !=, NULL);
-
-	if (key == 0 || key > TSD_KEYS_MAX)
-		return (EINVAL);
-
-	/* Fast path: the (key, pid) entry already exists */
-	found = tsd_hash_search(table, key, pid);
-	if (found != NULL) {
-		found->he_value = value;
-		if (clearing)
-			tsd_remove_entry(found);
-		return (0);
-	}
-
-	/* Nothing stored and nothing to store */
-	if (clearing)
-		return (0);
-
-	/* First value for this thread: make sure its PID anchor exists */
-	if (tsd_hash_search(table, PID_KEY, pid) == NULL) {
-		error = tsd_hash_add_pid(table, pid);
-		if (error)
-			return (error);
-	}
-
-	return (tsd_hash_add(table, key, pid, value));
-}
-EXPORT_SYMBOL(tsd_set);
-
-/*
- * tsd_get - get thread specific data
- * @key: lookup key
- *
- * Caller must prevent racing tsd_create() or tsd_destroy(). This
- * implementation is designed to be fast and scalable, it does not
- * lock the entire table only a single hash bin.
- */
-void *
-tsd_get(uint_t key)
-{
-	tsd_hash_entry_t *hit;
-
-	ASSERT3P(tsd_hash_table, !=, NULL);
-
-	/* Keys outside 1..TSD_KEYS_MAX never have a value */
-	if (key == 0 || key > TSD_KEYS_MAX)
-		return (NULL);
-
-	hit = tsd_hash_search(tsd_hash_table, key, curthread->pid);
-
-	return ((hit != NULL) ? hit->he_value : NULL);
-}
-EXPORT_SYMBOL(tsd_get);
-
-/*
- * tsd_get_by_thread - get thread specific data for specified thread
- * @key: lookup key
- * @thread: thread to lookup
- *
- * Caller must prevent racing tsd_create() or tsd_destroy(). This
- * implementation is designed to be fast and scalable, it does not
- * lock the entire table only a single hash bin.
- */
-void *
-tsd_get_by_thread(uint_t key, kthread_t *thread)
-{
-	tsd_hash_entry_t *hit;
-
-	ASSERT3P(tsd_hash_table, !=, NULL);
-
-	/* Keys outside 1..TSD_KEYS_MAX never have a value */
-	if (key == 0 || key > TSD_KEYS_MAX)
-		return (NULL);
-
-	/* Same lookup as tsd_get(), keyed on the given thread's pid */
-	hit = tsd_hash_search(tsd_hash_table, key, thread->pid);
-
-	return ((hit != NULL) ? hit->he_value : NULL);
-}
-EXPORT_SYMBOL(tsd_get_by_thread);
-
-/*
- * tsd_create - create thread specific data key
- * @keyp: lookup key address
- * @dtor: destructor called during tsd_destroy() or tsd_exit()
- *
- * Provided key must be set to 0 or it assumed to be already in use.
- * The dtor is allowed to be NULL in which case no additional cleanup
- * for the data is performed during tsd_destroy() or tsd_exit().
- *
- * Caller must prevent racing tsd_set() or tsd_get(), this function is
- * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
- */
-void
-tsd_create(uint_t *keyp, dtor_func_t dtor)
-{
-	ASSERT3P(keyp, !=, NULL);
-
-	/*
-	 * A non-zero *keyp is treated as already created.  The result of
-	 * the allocation is deliberately ignored; on failure *keyp is
-	 * left at 0.
-	 */
-	if (*keyp == 0)
-		(void) tsd_hash_add_key(tsd_hash_table, keyp, dtor);
-}
-EXPORT_SYMBOL(tsd_create);
-
-/*
- * tsd_destroy - destroy thread specific data
- * @keyp: lookup key address
- *
- * Destroys the thread specific data on all threads which use this key.
- *
- * Caller must prevent racing tsd_set() or tsd_get(), this function is
- * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
- */
-void
-tsd_destroy(uint_t *keyp)
-{
- /* Private list of unlinked entries, processed after unlocking */
- HLIST_HEAD(work);
- tsd_hash_table_t *table;
- tsd_hash_entry_t *dtor_entry, *entry;
- tsd_hash_bin_t *dtor_entry_bin, *entry_bin;
- ulong_t hash;
-
- table = tsd_hash_table;
- ASSERT3P(table, !=, NULL);
-
- spin_lock(&table->ht_lock);
- /* An unknown key has no destructor anchor: nothing to do */
- dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
- if (dtor_entry == NULL) {
- spin_unlock(&table->ht_lock);
- return;
- }
-
- /*
- * All threads which use this key must be linked off of the
- * DTOR_PID entry. They are removed from the hash table and
- * linked in to a private working list to be destroyed.
- */
- while (!list_empty(&dtor_entry->he_key_list)) {
- entry = list_entry(dtor_entry->he_key_list.next,
- tsd_hash_entry_t, he_key_list);
- ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
- ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);
-
- hash = hash_long((ulong_t)entry->he_key *
- (ulong_t)entry->he_pid, table->ht_bits);
- entry_bin = &table->ht_bins[hash];
-
- spin_lock(&entry_bin->hb_lock);
- tsd_hash_del(table, entry);
- hlist_add_head(&entry->he_list, &work);
- spin_unlock(&entry_bin->hb_lock);
- }
-
- /* Finally unlink the destructor anchor itself */
- hash = hash_long((ulong_t)dtor_entry->he_key *
- (ulong_t)dtor_entry->he_pid, table->ht_bits);
- dtor_entry_bin = &table->ht_bins[hash];
-
- spin_lock(&dtor_entry_bin->hb_lock);
- tsd_hash_del(table, dtor_entry);
- hlist_add_head(&dtor_entry->he_list, &work);
- spin_unlock(&dtor_entry_bin->hb_lock);
- spin_unlock(&table->ht_lock);
-
- tsd_hash_dtor(&work);
- /* Reset the caller's key so tsd_create() can allocate it again */
- *keyp = 0;
-}
-EXPORT_SYMBOL(tsd_destroy);
-
-/*
- * tsd_exit - destroys all thread specific data for this thread
- *
- * Destroys all the thread specific data for this thread.
- *
- * Caller must prevent racing tsd_set() or tsd_get(), this function is
- * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
- */
-void
-tsd_exit(void)
-{
- /* Private list of unlinked entries, processed after unlocking */
- HLIST_HEAD(work);
- tsd_hash_table_t *table;
- tsd_hash_entry_t *pid_entry, *entry;
- tsd_hash_bin_t *pid_entry_bin, *entry_bin;
- ulong_t hash;
-
- table = tsd_hash_table;
- ASSERT3P(table, !=, NULL);
-
- spin_lock(&table->ht_lock);
- /* No PID anchor means this thread never stored a value */
- pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
- if (pid_entry == NULL) {
- spin_unlock(&table->ht_lock);
- return;
- }
-
- /*
- * All keys associated with this pid must be linked off of the
- * PID_KEY entry. They are removed from the hash table and
- * linked in to a private working list to be destroyed.
- */
-
- while (!list_empty(&pid_entry->he_pid_list)) {
- entry = list_entry(pid_entry->he_pid_list.next,
- tsd_hash_entry_t, he_pid_list);
- ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
-
- hash = hash_long((ulong_t)entry->he_key *
- (ulong_t)entry->he_pid, table->ht_bits);
- entry_bin = &table->ht_bins[hash];
-
- spin_lock(&entry_bin->hb_lock);
- tsd_hash_del(table, entry);
- hlist_add_head(&entry->he_list, &work);
- spin_unlock(&entry_bin->hb_lock);
- }
-
- /* Finally unlink the PID anchor itself */
- hash = hash_long((ulong_t)pid_entry->he_key *
- (ulong_t)pid_entry->he_pid, table->ht_bits);
- pid_entry_bin = &table->ht_bins[hash];
-
- spin_lock(&pid_entry_bin->hb_lock);
- tsd_hash_del(table, pid_entry);
- hlist_add_head(&pid_entry->he_list, &work);
- spin_unlock(&pid_entry_bin->hb_lock);
- spin_unlock(&table->ht_lock);
-
- /* Entries on the work list are handed over with no lock held */
- tsd_hash_dtor(&work);
-}
-EXPORT_SYMBOL(tsd_exit);
-
-/*
- * spl_tsd_init - create the global TSD hash table
- *
- * Returns 0 on success and 1 when the table could not be allocated.
- */
-int
-spl_tsd_init(void)
-{
-	tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
-
-	return ((tsd_hash_table == NULL) ? 1 : 0);
-}
-
-/*
- * spl_tsd_fini - tear down the global TSD hash table
- *
- * Releases the table through tsd_hash_table_fini() and clears the
- * global pointer.
- */
-void
-spl_tsd_fini(void)
-{
- tsd_hash_table_fini(tsd_hash_table);
- tsd_hash_table = NULL;
-}
diff --git a/module/spl/spl-vmem.c b/module/spl/spl-vmem.c
deleted file mode 100644
index e1a84a911..000000000
--- a/module/spl/spl-vmem.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <sys/debug.h>
-#include <sys/vmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/shrinker.h>
-#include <linux/module.h>
-
-/*
- * Exported arena handles. All three are defined as NULL here and are
- * not assigned anywhere in this file.
- */
-vmem_t *heap_arena = NULL;
-EXPORT_SYMBOL(heap_arena);
-
-vmem_t *zio_alloc_arena = NULL;
-EXPORT_SYMBOL(zio_alloc_arena);
-
-vmem_t *zio_arena = NULL;
-EXPORT_SYMBOL(zio_arena);
-
-/* Baseline added to the usage estimate computed by vmem_size() */
-#define VMEM_FLOOR_SIZE (4 * 1024 * 1024) /* 4MB floor */
-
-/*
- * Return approximate virtual memory usage based on these assumptions:
- *
- * 1) The major SPL consumer of virtual memory is the kmem cache.
- * 2) Memory allocated with vmem_alloc() is short lived and can be ignored.
- * 3) Allow a 4MB floor as a generous pad given normal consumption.
- * 4) The spl_kmem_cache_sem only contends with cache create/destroy.
- */
-size_t
-vmem_size(vmem_t *vmp, int typemask)
-{
-	spl_kmem_cache_t *skc;
-	size_t alloc = VMEM_FLOOR_SIZE;
-
-	/*
-	 * @vmp is not consulted; the estimate is global.  Asking for both
-	 * allocated and free space yields the total.
-	 */
-	if ((typemask & VMEM_ALLOC) && (typemask & VMEM_FREE))
-		return (VMALLOC_TOTAL);
-
-	/* Sum the slab footprint of every KMC_VMEM backed cache */
-	down_read(&spl_kmem_cache_sem);
-	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
-		if (skc->skc_flags & KMC_VMEM)
-			alloc += skc->skc_slab_size * skc->skc_slab_total;
-	}
-	up_read(&spl_kmem_cache_sem);
-
-	/*
-	 * Clamp the estimate to the total before using it.  'alloc' is a
-	 * size_t, so subtracting an estimate larger than VMALLOC_TOTAL
-	 * would wrap around to a huge "free" value instead of going
-	 * negative, and a MAX(..., 0) on the difference can never catch
-	 * that.
-	 */
-	alloc = MIN(alloc, VMALLOC_TOTAL);
-
-	if (typemask & VMEM_ALLOC)
-		return (alloc);
-	else if (typemask & VMEM_FREE)
-		return (VMALLOC_TOTAL - alloc);
-	else
-		return (0);
-}
-EXPORT_SYMBOL(vmem_size);
-
-/*
- * Public vmem_alloc(), vmem_zalloc() and vmem_free() interfaces.
- */
-/*
- * spl_vmem_alloc - allocate @size bytes with KM_VMEM added to @flags
- *
- * @flags may only contain bits from KM_PUBLIC_MASK (asserted). @func
- * and @line are used only when both DEBUG_KMEM and DEBUG_KMEM_TRACKING
- * are defined. Returns whatever the selected spl_kmem_alloc_*() variant
- * returns.
- */
-void *
-spl_vmem_alloc(size_t size, int flags, const char *func, int line)
-{
- ASSERT0(flags & ~KM_PUBLIC_MASK);
-
- flags |= KM_VMEM;
-
- /* The build configuration selects which allocator variant is used */
-#if !defined(DEBUG_KMEM)
- return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
-#elif !defined(DEBUG_KMEM_TRACKING)
- return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
-#else
- return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
-#endif
-}
-EXPORT_SYMBOL(spl_vmem_alloc);
-
-/*
- * spl_vmem_zalloc - same as spl_vmem_alloc() but also sets KM_ZERO
- *
- * @flags may only contain bits from KM_PUBLIC_MASK (asserted). @func
- * and @line are used only when both DEBUG_KMEM and DEBUG_KMEM_TRACKING
- * are defined.
- */
-void *
-spl_vmem_zalloc(size_t size, int flags, const char *func, int line)
-{
- ASSERT0(flags & ~KM_PUBLIC_MASK);
-
- flags |= (KM_VMEM | KM_ZERO);
-
- /* The build configuration selects which allocator variant is used */
-#if !defined(DEBUG_KMEM)
- return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
-#elif !defined(DEBUG_KMEM_TRACKING)
- return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
-#else
- return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
-#endif
-}
-EXPORT_SYMBOL(spl_vmem_zalloc);
-
-/*
- * spl_vmem_free - release a buffer obtained from spl_vmem_alloc() or
- * spl_vmem_zalloc()
- * @buf: buffer to free
- * @size: size that was passed to the allocation call
- *
- * Forwards to the spl_kmem_free_*() variant matching the build
- * configuration.  The calls are plain statements: a void function may
- * not return an expression in ISO C.
- */
-void
-spl_vmem_free(const void *buf, size_t size)
-{
-#if !defined(DEBUG_KMEM)
-	spl_kmem_free_impl(buf, size);
-#elif !defined(DEBUG_KMEM_TRACKING)
-	spl_kmem_free_debug(buf, size);
-#else
-	spl_kmem_free_track(buf, size);
-#endif
-}
-EXPORT_SYMBOL(spl_vmem_free);
-
-/* spl_vmem_init - no setup is performed here; always returns 0 */
-int
-spl_vmem_init(void)
-{
- return (0);
-}
-
-/* spl_vmem_fini - counterpart of spl_vmem_init(); nothing to release */
-void
-spl_vmem_fini(void)
-{
-}
diff --git a/module/spl/spl-vnode.c b/module/spl/spl-vnode.c
deleted file mode 100644
index d9056c964..000000000
--- a/module/spl/spl-vnode.c
+++ /dev/null
@@ -1,719 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) Vnode Implementation.
- */
-
-#include <sys/cred.h>
-#include <sys/vnode.h>
-#include <sys/kmem_cache.h>
-#include <linux/falloc.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#ifdef HAVE_FDTABLE_HEADER
-#include <linux/fdtable.h>
-#endif
-
-/*
- * rootdir is a fixed non-NULL pointer constant; within this file it is
- * only compared against (see the ASSERT in vn_openat()) and never
- * dereferenced.
- */
-vnode_t *rootdir = (vnode_t *)0xabcd1234;
-EXPORT_SYMBOL(rootdir);
-
-/* Object caches for vnode_t and file_t, created in spl_vn_init() */
-static spl_kmem_cache_t *vn_cache;
-static spl_kmem_cache_t *vn_file_cache;
-
-/* vn_file_list tracks the file_t objects handed out by vn_getf() */
-static spinlock_t vn_file_lock;
-static LIST_HEAD(vn_file_list);
-
-/*
- * spl_filp_fallocate - call the fallocate callback of @fp if it has one
- *
- * Uses the file_operations callback when HAVE_FILE_FALLOCATE is defined,
- * otherwise the inode_operations callback when HAVE_INODE_FALLOCATE is
- * defined. Returns the callback's result, or -EOPNOTSUPP when no
- * callback is available.
- */
-static int
-spl_filp_fallocate(struct file *fp, int mode, loff_t offset, loff_t len)
-{
- int error = -EOPNOTSUPP;
-
-#ifdef HAVE_FILE_FALLOCATE
- if (fp->f_op->fallocate)
- error = fp->f_op->fallocate(fp, mode, offset, len);
-#else
-#ifdef HAVE_INODE_FALLOCATE
- if (fp->f_dentry && fp->f_dentry->d_inode &&
- fp->f_dentry->d_inode->i_op->fallocate)
- error = fp->f_dentry->d_inode->i_op->fallocate(
- fp->f_dentry->d_inode, mode, offset, len);
-#endif /* HAVE_INODE_FALLOCATE */
-#endif /* HAVE_FILE_FALLOCATE */
-
- return (error);
-}
-
-/*
- * spl_filp_fsync - wrapper hiding the two vfs_fsync() signatures
- *
- * Returns the value of vfs_fsync() unchanged.
- */
-static int
-spl_filp_fsync(struct file *fp, int sync)
-{
-#ifdef HAVE_2ARGS_VFS_FSYNC
- return (vfs_fsync(fp, sync));
-#else
- return (vfs_fsync(fp, (fp)->f_dentry, sync));
-#endif /* HAVE_2ARGS_VFS_FSYNC */
-}
-
-/*
- * spl_kernel_write - write @count bytes from the kernel buffer @buf
- *
- * Uses kernel_write() when HAVE_KERNEL_WRITE_PPOS is defined. Otherwise
- * it calls vfs_write() with the address limit temporarily switched to
- * KERNEL_DS and restored afterwards. Returns the callee's result.
- */
-static ssize_t
-spl_kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
-{
-#if defined(HAVE_KERNEL_WRITE_PPOS)
- return (kernel_write(file, buf, count, pos));
-#else
- mm_segment_t saved_fs;
- ssize_t ret;
-
- saved_fs = get_fs();
- set_fs(KERNEL_DS);
-
- ret = vfs_write(file, (__force const char __user *)buf, count, pos);
-
- set_fs(saved_fs);
-
- return (ret);
-#endif
-}
-
-/*
- * spl_kernel_read - read up to @count bytes into the kernel buffer @buf
- *
- * Uses kernel_read() when HAVE_KERNEL_READ_PPOS is defined. Otherwise
- * it calls vfs_read() with the address limit temporarily switched to
- * KERNEL_DS and restored afterwards. Returns the callee's result.
- */
-static ssize_t
-spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
-{
-#if defined(HAVE_KERNEL_READ_PPOS)
- return (kernel_read(file, buf, count, pos));
-#else
- mm_segment_t saved_fs;
- ssize_t ret;
-
- saved_fs = get_fs();
- set_fs(KERNEL_DS);
-
- ret = vfs_read(file, (void __user *)buf, count, pos);
-
- set_fs(saved_fs);
-
- return (ret);
-#endif
-}
-
-/*
- * vn_mode_to_vtype - map the file type bits of @mode to a vtype_t
- *
- * Returns VNON when @mode matches none of the tested file types.
- */
-vtype_t
-vn_mode_to_vtype(mode_t mode)
-{
-	vtype_t vtype = VNON;
-
-	if (S_ISREG(mode))
-		vtype = VREG;
-	else if (S_ISDIR(mode))
-		vtype = VDIR;
-	else if (S_ISCHR(mode))
-		vtype = VCHR;
-	else if (S_ISBLK(mode))
-		vtype = VBLK;
-	else if (S_ISFIFO(mode))
-		vtype = VFIFO;
-	else if (S_ISLNK(mode))
-		vtype = VLNK;
-	else if (S_ISSOCK(mode))
-		vtype = VSOCK;
-
-	return (vtype);
-} /* vn_mode_to_vtype() */
-EXPORT_SYMBOL(vn_mode_to_vtype);
-
-/*
- * vn_vtype_to_mode - map a vtype_t to the matching S_IF* file type bits
- *
- * NOTE(review): the fallback value is the vtype constant VNON rather
- * than a mode constant; confirm this is intended.
- */
-mode_t
-vn_vtype_to_mode(vtype_t vtype)
-{
-	mode_t mode = VNON;
-
-	if (vtype == VREG)
-		mode = S_IFREG;
-	else if (vtype == VDIR)
-		mode = S_IFDIR;
-	else if (vtype == VCHR)
-		mode = S_IFCHR;
-	else if (vtype == VBLK)
-		mode = S_IFBLK;
-	else if (vtype == VFIFO)
-		mode = S_IFIFO;
-	else if (vtype == VLNK)
-		mode = S_IFLNK;
-	else if (vtype == VSOCK)
-		mode = S_IFSOCK;
-
-	return (mode);
-} /* vn_vtype_to_mode() */
-EXPORT_SYMBOL(vn_vtype_to_mode);
-
-/*
- * vn_alloc - take a vnode from vn_cache and reset v_file and v_type
- *
- * Returns NULL when the cache allocation fails.
- */
-vnode_t *
-vn_alloc(int flag)
-{
-	vnode_t *vp = kmem_cache_alloc(vn_cache, flag);
-
-	if (vp == NULL)
-		return (NULL);
-
-	vp->v_type = 0;
-	vp->v_file = NULL;
-
-	return (vp);
-} /* vn_alloc() */
-EXPORT_SYMBOL(vn_alloc);
-
-/* vn_free - return @vp to vn_cache; v_file is not touched here */
-void
-vn_free(vnode_t *vp)
-{
- kmem_cache_free(vn_cache, vp);
-} /* vn_free() */
-EXPORT_SYMBOL(vn_free);
-
-/*
- * vn_open - open @path and wrap the resulting struct file in a vnode
- * @path: path handed to filp_open()
- * @seg: must be UIO_SYSSPACE (asserted)
- * @flags: must contain FREAD and/or FWRITE (asserted)
- * @mode: creation mode handed to filp_open()
- * @vpp: receives the new vnode on success, NULL otherwise
- * @x1, @x2: unused
- *
- * Returns 0 on success or a positive errno value.
- */
-int
-vn_open(const char *path, uio_seg_t seg, int flags, int mode, vnode_t **vpp,
- int x1, void *x2)
-{
- struct file *fp;
- struct kstat stat;
- int rc, saved_umask = 0;
- gfp_t saved_gfp;
- vnode_t *vp;
-
- ASSERT(flags & (FWRITE | FREAD));
- ASSERT(seg == UIO_SYSSPACE);
- ASSERT(vpp);
- *vpp = NULL;
-
- /* Writing without FCREAT gets FEXCL added */
- if (!(flags & FCREAT) && (flags & FWRITE))
- flags |= FEXCL;
-
- /*
- * Note for filp_open() the two low bits must be remapped to mean:
- * 01 - read-only -> 00 read-only
- * 10 - write-only -> 01 write-only
- * 11 - read-write -> 10 read-write
- */
- flags--;
-
- /* The umask is zeroed around a creating open and restored after */
- if (flags & FCREAT)
- saved_umask = xchg(&current->fs->umask, 0);
-
- fp = filp_open(path, flags, mode);
-
- if (flags & FCREAT)
- (void) xchg(&current->fs->umask, saved_umask);
-
- /* filp_open() encodes a negative errno; callers get it positive */
- if (IS_ERR(fp))
- return (-PTR_ERR(fp));
-
-#if defined(HAVE_4ARGS_VFS_GETATTR)
- rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
-#elif defined(HAVE_2ARGS_VFS_GETATTR)
- rc = vfs_getattr(&fp->f_path, &stat);
-#else
- rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
-#endif
- if (rc) {
- filp_close(fp, 0);
- return (-rc);
- }
-
- vp = vn_alloc(KM_SLEEP);
- if (!vp) {
- filp_close(fp, 0);
- return (ENOMEM);
- }
-
- /*
- * Clear __GFP_IO and __GFP_FS from the mapping's gfp mask; the
- * previous mask is kept in the vnode and restored by vn_close().
- */
- saved_gfp = mapping_gfp_mask(fp->f_mapping);
- mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS));
-
- mutex_enter(&vp->v_lock);
- vp->v_type = vn_mode_to_vtype(stat.mode);
- vp->v_file = fp;
- vp->v_gfp_mask = saved_gfp;
- *vpp = vp;
- mutex_exit(&vp->v_lock);
-
- return (0);
-} /* vn_open() */
-EXPORT_SYMBOL(vn_open);
-
-/*
- * vn_openat - open @path relative to rootdir
- *
- * @vp must be rootdir (asserted); @fd is unused.  The path is prefixed
- * with '/' and handed to vn_open(), whose result is returned.  Returns
- * ENOMEM when the temporary path buffer cannot be allocated.
- */
-int
-vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
-    vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
-{
-	char *abspath;
-	int buflen, error;
-
-	ASSERT(vp == rootdir);
-
-	/* Room for the leading '/' and the terminating NUL */
-	buflen = strlen(path) + 2;
-	abspath = kmalloc(buflen, kmem_flags_convert(KM_SLEEP));
-	if (abspath == NULL)
-		return (ENOMEM);
-
-	(void) snprintf(abspath, buflen, "/%s", path);
-	error = vn_open(abspath, seg, flags, mode, vpp, x1, x2);
-	kfree(abspath);
-
-	return (error);
-} /* vn_openat() */
-EXPORT_SYMBOL(vn_openat);
-
-/*
- * vn_rdwr - read from or write to the file behind @vp
- * @uio: UIO_READ or UIO_WRITE (asserted)
- * @addr, @len: kernel buffer and its length
- * @off: file offset, replaced by the file position when FAPPEND is set
- * @seg: must be UIO_SYSSPACE (asserted)
- * @ioflag: only FAPPEND is allowed (asserted)
- * @residp: when non-NULL receives the number of bytes not transferred
- *
- * Returns 0 on success or a positive errno value.  Without @residp a
- * short transfer is reported as EIO.
- */
-int
-vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
-    uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp)
-{
-	struct file *filp = vp->v_file;
-	loff_t pos = off;
-	int nbytes;
-
-	ASSERT(uio == UIO_WRITE || uio == UIO_READ);
-	ASSERT(seg == UIO_SYSSPACE);
-	ASSERT((ioflag & ~FAPPEND) == 0);
-
-	if (ioflag & FAPPEND)
-		pos = filp->f_pos;
-
-	nbytes = (uio & UIO_WRITE) ?
-	    spl_kernel_write(filp, addr, len, &pos) :
-	    spl_kernel_read(filp, addr, len, &pos);
-
-	/* The file position is updated even when the transfer failed */
-	filp->f_pos = pos;
-
-	if (nbytes < 0)
-		return (-nbytes);
-
-	if (residp != NULL) {
-		*residp = len - nbytes;
-		return (0);
-	}
-
-	return ((nbytes == len) ? 0 : EIO);
-} /* vn_rdwr() */
-EXPORT_SYMBOL(vn_rdwr);
-
-/*
- * vn_close - close the file behind @vp and free the vnode
- *
- * Restores the mapping gfp mask saved in the vnode before closing.
- * Returns the negated result of filp_close(). The remaining parameters
- * are unused.
- */
-int
-vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
-{
- int rc;
-
- ASSERT(vp);
- ASSERT(vp->v_file);
-
- mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask);
- rc = filp_close(vp->v_file, 0);
- vn_free(vp);
-
- return (-rc);
-} /* vn_close() */
-EXPORT_SYMBOL(vn_close);
-
-/*
- * vn_seek() does not actually seek it only performs bounds checking on the
- * proposed seek. We perform minimal checking and allow vn_rdwr() to catch
- * anything more serious.
- */
-int
-vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct)
-{
-	/* Only the proposed offset is inspected; vp, ooff, ct are unused */
-	if (*noffp < 0 || *noffp > MAXOFFSET_T)
-		return (EINVAL);
-
-	return (0);
-}
-EXPORT_SYMBOL(vn_seek);
-
-/*
- * vn_getattr - fill @vap from the attributes of the file behind @vp
- *
- * Returns 0 on success or the negated vfs_getattr() result. @flags, @x3
- * and @x4 are unused.
- */
-int
-vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
-{
- struct file *fp;
- struct kstat stat;
- int rc;
-
- ASSERT(vp);
- ASSERT(vp->v_file);
- ASSERT(vap);
-
- fp = vp->v_file;
-
-#if defined(HAVE_4ARGS_VFS_GETATTR)
- rc = vfs_getattr(&fp->f_path, &stat, STATX_BASIC_STATS,
- AT_STATX_SYNC_AS_STAT);
-#elif defined(HAVE_2ARGS_VFS_GETATTR)
- rc = vfs_getattr(&fp->f_path, &stat);
-#else
- rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
-#endif
- if (rc)
- return (-rc);
-
- vap->va_type = vn_mode_to_vtype(stat.mode);
- vap->va_mode = stat.mode;
- vap->va_uid = KUID_TO_SUID(stat.uid);
- vap->va_gid = KGID_TO_SGID(stat.gid);
- /* va_fsid is always reported as 0 */
- vap->va_fsid = 0;
- vap->va_nodeid = stat.ino;
- vap->va_nlink = stat.nlink;
- vap->va_size = stat.size;
- vap->va_blksize = stat.blksize;
- vap->va_atime = stat.atime;
- vap->va_mtime = stat.mtime;
- vap->va_ctime = stat.ctime;
- vap->va_rdev = stat.rdev;
- vap->va_nblocks = stat.blocks;
-
- return (0);
-}
-EXPORT_SYMBOL(vn_getattr);
-
-/*
- * vn_fsync - flush the file behind @vp
- *
- * FDSYNC in @flags requests a data-only sync. Returns the negated
- * result of spl_filp_fsync(). @x3 and @x4 are unused.
- */
-int
-vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
-{
- int datasync = 0;
- int error;
- int fstrans;
-
- ASSERT(vp);
- ASSERT(vp->v_file);
-
- if (flags & FDSYNC)
- datasync = 1;
-
- /*
- * May enter XFS which generates a warning when PF_FSTRANS is set.
- * To avoid this the flag is cleared across the fsync call and then
- * set again if it was set on entry.
- */
- fstrans = __spl_pf_fstrans_check();
- if (fstrans)
- current->flags &= ~(__SPL_PF_FSTRANS);
-
- error = -spl_filp_fsync(vp->v_file, datasync);
- if (fstrans)
- current->flags |= __SPL_PF_FSTRANS;
-
- return (error);
-} /* vn_fsync() */
-EXPORT_SYMBOL(vn_fsync);
-
-/*
- * vn_space - free the byte range described by @bfp in the file behind @vp
- *
- * Only cmd == F_FREESP with l_whence == SEEK_SET is supported; anything
- * else returns EOPNOTSUPP. Returns 0 on success or a positive errno
- * value. @flag, @offset, @x6 and @x7 are unused.
- */
-int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
- offset_t offset, void *x6, void *x7)
-{
- int error = EOPNOTSUPP;
-#ifdef FALLOC_FL_PUNCH_HOLE
- int fstrans;
-#endif
-
- if (cmd != F_FREESP || bfp->l_whence != SEEK_SET)
- return (EOPNOTSUPP);
-
- ASSERT(vp);
- ASSERT(vp->v_file);
- ASSERT(bfp->l_start >= 0 && bfp->l_len > 0);
-
-#ifdef FALLOC_FL_PUNCH_HOLE
- /*
- * May enter XFS which generates a warning when PF_FSTRANS is set.
- * To avoid this the flag is cleared across the fallocate call and
- * then set again if it was set on entry.
- */
- fstrans = __spl_pf_fstrans_check();
- if (fstrans)
- current->flags &= ~(__SPL_PF_FSTRANS);
-
- /*
- * When supported by the underlying file system preferentially
- * use the fallocate() callback to punch a hole over the range
- * while keeping the file size unchanged.
- */
- error = -spl_filp_fallocate(vp->v_file,
- FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
- bfp->l_start, bfp->l_len);
-
- if (fstrans)
- current->flags |= __SPL_PF_FSTRANS;
-
- if (error == 0)
- return (0);
-#endif
-
- /* Fallback: the inode's truncate_range callback, when it exists */
-#ifdef HAVE_INODE_TRUNCATE_RANGE
- if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode &&
- vp->v_file->f_dentry->d_inode->i_op &&
- vp->v_file->f_dentry->d_inode->i_op->truncate_range) {
- off_t end = bfp->l_start + bfp->l_len;
- /*
- * Judging from the code in shmem_truncate_range(),
- * it seems the kernel expects the end offset to be
- * inclusive and aligned to the end of a page.
- */
- if (end % PAGE_SIZE != 0) {
- end &= ~(off_t)(PAGE_SIZE - 1);
- /* Rounding down left nothing to free */
- if (end <= bfp->l_start)
- return (0);
- }
- --end;
-
- vp->v_file->f_dentry->d_inode->i_op->truncate_range(
- vp->v_file->f_dentry->d_inode, bfp->l_start, end);
-
- return (0);
- }
-#endif
-
- return (error);
-}
-EXPORT_SYMBOL(vn_space);
-
-/*
- * file_find - look up the tracked file_t for (@fd, @task)
- *
- * Function must be called while holding the vn_file_lock.  Returns the
- * first matching entry on vn_file_list, or NULL when there is none.
- */
-static file_t *
-file_find(int fd, struct task_struct *task)
-{
-	file_t *cur;
-
-	list_for_each_entry(cur, &vn_file_list, f_list) {
-		if (fd != cur->f_fd || cur->f_task != task)
-			continue;
-
-		/* A tracked entry is expected to hold a reference */
-		ASSERT(atomic_read(&cur->f_ref) != 0);
-		return (cur);
-	}
-
-	return (NULL);
-} /* file_find() */
-
-/*
- * vn_getf - return the tracked file_t for @fd of the current task
- *
- * An existing entry gets its reference count bumped. Otherwise a new
- * file_t and vnode are created for the file behind @fd and put on
- * vn_file_list. Returns NULL when @fd is negative or any step fails.
- */
-file_t *
-vn_getf(int fd)
-{
- struct kstat stat;
- struct file *lfp;
- file_t *fp;
- vnode_t *vp;
- int rc = 0;
-
- if (fd < 0)
- return (NULL);
-
- /* Already open just take an extra reference */
- spin_lock(&vn_file_lock);
-
- fp = file_find(fd, current);
- if (fp) {
- /*
- * Take a reference on whatever @fd refers to now and drop
- * one on the cached file; when both are the same file the
- * net effect is zero.
- */
- lfp = fget(fd);
- fput(fp->f_file);
- /*
- * areleasef() can cause us to see a stale reference when
- * userspace has reused a file descriptor before areleasef()
- * has run. fput() the stale reference and replace it. We
- * retain the original reference count such that the concurrent
- * areleasef() will decrement its reference and terminate.
- */
- if (lfp != fp->f_file) {
- fp->f_file = lfp;
- fp->f_vnode->v_file = lfp;
- }
- atomic_inc(&fp->f_ref);
- spin_unlock(&vn_file_lock);
- return (fp);
- }
-
- spin_unlock(&vn_file_lock);
-
- /* File was not yet opened create the object and setup */
- fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
- if (fp == NULL)
- goto out;
-
- mutex_enter(&fp->f_lock);
-
- fp->f_fd = fd;
- fp->f_task = current;
- fp->f_offset = 0;
- atomic_inc(&fp->f_ref);
-
- lfp = fget(fd);
- if (lfp == NULL)
- goto out_mutex;
-
- vp = vn_alloc(KM_SLEEP);
- if (vp == NULL)
- goto out_fget;
-
-#if defined(HAVE_4ARGS_VFS_GETATTR)
- rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE,
- AT_STATX_SYNC_AS_STAT);
-#elif defined(HAVE_2ARGS_VFS_GETATTR)
- rc = vfs_getattr(&lfp->f_path, &stat);
-#else
- rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat);
-#endif
- if (rc)
- goto out_vnode;
-
- mutex_enter(&vp->v_lock);
- vp->v_type = vn_mode_to_vtype(stat.mode);
- vp->v_file = lfp;
- mutex_exit(&vp->v_lock);
-
- fp->f_vnode = vp;
- fp->f_file = lfp;
-
- /* Put it on the tracking list */
- spin_lock(&vn_file_lock);
- list_add(&fp->f_list, &vn_file_list);
- spin_unlock(&vn_file_lock);
-
- mutex_exit(&fp->f_lock);
- return (fp);
-
- /* Error unwinding: each label undoes one more acquisition */
-out_vnode:
- vn_free(vp);
-out_fget:
- fput(lfp);
-out_mutex:
- mutex_exit(&fp->f_lock);
- kmem_cache_free(vn_file_cache, fp);
-out:
- return (NULL);
-} /* getf() */
-EXPORT_SYMBOL(getf);
-
-/*
- * releasef_locked - drop the file reference and free the vnode and the
- * file_t itself. The callers in this file unlink @fp from vn_file_list
- * first and call this with vn_file_lock held.
- */
-static void releasef_locked(file_t *fp)
-{
- ASSERT(fp->f_file);
- ASSERT(fp->f_vnode);
-
- /* Unlinked from list, no refs, safe to free outside mutex */
- fput(fp->f_file);
- vn_free(fp->f_vnode);
-
- kmem_cache_free(vn_file_cache, fp);
-}
-
-/*
- * vn_releasef - release @fd on behalf of the current task
- *
- * Forwards to areleasef() with P_FINFO(current).
- * NOTE(review): areleasef/releasef are not defined in this file;
- * presumably they are aliases of the vn_* functions - confirm in the
- * header.
- */
-void
-vn_releasef(int fd)
-{
- areleasef(fd, P_FINFO(current));
-}
-EXPORT_SYMBOL(releasef);
-
-/*
- * vn_areleasef - drop one reference on the tracked file_t for @fd
- * @fd: file descriptor, negative values are ignored
- * @fip: cast to the owning struct task_struct for the lookup
- *
- * When the reference count reaches zero the entry is unlinked from
- * vn_file_list and released.
- */
-void
-vn_areleasef(int fd, uf_info_t *fip)
-{
- file_t *fp;
- struct task_struct *task = (struct task_struct *)fip;
-
- if (fd < 0)
- return;
-
- spin_lock(&vn_file_lock);
- fp = file_find(fd, task);
- if (fp) {
- /* Decrement and test are separate steps, both under the lock */
- atomic_dec(&fp->f_ref);
- if (atomic_read(&fp->f_ref) > 0) {
- spin_unlock(&vn_file_lock);
- return;
- }
-
- list_del(&fp->f_list);
- releasef_locked(fp);
- }
- spin_unlock(&vn_file_lock);
-} /* releasef() */
-EXPORT_SYMBOL(areleasef);
-
-/* vn_cache constructor: initialise the vnode's v_lock; always returns 0 */
-static int
-vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
-{
- struct vnode *vp = buf;
-
- mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
-
- return (0);
-} /* vn_cache_constructor() */
-
-/* vn_cache destructor: destroy the v_lock set up by the constructor */
-static void
-vn_cache_destructor(void *buf, void *cdrarg)
-{
- struct vnode *vp = buf;
-
- mutex_destroy(&vp->v_lock);
-} /* vn_cache_destructor() */
-
-/*
- * vn_file_cache constructor: zero the reference count and initialise
- * the lock and list linkage of a file_t; always returns 0
- */
-static int
-vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
-{
- file_t *fp = buf;
-
- atomic_set(&fp->f_ref, 0);
- mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
- INIT_LIST_HEAD(&fp->f_list);
-
- return (0);
-} /* vn_file_cache_constructor() */
-
-/* vn_file_cache destructor: destroy the f_lock set up by the constructor */
-static void
-vn_file_cache_destructor(void *buf, void *cdrarg)
-{
- file_t *fp = buf;
-
- mutex_destroy(&fp->f_lock);
-} /* vn_file_cache_destructor() */
-
-/*
- * spl_vn_init - set up the file tracking lock and the two object caches
- *
- * Always returns 0.
- * NOTE(review): the kmem_cache_create() results are not checked here.
- */
-int
-spl_vn_init(void)
-{
- spin_lock_init(&vn_file_lock);
-
- vn_cache = kmem_cache_create("spl_vn_cache",
- sizeof (struct vnode), 64, vn_cache_constructor,
- vn_cache_destructor, NULL, NULL, NULL, 0);
-
- vn_file_cache = kmem_cache_create("spl_vn_file_cache",
- sizeof (file_t), 64, vn_file_cache_constructor,
- vn_file_cache_destructor, NULL, NULL, NULL, 0);
-
- return (0);
-} /* spl_vn_init() */
-
-/*
- * spl_vn_fini - release every file_t still tracked, warn about them,
- * then destroy both object caches
- */
-void
-spl_vn_fini(void)
-{
- file_t *fp, *next_fp;
- int leaked = 0;
-
- spin_lock(&vn_file_lock);
-
- /* The _safe iterator is needed because entries are freed in the loop */
- list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) {
- list_del(&fp->f_list);
- releasef_locked(fp);
- leaked++;
- }
-
- spin_unlock(&vn_file_lock);
-
- if (leaked > 0)
- printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked);
-
- kmem_cache_destroy(vn_file_cache);
- kmem_cache_destroy(vn_cache);
-} /* spl_vn_fini() */
diff --git a/module/spl/spl-xdr.c b/module/spl/spl-xdr.c
deleted file mode 100644
index 1dd31ffc1..000000000
--- a/module/spl/spl-xdr.c
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- * Copyright (c) 2008-2010 Sun Microsystems, Inc.
- * Written by Ricardo Correia <[email protected]>
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- * Solaris Porting Layer (SPL) XDR Implementation.
- */
-
-#include <linux/string.h>
-#include <sys/kmem.h>
-#include <sys/debug.h>
-#include <sys/types.h>
-#include <sys/sysmacros.h>
-#include <rpc/xdr.h>
-
-/*
- * SPL's XDR mem implementation.
- *
- * This is used by libnvpair to serialize/deserialize the name-value pair data
- * structures into byte arrays in a well-defined and portable manner.
- *
- * These data structures are used by the DMU/ZFS to flexibly manipulate various
- * information in memory and later serialize it/deserialize it to disk.
- * Examples of usages include the pool configuration, lists of pool and dataset
- * properties, etc.
- *
- * Reference documentation for the XDR representation and XDR operations can be
- * found in RFC 1832 and xdr(3), respectively.
- *
- * === Implementation shortcomings ===
- *
- * It is assumed that the following C types have the following sizes:
- *
- * char/unsigned char: 1 byte
- * short/unsigned short: 2 bytes
- * int/unsigned int: 4 bytes
- * longlong_t/u_longlong_t: 8 bytes
- *
- * The C standard allows these types to be larger (and in the case of ints,
- * shorter), so if that is the case on some compiler/architecture, the build
- * will fail (on purpose).
- *
- * If someone wants to fix the code to work properly on such environments, then:
- *
- * 1) Preconditions should be added to xdrmem_enc functions to make sure the
- * caller doesn't pass arguments which exceed the expected range.
- * 2) Functions which take signed integers should be changed to properly do
- * sign extension.
- * 3) For ints with less than 32 bits, well.. I suspect you'll have bigger
- * problems than this implementation.
- *
- * It is also assumed that:
- *
- * 1) Chars have 8 bits.
- * 2) We can always do 32-bit-aligned int memory accesses and byte-aligned
- * memcpy, memset and memcmp.
- * 3) Arrays passed to xdr_array() are packed and the compiler/architecture
- * supports element-sized-aligned memory accesses.
- * 4) Negative integers are natively stored in two's complement binary
- * representation.
- *
- * No checks are done for the 4 assumptions above, though.
- *
- * === Caller expectations ===
- *
- * Existing documentation does not describe the semantics of XDR operations very
- * well. Therefore, some assumptions about failure semantics will be made and
- * will be described below:
- *
- * 1) If any encoding operation fails (e.g., due to lack of buffer space),
- * the stream should be considered valid only up to the encoding operation
- * previous to the one that first failed. However, the stream size as returned
- * by xdr_control() cannot be considered to be strictly correct (it may be
- * bigger).
- *
- * Putting it another way, if there is an encoding failure it's undefined
- * whether anything is added to the stream in that operation and therefore
- * neither xdr_control() nor future encoding operations on the same stream can
- * be relied upon to produce correct results.
- *
- * 2) If a decoding operation fails, it's undefined whether anything will be
- * decoded into passed buffers/pointers during that operation, or what the
- * values on those buffers will look like.
- *
- * Future decoding operations on the same stream will also have similar
- * undefined behavior.
- *
- * 3) When the first decoding operation fails it is OK to trust the results of
- * previous decoding operations on the same stream, as long as the caller
- * expects a failure to be possible (e.g. due to end-of-stream).
- *
- * However, this is highly discouraged because the caller should know the
- * stream size and should be coded to expect any decoding failure to be data
- * corruption due to hardware, accidental or even malicious causes, which should
- * be handled gracefully in all cases.
- *
- * In very rare situations where there are strong reasons to believe the data
- * can be trusted to be valid and non-tampered with, then the caller may assume
- * a decoding failure to be a bug (e.g. due to mismatched data types) and may
- * fail non-gracefully.
- *
- * 4) Non-zero padding bytes will cause the decoding operation to fail.
- *
- * 5) Zero bytes on string types will also cause the decoding operation to fail.
- *
- * 6) It is assumed that either the pointer to the stream buffer given by the
- * caller is 32-bit aligned or the architecture supports non-32-bit-aligned int
- * memory accesses.
- *
- * 7) The stream buffer and encoding/decoding buffers/ptrs should not overlap.
- *
- * 8) If a caller passes pointers to non-kernel memory (e.g., pointers to user
- * space or MMIO space), the computer may explode.
- */
-
-static struct xdr_ops xdrmem_encode_ops;
-static struct xdr_ops xdrmem_decode_ops;
-
-/*
- * Initialize an XDR memory stream over the buffer [addr, addr + size).
- *
- * XDR_ENCODE and XDR_DECODE select the matching operations vector.  Any
- * other op sets x_ops to NULL and leaves the remaining fields untouched;
- * x_ops is also reset to NULL when addr + size ends up below addr.  The
- * function returns void, so a NULL x_ops is the only failure indication.
- */
-void
-xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size,
-    const enum xdr_op op)
-{
-	struct xdr_ops *ops;
-
-	if (op == XDR_ENCODE) {
-		ops = &xdrmem_encode_ops;
-	} else if (op == XDR_DECODE) {
-		ops = &xdrmem_decode_ops;
-	} else {
-		xdrs->x_ops = NULL; /* Let the caller know we failed */
-		return;
-	}
-
-	xdrs->x_ops = ops;
-	xdrs->x_op = op;
-	xdrs->x_addr = addr;
-	xdrs->x_addr_end = addr + size;
-
-	/* The computed end lies below the start: the range wrapped. */
-	if (xdrs->x_addr_end < xdrs->x_addr)
-		xdrs->x_ops = NULL;
-}
-EXPORT_SYMBOL(xdrmem_create);
-
-/*
- * Stream control hook.  Only XDR_GET_BYTES_AVAIL is supported: info is
- * treated as a struct xdr_bytesrec which receives the number of bytes
- * between the cursor and the end of the buffer.  Any other request
- * returns FALSE without touching info.
- */
-static bool_t
-xdrmem_control(XDR *xdrs, int req, void *info)
-{
-	struct xdr_bytesrec *rec;
-
-	if (req != XDR_GET_BYTES_AVAIL)
-		return (FALSE);
-
-	rec = (struct xdr_bytesrec *)info;
-	rec->xc_num_avail = xdrs->x_addr_end - xdrs->x_addr;
-	rec->xc_is_last_record = TRUE; /* always TRUE in xdrmem streams */
-
-	return (TRUE);
-}
-
-/*
- * Append cnt bytes from cp to the stream, followed by zero padding up to
- * the next multiple of 4 bytes.  Returns FALSE without writing anything
- * when the rounded-up size overflows, the cursor is already past the end,
- * or the remaining space is smaller than the padded size.
- */
-static bool_t
-xdrmem_enc_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
-{
-	const uint_t padded = roundup(cnt, 4);
-
-	if (padded < cnt)
-		return (FALSE); /* Integer overflow */
-
-	if (xdrs->x_addr > xdrs->x_addr_end ||
-	    xdrs->x_addr_end - xdrs->x_addr < padded)
-		return (FALSE);
-
-	memcpy(xdrs->x_addr, cp, cnt);
-
-	/* Zero-fill the 1-3 trailing pad bytes, if any. */
-	if (padded > cnt)
-		memset(xdrs->x_addr + cnt, 0, padded - cnt);
-
-	xdrs->x_addr += padded;
-
-	return (TRUE);
-}
-
-/*
- * Copy cnt bytes from the stream into cp, then consume the padding that
- * rounds cnt up to a multiple of 4.  Returns FALSE when the rounded-up size
- * overflows, the stream is too short, or a padding byte is non-zero.  In
- * the last case the payload has already been copied and the cursor has
- * advanced by cnt.
- */
-static bool_t
-xdrmem_dec_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
-{
-	static uint32_t zero = 0;
-	const uint_t padded = roundup(cnt, 4);
-	uint_t npad;
-
-	if (padded < cnt)
-		return (FALSE); /* Integer overflow */
-
-	if (xdrs->x_addr > xdrs->x_addr_end ||
-	    xdrs->x_addr_end - xdrs->x_addr < padded)
-		return (FALSE);
-
-	memcpy(cp, xdrs->x_addr, cnt);
-	xdrs->x_addr += cnt;
-
-	npad = padded - cnt;
-	if (npad == 0)
-		return (TRUE);
-
-	/* An inverted memchr() would be useful here... */
-	if (memcmp(&zero, xdrs->x_addr, npad) != 0)
-		return (FALSE);
-
-	xdrs->x_addr += npad;
-
-	return (TRUE);
-}
-
-/*
- * Write val to the stream as a 4-byte big-endian word and advance the
- * cursor.  Returns FALSE, leaving the stream untouched, when fewer than
- * four bytes remain.
- *
- * The remaining space is computed by subtraction, as xdrmem_enc_bytes()
- * does, so that no pointer beyond the end of the buffer is ever formed.
- */
-static bool_t
-xdrmem_enc_uint32(XDR *xdrs, uint32_t val)
-{
-	if (xdrs->x_addr > xdrs->x_addr_end)
-		return (FALSE);
-
-	if ((size_t)(xdrs->x_addr_end - xdrs->x_addr) < sizeof (uint32_t))
-		return (FALSE);
-
-	/* Relies on the documented 32-bit-aligned access assumption. */
-	*((uint32_t *)xdrs->x_addr) = cpu_to_be32(val);
-
-	xdrs->x_addr += sizeof (uint32_t);
-
-	return (TRUE);
-}
-
-/*
- * Read a 4-byte big-endian word from the stream into *val and advance the
- * cursor.  Returns FALSE, leaving *val and the stream untouched, when fewer
- * than four bytes remain.
- *
- * The remaining space is computed by subtraction, as xdrmem_dec_bytes()
- * does, so that no pointer beyond the end of the buffer is ever formed.
- */
-static bool_t
-xdrmem_dec_uint32(XDR *xdrs, uint32_t *val)
-{
-	if (xdrs->x_addr > xdrs->x_addr_end)
-		return (FALSE);
-
-	if ((size_t)(xdrs->x_addr_end - xdrs->x_addr) < sizeof (uint32_t))
-		return (FALSE);
-
-	/* Relies on the documented 32-bit-aligned access assumption. */
-	*val = be32_to_cpu(*((uint32_t *)xdrs->x_addr));
-
-	xdrs->x_addr += sizeof (uint32_t);
-
-	return (TRUE);
-}
-
-/*
- * Encode the character at *cp as a full 32-bit word.  The byte is read as
- * unsigned char, so the encoded value is always in the range 0..0xff.
- */
-static bool_t
-xdrmem_enc_char(XDR *xdrs, char *cp)
-{
-	const unsigned char byte = *((unsigned char *) cp);
-
-	BUILD_BUG_ON(sizeof (char) != 1);
-
-	return (xdrmem_enc_uint32(xdrs, (uint32_t)byte));
-}
-
-/*
- * Decode one character from a 32-bit word on the stream.  Returns FALSE if
- * the word cannot be read or if it does not fit in a single byte; *cp is
- * only written on success.
- */
-static bool_t
-xdrmem_dec_char(XDR *xdrs, char *cp)
-{
-	uint32_t word;
-
-	BUILD_BUG_ON(sizeof (char) != 1);
-
-	if (!xdrmem_dec_uint32(xdrs, &word))
-		return (FALSE);
-
-	/*
-	 * According to the RFC a char occupies only the low byte of the
-	 * word, so any bit set in the upper three bytes means this block
-	 * does not hold an encoded char.
-	 */
-	if ((word & ~(uint32_t)0xff) != 0)
-		return (FALSE);
-
-	*((unsigned char *) cp) = word;
-
-	return (TRUE);
-}
-
-/*
- * Encode the unsigned short at *usp, widened to a 32-bit word.
- */
-static bool_t
-xdrmem_enc_ushort(XDR *xdrs, unsigned short *usp)
-{
-	const uint32_t word = *usp;
-
-	BUILD_BUG_ON(sizeof (unsigned short) != 2);
-
-	return (xdrmem_enc_uint32(xdrs, word));
-}
-
-/*
- * Decode an unsigned short from a 32-bit word on the stream.  Returns FALSE
- * if the word cannot be read or does not fit in 16 bits; *usp is only
- * written on success.
- */
-static bool_t
-xdrmem_dec_ushort(XDR *xdrs, unsigned short *usp)
-{
-	uint32_t word;
-
-	BUILD_BUG_ON(sizeof (unsigned short) != 2);
-
-	if (!xdrmem_dec_uint32(xdrs, &word))
-		return (FALSE);
-
-	/*
-	 * Short ints are not in the RFC, but the same reasoning as in
-	 * xdrmem_dec_char() is applied: the upper half must be zero.
-	 */
-	if ((word >> 16) != 0)
-		return (FALSE);
-
-	*usp = word;
-
-	return (TRUE);
-}
-
-/*
- * Encode the unsigned int at *up as a 32-bit word.  The build fails if
- * unsigned is not exactly 4 bytes wide.
- */
-static bool_t
-xdrmem_enc_uint(XDR *xdrs, unsigned *up)
-{
-	const uint32_t word = *up;
-
-	BUILD_BUG_ON(sizeof (unsigned) != 4);
-
-	return (xdrmem_enc_uint32(xdrs, word));
-}
-
-/*
- * Decode a 32-bit word from the stream directly into *up.  The build fails
- * if unsigned is not exactly 4 bytes wide, which is what makes the pointer
- * cast below valid.
- */
-static bool_t
-xdrmem_dec_uint(XDR *xdrs, unsigned *up)
-{
-	uint32_t *dst = (uint32_t *)up;
-
-	BUILD_BUG_ON(sizeof (unsigned) != 4);
-
-	return (xdrmem_dec_uint32(xdrs, dst));
-}
-
-/*
- * Encode the 64-bit value at *ullp as two 32-bit words, most significant
- * word first.  If the second word does not fit, the first one has already
- * been written when FALSE is returned.
- */
-static bool_t
-xdrmem_enc_ulonglong(XDR *xdrs, u_longlong_t *ullp)
-{
-	const uint32_t high = *ullp >> 32;
-	const uint32_t low = *ullp & 0xffffffff;
-
-	BUILD_BUG_ON(sizeof (u_longlong_t) != 8);
-
-	if (!xdrmem_enc_uint32(xdrs, high))
-		return (FALSE);
-
-	return (xdrmem_enc_uint32(xdrs, low));
-}
-
-/*
- * Decode a 64-bit value stored as two 32-bit words, most significant word
- * first.  *ullp is only written once both words have been read.
- */
-static bool_t
-xdrmem_dec_ulonglong(XDR *xdrs, u_longlong_t *ullp)
-{
-	uint32_t msw, lsw;
-
-	BUILD_BUG_ON(sizeof (u_longlong_t) != 8);
-
-	if (!xdrmem_dec_uint32(xdrs, &msw) || !xdrmem_dec_uint32(xdrs, &lsw))
-		return (FALSE);
-
-	*ullp = ((u_longlong_t)msw << 32) | lsw;
-
-	return (TRUE);
-}
-
-/*
- * Encode an array: the element count *sizep as a 32-bit word, followed by
- * each of the *sizep elements of *arrp (elsize bytes apart) encoded with
- * elproc.
- *
- * Returns FALSE when elsize is zero, when the count exceeds maxsize, when
- * count * elsize would overflow a uint_t, or when any encoding step fails.
- * On a failure part-way through, the elements already encoded remain in
- * the stream.
- */
-static bool_t
-xdr_enc_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
-    const uint_t elsize, const xdrproc_t elproc)
-{
-	uint_t i;
-	caddr_t addr = *arrp;
-
-	/* A zero element size would divide by zero in the check below. */
-	if (elsize == 0)
-		return (FALSE);
-
-	if (*sizep > maxsize || *sizep > UINT_MAX / elsize)
-		return (FALSE);
-
-	if (!xdrmem_enc_uint(xdrs, sizep))
-		return (FALSE);
-
-	for (i = 0; i < *sizep; i++) {
-		if (!elproc(xdrs, addr))
-			return (FALSE);
-		addr += elsize;
-	}
-
-	return (TRUE);
-}
-
-/*
- * Decode an array: read the element count into *sizep, then decode that
- * many elements (elsize bytes apart) into *arrp using elproc.
- *
- * If *arrp is NULL on entry a buffer of count * elsize bytes is allocated
- * with KM_NOSLEEP and stored in *arrp.  When a later element fails to
- * decode, that buffer is freed and *arrp is reset to NULL so the caller is
- * never left holding a pointer to freed memory.  A caller-supplied buffer
- * is never freed.
- *
- * Returns FALSE when the count cannot be read, elsize is zero, the count
- * exceeds maxsize, count * elsize would overflow a uint_t, the allocation
- * fails, or any element fails to decode.  *sizep is written as soon as the
- * count has been read, even if a later check fails.
- */
-static bool_t
-xdr_dec_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
-    const uint_t elsize, const xdrproc_t elproc)
-{
-	uint_t i, size;
-	bool_t alloc = FALSE;
-	caddr_t addr;
-
-	if (!xdrmem_dec_uint(xdrs, sizep))
-		return (FALSE);
-
-	size = *sizep;
-
-	/* A zero element size would divide by zero in the check below. */
-	if (elsize == 0)
-		return (FALSE);
-
-	if (size > maxsize || size > UINT_MAX / elsize)
-		return (FALSE);
-
-	/*
-	 * The Solaris man page says: "If *arrp is NULL when decoding,
-	 * xdr_array() allocates memory and *arrp points to it".
-	 */
-	if (*arrp == NULL) {
-		BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));
-
-		*arrp = kmem_alloc(size * elsize, KM_NOSLEEP);
-		if (*arrp == NULL)
-			return (FALSE);
-
-		alloc = TRUE;
-	}
-
-	addr = *arrp;
-
-	for (i = 0; i < size; i++) {
-		if (!elproc(xdrs, addr)) {
-			if (alloc) {
-				kmem_free(*arrp, size * elsize);
-				/* Restore the NULL the caller passed in. */
-				*arrp = NULL;
-			}
-			return (FALSE);
-		}
-		addr += elsize;
-	}
-
-	return (TRUE);
-}
-
-/*
- * Encode the NUL-terminated string *sp: its length as a 32-bit word,
- * followed by the bytes (without the terminator) padded to a multiple of
- * 4.  Returns FALSE if the string is longer than maxsize or the stream
- * runs out of space.
- */
-static bool_t
-xdr_enc_string(XDR *xdrs, char **sp, const uint_t maxsize)
-{
-	const size_t slen = strlen(*sp);
-	uint_t len;
-
-	if (slen > maxsize)
-		return (FALSE);
-
-	/* slen <= maxsize, so it is known to fit in a uint_t here. */
-	len = slen;
-
-	return (xdrmem_enc_uint(xdrs, &len) ?
-	    xdrmem_enc_bytes(xdrs, *sp, len) : FALSE);
-}
-
-/*
- * Decode a string: read its length, copy that many bytes into *sp and
- * append a NUL terminator.
- *
- * If *sp is NULL on entry a buffer of length + 1 bytes is allocated with
- * KM_NOSLEEP and stored in *sp.  On any later failure that buffer is freed
- * and *sp is reset to NULL so the caller is never left holding a pointer
- * to freed memory.  A caller-supplied buffer is never freed, but may have
- * been partially overwritten.
- *
- * Returns FALSE when the length cannot be read, exceeds maxsize or
- * UINT_MAX - 1, the allocation fails, the payload or its padding cannot be
- * decoded, or the payload contains an embedded zero byte.
- */
-static bool_t
-xdr_dec_string(XDR *xdrs, char **sp, const uint_t maxsize)
-{
-	uint_t size;
-	bool_t alloc = FALSE;
-
-	if (!xdrmem_dec_uint(xdrs, &size))
-		return (FALSE);
-
-	/* size + 1 below must not wrap around. */
-	if (size > maxsize || size > UINT_MAX - 1)
-		return (FALSE);
-
-	/*
-	 * Solaris man page: "If *sp is NULL when decoding, xdr_string()
-	 * allocates memory and *sp points to it".
-	 */
-	if (*sp == NULL) {
-		BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));
-
-		*sp = kmem_alloc(size + 1, KM_NOSLEEP);
-		if (*sp == NULL)
-			return (FALSE);
-
-		alloc = TRUE;
-	}
-
-	if (!xdrmem_dec_bytes(xdrs, *sp, size))
-		goto fail;
-
-	/* Zero bytes inside a string are treated as a decoding error. */
-	if (memchr(*sp, 0, size) != NULL)
-		goto fail;
-
-	(*sp)[size] = '\0';
-
-	return (TRUE);
-
-fail:
-	if (alloc) {
-		kmem_free(*sp, size + 1);
-		/* Restore the NULL the caller passed in. */
-		*sp = NULL;
-	}
-
-	return (FALSE);
-}
-
-/* Operations vector installed by xdrmem_create() for XDR_ENCODE streams. */
-static struct xdr_ops xdrmem_encode_ops = {
-	.xdr_control		= xdrmem_control,
-	.xdr_char		= xdrmem_enc_char,
-	.xdr_u_short		= xdrmem_enc_ushort,
-	.xdr_u_int		= xdrmem_enc_uint,
-	.xdr_u_longlong_t	= xdrmem_enc_ulonglong,
-	.xdr_opaque		= xdrmem_enc_bytes,
-	.xdr_string		= xdr_enc_string,
-	.xdr_array		= xdr_enc_array,
-};
-
-/* Operations vector installed by xdrmem_create() for XDR_DECODE streams. */
-static struct xdr_ops xdrmem_decode_ops = {
-	.xdr_control		= xdrmem_control,
-	.xdr_char		= xdrmem_dec_char,
-	.xdr_u_short		= xdrmem_dec_ushort,
-	.xdr_u_int		= xdrmem_dec_uint,
-	.xdr_u_longlong_t	= xdrmem_dec_ulonglong,
-	.xdr_opaque		= xdrmem_dec_bytes,
-	.xdr_string		= xdr_dec_string,
-	.xdr_array		= xdr_dec_array,
-};
diff --git a/module/spl/spl-zlib.c b/module/spl/spl-zlib.c
deleted file mode 100644
index 62423343c..000000000
--- a/module/spl/spl-zlib.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <[email protected]>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- * For details, see <http://zfsonlinux.org/>.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *
- *
- * z_compress_level/z_uncompress are nearly identical copies of the
- * compress2/uncompress functions provided by the official zlib package
- * available at http://zlib.net/. The only changes made were to slightly
- * adapt the functions called to match the linux kernel implementation
- * of zlib. The full zlib license follows:
- *
- * zlib.h -- interface of the 'zlib' general purpose compression library
- * version 1.2.5, April 19th, 2010
- *
- * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
- *
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the authors be held liable for any damages
- * arising from the use of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- *
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- *
- * Jean-loup Gailly
- * Mark Adler
- */
-
-
-#include <sys/kmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/zmod.h>
-
-static spl_kmem_cache_t *zlib_workspace_cache;
-
-/*
- * A kmem_cache is used for the zlib workspaces to avoid having to vmalloc
- * and vfree for every call. Using a kmem_cache also has the advantage
- * that it improves the odds that the memory used will be local to this cpu.
- * To further improve things it might be wise to create a dedicated per-cpu
- * workspace for use. This would take some additional care because we then
- * must disable preemption around the critical section, and verify that
- * zlib_deflate* and zlib_inflate* never internally call schedule().
- */
-/*
- * Take one workspace buffer from zlib_workspace_cache, passing on the
- * caller's flags with the __GFP_FS bit cleared.
- *
- * NOTE(review): the callers in this file pass KM_SLEEP while the mask is
- * named like a Linux GFP bit -- confirm the two flag namespaces are meant
- * to be mixed here.
- */
-static void *
-zlib_workspace_alloc(int flags)
-{
-	const int kmflags = flags & ~(__GFP_FS);
-
-	return (kmem_cache_alloc(zlib_workspace_cache, kmflags));
-}
-
-/*
- * Hand a workspace obtained from zlib_workspace_alloc() back to
- * zlib_workspace_cache.
- */
-static void
-zlib_workspace_free(void *workspace)
-{
-	kmem_cache_free(zlib_workspace_cache, workspace);
-}
-
-/*
- * Compresses the source buffer into the destination buffer. The level
- * parameter has the same meaning as in deflateInit. sourceLen is the byte
- * length of the source buffer. Upon entry, destLen is the total size of the
- * destination buffer, which must be at least 0.1% larger than sourceLen plus
- * 12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
- *
- * compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
- * memory, Z_BUF_ERROR if there was not enough room in the output buffer,
- * Z_STREAM_ERROR if the level parameter is invalid.
- *
- * Z_BUF_ERROR is also returned, before any work is done, when sourceLen or
- * *destLen does not fit in the uInt counters of the z_stream; otherwise the
- * length would be silently truncated. *destLen is only updated on success.
- */
-int
-z_compress_level(void *dest, size_t *destLen, const void *source,
-    size_t sourceLen, int level)
-{
-	z_stream stream;
-	int err;
-
-	stream.next_in = (Byte *)source;
-	stream.avail_in = (uInt)sourceLen;
-	stream.next_out = dest;
-	stream.avail_out = (uInt)*destLen;
-
-	/* Reject lengths that were truncated by the casts above. */
-	if ((size_t)stream.avail_in != sourceLen)
-		return (Z_BUF_ERROR);
-
-	if ((size_t)stream.avail_out != *destLen)
-		return (Z_BUF_ERROR);
-
-	stream.workspace = zlib_workspace_alloc(KM_SLEEP);
-	if (!stream.workspace)
-		return (Z_MEM_ERROR);
-
-	err = zlib_deflateInit(&stream, level);
-	if (err != Z_OK) {
-		zlib_workspace_free(stream.workspace);
-		return (err);
-	}
-
-	err = zlib_deflate(&stream, Z_FINISH);
-	if (err != Z_STREAM_END) {
-		zlib_deflateEnd(&stream);
-		zlib_workspace_free(stream.workspace);
-		/* Z_OK here means the output buffer filled up early. */
-		return (err == Z_OK ? Z_BUF_ERROR : err);
-	}
-	*destLen = stream.total_out;
-
-	err = zlib_deflateEnd(&stream);
-	zlib_workspace_free(stream.workspace);
-
-	return (err);
-}
-EXPORT_SYMBOL(z_compress_level);
-
-/*
- * Decompresses the source buffer into the destination buffer. sourceLen is
- * the byte length of the source buffer. Upon entry, destLen is the total
- * size of the destination buffer, which must be large enough to hold the
- * entire uncompressed data. (The size of the uncompressed data must have
- * been saved previously by the compressor and transmitted to the decompressor
- * by some mechanism outside the scope of this compression library.)
- * Upon exit, destLen is the actual size of the uncompressed buffer.
- * This function can be used to decompress a whole file at once if the
- * input file is mmap'ed.
- *
- * uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
- * enough memory, Z_BUF_ERROR if there was not enough room in the output
- * buffer, or Z_DATA_ERROR if the input data was corrupted.
- *
- * Z_BUF_ERROR is also returned, before any work is done, when sourceLen or
- * *destLen does not fit in the uInt counters of the z_stream; otherwise the
- * length would be silently truncated. *destLen is only updated on success.
- */
-int
-z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen)
-{
-	z_stream stream;
-	int err;
-
-	stream.next_in = (Byte *)source;
-	stream.avail_in = (uInt)sourceLen;
-	stream.next_out = dest;
-	stream.avail_out = (uInt)*destLen;
-
-	/* Reject lengths that were truncated by the casts above. */
-	if ((size_t)stream.avail_in != sourceLen)
-		return (Z_BUF_ERROR);
-
-	if ((size_t)stream.avail_out != *destLen)
-		return (Z_BUF_ERROR);
-
-	stream.workspace = zlib_workspace_alloc(KM_SLEEP);
-	if (!stream.workspace)
-		return (Z_MEM_ERROR);
-
-	err = zlib_inflateInit(&stream);
-	if (err != Z_OK) {
-		zlib_workspace_free(stream.workspace);
-		return (err);
-	}
-
-	err = zlib_inflate(&stream, Z_FINISH);
-	if (err != Z_STREAM_END) {
-		zlib_inflateEnd(&stream);
-		zlib_workspace_free(stream.workspace);
-
-		/*
-		 * A dictionary request, or running out of input before the
-		 * end of the stream, is reported as corrupted input.
-		 */
-		if (err == Z_NEED_DICT ||
-		    (err == Z_BUF_ERROR && stream.avail_in == 0))
-			return (Z_DATA_ERROR);
-
-		return (err);
-	}
-	*destLen = stream.total_out;
-
-	err = zlib_inflateEnd(&stream);
-	zlib_workspace_free(stream.workspace);
-
-	return (err);
-}
-EXPORT_SYMBOL(z_uncompress);
-
-/*
- * Create the cache that backs the zlib workspaces.  Each object is sized
- * for the larger of the deflate workspace (at MAX_WBITS / MAX_MEM_LEVEL)
- * and the inflate workspace, so one buffer serves both directions.
- * Returns 0 on success and 1 if the cache could not be created.
- */
-int
-spl_zlib_init(void)
-{
-	int size = MAX(
-	    spl_zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
-	    zlib_inflate_workspacesize());
-
-	zlib_workspace_cache = kmem_cache_create("spl_zlib_workspace_cache",
-	    size, 0, NULL, NULL, NULL, NULL, NULL, KMC_VMEM);
-
-	return (zlib_workspace_cache ? 0 : 1);
-}
-
-/*
- * Destroy the zlib workspace cache created by spl_zlib_init() and clear
- * the global pointer to it.
- */
-void
-spl_zlib_fini(void)
-{
-	spl_kmem_cache_t *cache = zlib_workspace_cache;
-
-	kmem_cache_destroy(cache);
-	zlib_workspace_cache = NULL;
-}