diff options
author | Brian Behlendorf <[email protected]> | 2010-07-26 15:47:55 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2010-07-26 15:47:55 -0700 |
commit | 10129680f860168a61932f0011d9ab864286bfcd (patch) | |
tree | 79a278f1cd24a45b2fac7f96b6a3e3085e0255e6 /include | |
parent | 849c50e7f2487dd3f1dce1417e51dff3a12338d6 (diff) |
Ensure kmem_alloc() and vmem_alloc() never fail
The Solaris semantics for kmem_alloc() and vmem_alloc() are that they
must never fail when called with KM_SLEEP. They may only fail if
called with KM_NOSLEEP otherwise they must block until memory is
available. This is quite different from how the Linux memory
allocators work, under Linux a memory allocation failure is always
possible and must be dealt with.
At one point in the past the kmem code did properly implement this
behavior, however as the code evolved this behavior was overlooked
in places. This patch goes through all three implementations of
the kmem/vmem allocation functions and ensures that they will all
block in the KM_SLEEP case when memory is not available. They
may still fail in the KM_NOSLEEP case in which case the caller
is responsible for handling the failure.
Special care is taken in vmalloc_nofail() to avoid thrashing the
system on the virtual address space spin lock. The down side of
course is if you do see a failure here, which is unlikely for
64-bit systems, your allocation will delay for an entire second.
Still this is preferable to locking up your system and it is the
best we can do given the constraints.
Additionally, the code was cleaned up to be much more readable
and comments were added to describe the various kmem-debug-*
configure options. The default configure options remain:
"--enable-debug-kmem --disable-debug-kmem-tracking"
Diffstat (limited to 'include')
-rw-r--r-- | include/sys/kmem.h | 234 |
1 files changed, 146 insertions, 88 deletions
diff --git a/include/sys/kmem.h b/include/sys/kmem.h index 17b3a2276..e90c6b8ce 100644 --- a/include/sys/kmem.h +++ b/include/sys/kmem.h @@ -87,10 +87,10 @@ kzalloc_nofail(size_t size, gfp_t flags) return ptr; } -#ifdef HAVE_KMALLOC_NODE static inline void * kmalloc_node_nofail(size_t size, gfp_t flags, int node) { +#ifdef HAVE_KMALLOC_NODE void *ptr; do { @@ -98,16 +98,63 @@ kmalloc_node_nofail(size_t size, gfp_t flags, int node) } while (ptr == NULL && (flags & __GFP_WAIT)); return ptr; -} +#else + return kmalloc_nofail(size, flags); #endif /* HAVE_KMALLOC_NODE */ +} + +static inline void * +vmalloc_nofail(size_t size, gfp_t flags) +{ + void *ptr; + + /* + * Retry failed __vmalloc() allocations once every second. The + * rationale for the delay is that the likely failure modes are: + * + * 1) The system has completely exhausted memory, in which case + * delaying 1 second for the memory reclaim to run is reasonable + * to avoid thrashing the system. + * 2) The system has memory but has exhausted the small virtual + * address space available on 32-bit systems. Retrying the + * allocation immediately will only result in spinning on the + * virtual address space lock. It is better to delay a second and + * hope that another process will free some of the address space. + * But the bottom line is there is not much we can actually do + * since we can never safely return a failure and honor the + * Solaris semantics. 
+ */ + while (1) { + ptr = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); + if (unlikely((ptr == NULL) && (flags & __GFP_WAIT))) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } else { + break; + } + } + + return ptr; +} + +static inline void * +vzalloc_nofail(size_t size, gfp_t flags) +{ + void *ptr; + + ptr = vmalloc_nofail(size, flags); + if (ptr) + memset(ptr, 0, (size)); + + return ptr; +} #ifdef DEBUG_KMEM -# ifdef HAVE_ATOMIC64_T -extern atomic64_t kmem_alloc_used; -extern unsigned long long kmem_alloc_max; -extern atomic64_t vmem_alloc_used; -extern unsigned long long vmem_alloc_max; +/* + * Memory accounting functions to be used only when DEBUG_KMEM is set. + */ +# ifdef HAVE_ATOMIC64_T # define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used) # define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used) @@ -118,13 +165,13 @@ extern unsigned long long vmem_alloc_max; # define vmem_alloc_used_read() atomic64_read(&vmem_alloc_used) # define vmem_alloc_used_set(size) atomic64_set(&vmem_alloc_used, size) -# else - -extern atomic_t kmem_alloc_used; +extern atomic64_t kmem_alloc_used; extern unsigned long long kmem_alloc_max; -extern atomic_t vmem_alloc_used; +extern atomic64_t vmem_alloc_used; extern unsigned long long vmem_alloc_max; +# else /* HAVE_ATOMIC64_T */ + # define kmem_alloc_used_add(size) atomic_add(size, &kmem_alloc_used) # define kmem_alloc_used_sub(size) atomic_sub(size, &kmem_alloc_used) # define kmem_alloc_used_read() atomic_read(&kmem_alloc_used) @@ -134,90 +181,107 @@ extern unsigned long long vmem_alloc_max; # define vmem_alloc_used_read() atomic_read(&vmem_alloc_used) # define vmem_alloc_used_set(size) atomic_set(&vmem_alloc_used, size) -# endif /* _LP64 */ - -# define kmem_alloc(size, flags) __kmem_alloc((size), (flags), 0, 0) -# define kmem_zalloc(size, flags) __kmem_alloc((size), ((flags) | \ - __GFP_ZERO), 0, 0) - -/* The node alloc functions are only used by the SPL code itself */ -# 
ifdef HAVE_KMALLOC_NODE -# define kmem_alloc_node(size, flags, node) __kmem_alloc((size), (flags), 1, \ - node) -# else -# define kmem_alloc_node(size, flags, node) __kmem_alloc((size), (flags), 0, 0) -# endif +extern atomic_t kmem_alloc_used; +extern unsigned long long kmem_alloc_max; +extern atomic_t vmem_alloc_used; +extern unsigned long long vmem_alloc_max; -# define vmem_zalloc(size, flags) vmem_alloc((size), ((flags) | \ - __GFP_ZERO)) +# endif /* HAVE_ATOMIC64_T */ # ifdef DEBUG_KMEM_TRACKING - -extern void *kmem_alloc_track(size_t size, int flags, const char *func, - int line, int node_alloc, int node); -extern void kmem_free_track(void *ptr, size_t size); -extern void *vmem_alloc_track(size_t size, int flags, const char *func, - int line); -extern void vmem_free_track(void *ptr, size_t size); - -# define __kmem_alloc(size, flags, na, node) kmem_alloc_track((size), \ - (flags), __FUNCTION__, \ - __LINE__, (na), (node)) -# define kmem_free(ptr, size) kmem_free_track((ptr), (size)) -# define vmem_alloc(size, flags) vmem_alloc_track((size), \ - (flags),__FUNCTION__, \ - __LINE__) -# define vmem_free(ptr, size) vmem_free_track((ptr), (size)) +/* + * DEBUG_KMEM && DEBUG_KMEM_TRACKING + * + * The maximum level of memory debugging. All memory will be accounted + * for and each allocation will be explicitly tracked. Any allocation + * which is leaked will be reported on module unload and the exact location + * where that memory was allocated will be reported. This level of memory + * tracking will have a significant impact on performance and should only + * be enabled for debugging. This feature may be enabled by passing + * --enable-debug-kmem-tracking to configure. 
+ */ +# define kmem_alloc(sz, fl) kmem_alloc_track((sz), (fl), \ + __FUNCTION__, __LINE__, 0, 0) +# define kmem_zalloc(sz, fl) kmem_alloc_track((sz), (fl)|__GFP_ZERO,\ + __FUNCTION__, __LINE__, 0, 0) +# define kmem_alloc_node(sz, fl, nd) kmem_alloc_track((sz), (fl), \ + __FUNCTION__, __LINE__, 1, nd) +# define kmem_free(ptr, sz) kmem_free_track((ptr), (sz)) + +# define vmem_alloc(sz, fl) vmem_alloc_track((sz), (fl), \ + __FUNCTION__, __LINE__) +# define vmem_zalloc(sz, fl) vmem_alloc_track((sz), (fl)|__GFP_ZERO,\ + __FUNCTION__, __LINE__) +# define vmem_free(ptr, sz) vmem_free_track((ptr), (sz)) + +extern void *kmem_alloc_track(size_t, int, const char *, int, int, int); +extern void kmem_free_track(void *, size_t); +extern void *vmem_alloc_track(size_t, int, const char *, int); +extern void vmem_free_track(void *, size_t); # else /* DEBUG_KMEM_TRACKING */ - -extern void *kmem_alloc_debug(size_t size, int flags, const char *func, - int line, int node_alloc, int node); -extern void kmem_free_debug(void *ptr, size_t size); -extern void *vmem_alloc_debug(size_t size, int flags, const char *func, - int line); -extern void vmem_free_debug(void *ptr, size_t size); - -# define __kmem_alloc(size, flags, na, node) kmem_alloc_debug((size), \ - (flags), __FUNCTION__, \ - __LINE__, (na), (node)) -# define kmem_free(ptr, size) kmem_free_debug((ptr), (size)) -# define vmem_alloc(size, flags) vmem_alloc_debug((size), \ - (flags), __FUNCTION__, \ - __LINE__) -# define vmem_free(ptr, size) vmem_free_debug((ptr), (size)) +/* + * DEBUG_KMEM && !DEBUG_KMEM_TRACKING + * + * The default build will set DEBUG_KMEM. This provides basic memory + * accounting with little to no impact on performance. When the module + * is unloaded, if any memory was leaked the total number of leaked bytes + * will be reported on the console. To disable this basic accounting + * pass the --disable-debug-kmem option to configure. 
+ */ +# define kmem_alloc(sz, fl) kmem_alloc_debug((sz), (fl), \ + __FUNCTION__, __LINE__, 0, 0) +# define kmem_zalloc(sz, fl) kmem_alloc_debug((sz), (fl)|__GFP_ZERO,\ + __FUNCTION__, __LINE__, 0, 0) +# define kmem_alloc_node(sz, fl, nd) kmem_alloc_debug((sz), (fl), \ + __FUNCTION__, __LINE__, 1, nd) +# define kmem_free(ptr, sz) kmem_free_debug((ptr), (sz)) + +# define vmem_alloc(sz, fl) vmem_alloc_debug((sz), (fl), \ + __FUNCTION__, __LINE__) +# define vmem_zalloc(sz, fl) vmem_alloc_debug((sz), (fl)|__GFP_ZERO,\ + __FUNCTION__, __LINE__) +# define vmem_free(ptr, sz) vmem_free_debug((ptr), (sz)) + +extern void *kmem_alloc_debug(size_t, int, const char *, int, int, int); +extern void kmem_free_debug(void *, size_t); +extern void *vmem_alloc_debug(size_t, int, const char *, int); +extern void vmem_free_debug(void *, size_t); # endif /* DEBUG_KMEM_TRACKING */ - #else /* DEBUG_KMEM */ +/* + * !DEBUG_KMEM && !DEBUG_KMEM_TRACKING + * + * All debugging is disabled. There will be no overhead even for + * minimal memory accounting. To enable basic accounting pass the + * --enable-debug-kmem option to configure. 
+ */ +# define kmem_alloc(sz, fl) kmalloc_nofail((sz), (fl)) +# define kmem_zalloc(sz, fl) kzalloc_nofail((sz), (fl)) +# define kmem_alloc_node(sz, fl, nd) kmalloc_node_nofail((sz), (fl), (nd)) +# define kmem_free(ptr, sz) ((void)(sz), kfree(ptr)) -# define kmem_alloc(size, flags) kmalloc_nofail((size), (flags)) -# define kmem_zalloc(size, flags) kzalloc_nofail((size), (flags)) -# define kmem_free(ptr, size) ((void)(size), kfree(ptr)) - -# ifdef HAVE_KMALLOC_NODE -# define kmem_alloc_node(size, flags, node) \ - kmalloc_node_nofail((size), (flags), (node)) -# else -# define kmem_alloc_node(size, flags, node) \ - kmalloc_nofail((size), (flags)) -# endif - -# define vmem_alloc(size, flags) __vmalloc((size), ((flags) | \ - __GFP_HIGHMEM), PAGE_KERNEL) -# define vmem_zalloc(size, flags) \ -({ \ - void *_ptr_ = __vmalloc((size),((flags)|__GFP_HIGHMEM),PAGE_KERNEL); \ - if (_ptr_) \ - memset(_ptr_, 0, (size)); \ - _ptr_; \ -}) -# define vmem_free(ptr, size) ((void)(size), vfree(ptr)) +# define vmem_alloc(sz, fl) vmalloc_nofail((sz), (fl)) +# define vmem_zalloc(sz, fl) vzalloc_nofail((sz), (fl)) +# define vmem_free(ptr, sz) ((void)(sz), vfree(ptr)) #endif /* DEBUG_KMEM */ +extern int kmem_debugging(void); +extern char *kmem_vasprintf(const char *fmt, va_list ap); +extern char *kmem_asprintf(const char *fmt, ...); +extern char *strdup(const char *str); +extern void strfree(char *str); + + /* - * Slab allocation interfaces + * Slab allocation interfaces. The SPL slab differs from the standard + * Linux SLAB or SLUB primarily in that each cache may be backed by slabs + * allocated from the physical or virtual memory address space. The virtual + * slabs allow for good behavior when allocating large objects of identical + * size. This slab implementation also supports both constructors and + * destructors which the Linux slab does not. 
*/ enum { KMC_BIT_NOTOUCH = 0, /* Don't update ages */ @@ -246,12 +310,6 @@ enum { #define KMC_REAP_CHUNK INT_MAX #define KMC_DEFAULT_SEEKS 1 -extern int kmem_debugging(void); -extern char *kmem_vasprintf(const char *fmt, va_list ap); -extern char *kmem_asprintf(const char *fmt, ...); -#define strfree(str) kfree(str) -#define strdup(str) kstrdup(str, GFP_KERNEL) - extern struct list_head spl_kmem_cache_list; extern struct rw_semaphore spl_kmem_cache_sem; |