-rw-r--r--   include/sys/kmem.h      | 234
-rw-r--r--   module/spl/spl-kmem.c   | 248
2 files changed, 289 insertions, 193 deletions
diff --git a/include/sys/kmem.h b/include/sys/kmem.h index 17b3a2276..e90c6b8ce 100644 --- a/include/sys/kmem.h +++ b/include/sys/kmem.h @@ -87,10 +87,10 @@ kzalloc_nofail(size_t size, gfp_t flags) return ptr; } -#ifdef HAVE_KMALLOC_NODE static inline void * kmalloc_node_nofail(size_t size, gfp_t flags, int node) { +#ifdef HAVE_KMALLOC_NODE void *ptr; do { @@ -98,16 +98,63 @@ kmalloc_node_nofail(size_t size, gfp_t flags, int node) } while (ptr == NULL && (flags & __GFP_WAIT)); return ptr; -} +#else + return kmalloc_nofail(size, flags); #endif /* HAVE_KMALLOC_NODE */ +} + +static inline void * +vmalloc_nofail(size_t size, gfp_t flags) +{ + void *ptr; + + /* + * Retry failed __vmalloc() allocations once every second. The + * rational for the delay is that the likely failure modes are: + * + * 1) The system has completely exhausted memory, in which case + * delaying 1 second for the memory reclaim to run is reasonable + * to avoid thrashing the system. + * 2) The system has memory but has exhausted the small virtual + * address space available on 32-bit systems. Retrying the + * allocation immediately will only result in spinning on the + * virtual address space lock. It is better delay a second and + * hope that another process will free some of the address space. + * But the bottom line is there is not much we can actually do + * since we can never safely return a failure and honor the + * Solaris semantics. + */ + while (1) { + ptr = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); + if (unlikely((ptr == NULL) && (flags & __GFP_WAIT))) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } else { + break; + } + } + + return ptr; +} + +static inline void * +vzalloc_nofail(size_t size, gfp_t flags) +{ + void *ptr; + + ptr = vmalloc_nofail(size, flags); + if (ptr) + memset(ptr, 0, (size)); + + return ptr; +} #ifdef DEBUG_KMEM -# ifdef HAVE_ATOMIC64_T -extern atomic64_t kmem_alloc_used; -extern unsigned long long kmem_alloc_max; -extern atomic64_t vmem_alloc_used; -extern unsigned long long vmem_alloc_max; +/* + * Memory accounting functions to be used only when DEBUG_KMEM is set. 
+ */ +# ifdef HAVE_ATOMIC64_T # define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used) # define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used) @@ -118,13 +165,13 @@ extern unsigned long long vmem_alloc_max; # define vmem_alloc_used_read() atomic64_read(&vmem_alloc_used) # define vmem_alloc_used_set(size) atomic64_set(&vmem_alloc_used, size) -# else - -extern atomic_t kmem_alloc_used; +extern atomic64_t kmem_alloc_used; extern unsigned long long kmem_alloc_max; -extern atomic_t vmem_alloc_used; +extern atomic64_t vmem_alloc_used; extern unsigned long long vmem_alloc_max; +# else /* HAVE_ATOMIC64_T */ + # define kmem_alloc_used_add(size) atomic_add(size, &kmem_alloc_used) # define kmem_alloc_used_sub(size) atomic_sub(size, &kmem_alloc_used) # define kmem_alloc_used_read() atomic_read(&kmem_alloc_used) @@ -134,90 +181,107 @@ extern unsigned long long vmem_alloc_max; # define vmem_alloc_used_read() atomic_read(&vmem_alloc_used) # define vmem_alloc_used_set(size) atomic_set(&vmem_alloc_used, size) -# endif /* _LP64 */ - -# define kmem_alloc(size, flags) __kmem_alloc((size), (flags), 0, 0) -# define kmem_zalloc(size, flags) __kmem_alloc((size), ((flags) | \ - __GFP_ZERO), 0, 0) - -/* The node alloc functions are only used by the SPL code itself */ -# ifdef HAVE_KMALLOC_NODE -# define kmem_alloc_node(size, flags, node) __kmem_alloc((size), (flags), 1, \ - node) -# else -# define kmem_alloc_node(size, flags, node) __kmem_alloc((size), (flags), 0, 0) -# endif +extern atomic_t kmem_alloc_used; +extern unsigned long long kmem_alloc_max; +extern atomic_t vmem_alloc_used; +extern unsigned long long vmem_alloc_max; -# define vmem_zalloc(size, flags) vmem_alloc((size), ((flags) | \ - __GFP_ZERO)) +# endif /* HAVE_ATOMIC64_T */ # ifdef DEBUG_KMEM_TRACKING - -extern void *kmem_alloc_track(size_t size, int flags, const char *func, - int line, int node_alloc, int node); -extern void kmem_free_track(void *ptr, size_t size); -extern void *vmem_alloc_track(size_t size, int flags, const char *func, - int line); -extern void vmem_free_track(void *ptr, size_t size); - -# define __kmem_alloc(size, flags, na, node) kmem_alloc_track((size), \ - (flags), __FUNCTION__, \ - __LINE__, (na), (node)) -# define kmem_free(ptr, size) kmem_free_track((ptr), (size)) -# define vmem_alloc(size, flags) vmem_alloc_track((size), \ - (flags),__FUNCTION__, \ - __LINE__) -# define vmem_free(ptr, size) vmem_free_track((ptr), (size)) +/* + * DEBUG_KMEM && DEBUG_KMEM_TRACKING + * + * The maximum level of memory debugging. All memory will be accounted + * for and each allocation will be explicitly tracked. Any allocation + * which is leaked will be reported on module unload and the exact location + * where that memory was allocation will be reported. This level of memory + * tracking will have a significant impact on performance and should only + * be enabled for debugging. This feature may be enabled by passing + * --enable-debug-kmem-tracking to configure. 
+ */ +# define kmem_alloc(sz, fl) kmem_alloc_track((sz), (fl), \ + __FUNCTION__, __LINE__, 0, 0) +# define kmem_zalloc(sz, fl) kmem_alloc_track((sz), (fl)|__GFP_ZERO,\ + __FUNCTION__, __LINE__, 0, 0) +# define kmem_alloc_node(sz, fl, nd) kmem_alloc_track((sz), (fl), \ + __FUNCTION__, __LINE__, 1, nd) +# define kmem_free(ptr, sz) kmem_free_track((ptr), (sz)) + +# define vmem_alloc(sz, fl) vmem_alloc_track((sz), (fl), \ + __FUNCTION__, __LINE__) +# define vmem_zalloc(sz, fl) vmem_alloc_track((sz), (fl)|__GFP_ZERO,\ + __FUNCTION__, __LINE__) +# define vmem_free(ptr, sz) vmem_free_track((ptr), (sz)) + +extern void *kmem_alloc_track(size_t, int, const char *, int, int, int); +extern void kmem_free_track(void *, size_t); +extern void *vmem_alloc_track(size_t, int, const char *, int); +extern void vmem_free_track(void *, size_t); # else /* DEBUG_KMEM_TRACKING */ - -extern void *kmem_alloc_debug(size_t size, int flags, const char *func, - int line, int node_alloc, int node); -extern void kmem_free_debug(void *ptr, size_t size); -extern void *vmem_alloc_debug(size_t size, int flags, const char *func, - int line); -extern void vmem_free_debug(void *ptr, size_t size); - -# define __kmem_alloc(size, flags, na, node) kmem_alloc_debug((size), \ - (flags), __FUNCTION__, \ - __LINE__, (na), (node)) -# define kmem_free(ptr, size) kmem_free_debug((ptr), (size)) -# define vmem_alloc(size, flags) vmem_alloc_debug((size), \ - (flags), __FUNCTION__, \ - __LINE__) -# define vmem_free(ptr, size) vmem_free_debug((ptr), (size)) +/* + * DEBUG_KMEM && !DEBUG_KMEM_TRACKING + * + * The default build will set DEBUG_KEM. This provides basic memory + * accounting with little to no impact on performance. When the module + * is unloaded in any memory was leaked the total number of leaked bytes + * will be reported on the console. To disable this basic accounting + * pass the --disable-debug-kmem option to configure. + */ +# define kmem_alloc(sz, fl) kmem_alloc_debug((sz), (fl), \ + __FUNCTION__, __LINE__, 0, 0) +# define kmem_zalloc(sz, fl) kmem_alloc_debug((sz), (fl)|__GFP_ZERO,\ + __FUNCTION__, __LINE__, 0, 0) +# define kmem_alloc_node(sz, fl, nd) kmem_alloc_debug((sz), (fl), \ + __FUNCTION__, __LINE__, 1, nd) +# define kmem_free(ptr, sz) kmem_free_debug((ptr), (sz)) + +# define vmem_alloc(sz, fl) vmem_alloc_debug((sz), (fl), \ + __FUNCTION__, __LINE__) +# define vmem_zalloc(sz, fl) vmem_alloc_debug((sz), (fl)|__GFP_ZERO,\ + __FUNCTION__, __LINE__) +# define vmem_free(ptr, sz) vmem_free_debug((ptr), (sz)) + +extern void *kmem_alloc_debug(size_t, int, const char *, int, int, int); +extern void kmem_free_debug(void *, size_t); +extern void *vmem_alloc_debug(size_t, int, const char *, int); +extern void vmem_free_debug(void *, size_t); # endif /* DEBUG_KMEM_TRACKING */ - #else /* DEBUG_KMEM */ +/* + * !DEBUG_KMEM && !DEBUG_KMEM_TRACKING + * + * All debugging is disabled. There will be no overhead even for + * minimal memory accounting. To enable basic accounting pass the + * --enable-debug-kmem option to configure. 
+ */ +# define kmem_alloc(sz, fl) kmalloc_nofail((sz), (fl)) +# define kmem_zalloc(sz, fl) kzalloc_nofail((sz), (fl)) +# define kmem_alloc_node(sz, fl, nd) kmalloc_node_nofail((sz), (fl), (nd)) +# define kmem_free(ptr, sz) ((void)(sz), kfree(ptr)) -# define kmem_alloc(size, flags) kmalloc_nofail((size), (flags)) -# define kmem_zalloc(size, flags) kzalloc_nofail((size), (flags)) -# define kmem_free(ptr, size) ((void)(size), kfree(ptr)) - -# ifdef HAVE_KMALLOC_NODE -# define kmem_alloc_node(size, flags, node) \ - kmalloc_node_nofail((size), (flags), (node)) -# else -# define kmem_alloc_node(size, flags, node) \ - kmalloc_nofail((size), (flags)) -# endif - -# define vmem_alloc(size, flags) __vmalloc((size), ((flags) | \ - __GFP_HIGHMEM), PAGE_KERNEL) -# define vmem_zalloc(size, flags) \ -({ \ - void *_ptr_ = __vmalloc((size),((flags)|__GFP_HIGHMEM),PAGE_KERNEL); \ - if (_ptr_) \ - memset(_ptr_, 0, (size)); \ - _ptr_; \ -}) -# define vmem_free(ptr, size) ((void)(size), vfree(ptr)) +# define vmem_alloc(sz, fl) vmalloc_nofail((sz), (fl)) +# define vmem_zalloc(sz, fl) vzalloc_nofail((sz), (fl)) +# define vmem_free(ptr, sz) ((void)(sz), vfree(ptr)) #endif /* DEBUG_KMEM */ +extern int kmem_debugging(void); +extern char *kmem_vasprintf(const char *fmt, va_list ap); +extern char *kmem_asprintf(const char *fmt, ...); +extern char *strdup(const char *str); +extern void strfree(char *str); + + /* - * Slab allocation interfaces + * Slab allocation interfaces. The SPL slab differs from the standard + * Linux SLAB or SLUB primarily in that each cache may be backed by slabs + * allocated from the physical or virtal memory address space. The virtual + * slabs allow for good behavior when allocation large objects of identical + * size. This slab implementation also supports both constructors and + * destructions which the Linux slab does not. */ enum { KMC_BIT_NOTOUCH = 0, /* Don't update ages */ @@ -246,12 +310,6 @@ enum { #define KMC_REAP_CHUNK INT_MAX #define KMC_DEFAULT_SEEKS 1 -extern int kmem_debugging(void); -extern char *kmem_vasprintf(const char *fmt, va_list ap); -extern char *kmem_asprintf(const char *fmt, ...); -#define strfree(str) kfree(str) -#define strdup(str) kstrdup(str, GFP_KERNEL) - extern struct list_head spl_kmem_cache_list; extern struct rw_semaphore spl_kmem_cache_sem; diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index e575b1ee9..ec1ccb4ce 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -271,6 +271,34 @@ kmem_asprintf(const char *fmt, ...) } EXPORT_SYMBOL(kmem_asprintf); +static char * +__strdup(const char *str, int flags) +{ + char *ptr; + int n; + + n = strlen(str); + ptr = kmalloc_nofail(n + 1, flags); + if (ptr) + memcpy(ptr, str, n + 1); + + return ptr; +} + +char * +strdup(const char *str) +{ + return __strdup(str, KM_SLEEP); +} +EXPORT_SYMBOL(strdup); + +void +strfree(char *str) +{ + kmem_free(str, strlen(str) + 1); +} +EXPORT_SYMBOL(strfree); + /* * Memory allocation interfaces and debugging for basic kmem_* * and vmem_* style memory allocation. 
When DEBUG_KMEM is enabled @@ -285,12 +313,12 @@ atomic64_t kmem_alloc_used = ATOMIC64_INIT(0); unsigned long long kmem_alloc_max = 0; atomic64_t vmem_alloc_used = ATOMIC64_INIT(0); unsigned long long vmem_alloc_max = 0; -# else +# else /* HAVE_ATOMIC64_T */ atomic_t kmem_alloc_used = ATOMIC_INIT(0); unsigned long long kmem_alloc_max = 0; atomic_t vmem_alloc_used = ATOMIC_INIT(0); unsigned long long vmem_alloc_max = 0; -# endif /* _LP64 */ +# endif /* HAVE_ATOMIC64_T */ EXPORT_SYMBOL(kmem_alloc_used); EXPORT_SYMBOL(kmem_alloc_max); @@ -340,77 +368,9 @@ EXPORT_SYMBOL(kmem_list); EXPORT_SYMBOL(vmem_lock); EXPORT_SYMBOL(vmem_table); EXPORT_SYMBOL(vmem_list); -# endif -#endif - -/* - * Slab allocation interfaces - * - * While the Linux slab implementation was inspired by the Solaris - * implemenation I cannot use it to emulate the Solaris APIs. I - * require two features which are not provided by the Linux slab. - * - * 1) Constructors AND destructors. Recent versions of the Linux - * kernel have removed support for destructors. This is a deal - * breaker for the SPL which contains particularly expensive - * initializers for mutex's, condition variables, etc. We also - * require a minimal level of cleanup for these data types unlike - * many Linux data type which do need to be explicitly destroyed. - * - * 2) Virtual address space backed slab. Callers of the Solaris slab - * expect it to work well for both small are very large allocations. - * Because of memory fragmentation the Linux slab which is backed - * by kmalloc'ed memory performs very badly when confronted with - * large numbers of large allocations. Basing the slab on the - * virtual address space removes the need for contigeous pages - * and greatly improve performance for large allocations. - * - * For these reasons, the SPL has its own slab implementation with - * the needed features. It is not as highly optimized as either the - * Solaris or Linux slabs, but it should get me most of what is - * needed until it can be optimized or obsoleted by another approach. - * - * One serious concern I do have about this method is the relatively - * small virtual address space on 32bit arches. This will seriously - * constrain the size of the slab caches and their performance. - * - * XXX: Improve the partial slab list by carefully maintaining a - * strict ordering of fullest to emptiest slabs based on - * the slab reference count. This gaurentees the when freeing - * slabs back to the system we need only linearly traverse the - * last N slabs in the list to discover all the freeable slabs. - * - * XXX: NUMA awareness for optionally allocating memory close to a - * particular core. This can be adventageous if you know the slab - * object will be short lived and primarily accessed from one core. - * - * XXX: Slab coloring may also yield performance improvements and would - * be desirable to implement. 
- */ - -struct list_head spl_kmem_cache_list; /* List of caches */ -struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ - -static int spl_cache_flush(spl_kmem_cache_t *skc, - spl_kmem_magazine_t *skm, int flush); - -#ifdef HAVE_SET_SHRINKER -static struct shrinker *spl_kmem_cache_shrinker; -#else -static int spl_kmem_cache_generic_shrinker(int nr_to_scan, - unsigned int gfp_mask); -static struct shrinker spl_kmem_cache_shrinker = { - .shrink = spl_kmem_cache_generic_shrinker, - .seeks = KMC_DEFAULT_SEEKS, -}; -#endif - -#ifdef DEBUG_KMEM -# ifdef DEBUG_KMEM_TRACKING static kmem_debug_t * -kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits, - void *addr) +kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits, void *addr) { struct hlist_head *head; struct hlist_node *node; @@ -444,17 +404,20 @@ kmem_alloc_track(size_t size, int flags, const char *func, int line, unsigned long irq_flags; SENTRY; + /* Function may be called with KM_NOSLEEP so failure is possible */ dptr = (kmem_debug_t *) kmalloc_nofail(sizeof(kmem_debug_t), flags & ~__GFP_ZERO); - if (dptr == NULL) { + if (unlikely(dptr == NULL)) { SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "debug " "kmem_alloc(%ld, 0x%x) at %s:%d failed (%lld/%llu)\n", sizeof(kmem_debug_t), flags, func, line, kmem_alloc_used_read(), kmem_alloc_max); } else { - /* Marked unlikely because we should never be doing this, - * we tolerate to up 2 pages but a single page is best. */ + /* + * Marked unlikely because we should never be doing this, + * we tolerate to up 2 pages but a single page is best. + */ if (unlikely((size > PAGE_SIZE*2) && !(flags & KM_NODEBUG))) { SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "large " "kmem_alloc(%llu, 0x%x) at %s:%d (%lld/%llu)\n", @@ -463,14 +426,17 @@ kmem_alloc_track(size_t size, int flags, const char *func, int line, spl_debug_dumpstack(NULL); } - /* We use kstrdup() below because the string pointed to by + /* + * We use __strdup() below because the string pointed to by * __FUNCTION__ might not be available by the time we want - * to print it since the module might have been unloaded. */ - dptr->kd_func = kstrdup(func, flags & ~__GFP_ZERO); + * to print it since the module might have been unloaded. + * This can only fail in the KM_NOSLEEP case. 
+ */ + dptr->kd_func = __strdup(func, flags & ~__GFP_ZERO); if (unlikely(dptr->kd_func == NULL)) { kfree(dptr); SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, - "debug kstrdup() at %s:%d failed (%lld/%llu)\n", + "debug __strdup() at %s:%d failed (%lld/%llu)\n", func, line, kmem_alloc_used_read(), kmem_alloc_max); goto out; } @@ -533,7 +499,8 @@ kmem_free_track(void *ptr, size_t size) dptr = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr); - ASSERT(dptr); /* Must exist in hash due to kmem_alloc() */ + /* Must exist in hash due to kmem_alloc() */ + ASSERT(dptr); /* Size must match */ ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), " @@ -567,28 +534,37 @@ vmem_alloc_track(size_t size, int flags, const char *func, int line) ASSERT(flags & KM_SLEEP); + /* Function may be called with KM_NOSLEEP so failure is possible */ dptr = (kmem_debug_t *) kmalloc_nofail(sizeof(kmem_debug_t), flags & ~__GFP_ZERO); - if (dptr == NULL) { + if (unlikely(dptr == NULL)) { SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "debug " "vmem_alloc(%ld, 0x%x) at %s:%d failed (%lld/%llu)\n", sizeof(kmem_debug_t), flags, func, line, vmem_alloc_used_read(), vmem_alloc_max); } else { - /* We use kstrdup() below because the string pointed to by + /* + * We use __strdup() below because the string pointed to by * __FUNCTION__ might not be available by the time we want - * to print it, since the module might have been unloaded. */ - dptr->kd_func = kstrdup(func, flags & ~__GFP_ZERO); + * to print it, since the module might have been unloaded. + * This can never fail because we have already asserted + * that flags is KM_SLEEP. + */ + dptr->kd_func = __strdup(func, flags & ~__GFP_ZERO); if (unlikely(dptr->kd_func == NULL)) { kfree(dptr); SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, - "debug kstrdup() at %s:%d failed (%lld/%llu)\n", + "debug __strdup() at %s:%d failed (%lld/%llu)\n", func, line, vmem_alloc_used_read(), vmem_alloc_max); goto out; } - ptr = __vmalloc(size, (flags | __GFP_HIGHMEM) & ~__GFP_ZERO, - PAGE_KERNEL); + /* Use the correct allocator */ + if (flags & __GFP_ZERO) { + ptr = vzalloc_nofail(size, flags & ~__GFP_ZERO); + } else { + ptr = vmalloc_nofail(size, flags); + } if (unlikely(ptr == NULL)) { kfree(dptr->kd_func); @@ -600,9 +576,6 @@ vmem_alloc_track(size_t size, int flags, const char *func, int line) goto out; } - if (flags & __GFP_ZERO) - memset(ptr, 0, size); - vmem_alloc_used_add(size); if (unlikely(vmem_alloc_used_read() > vmem_alloc_max)) vmem_alloc_max = vmem_alloc_used_read(); @@ -640,7 +613,9 @@ vmem_free_track(void *ptr, size_t size) (unsigned long long) size); dptr = kmem_del_init(&vmem_lock, vmem_table, VMEM_HASH_BITS, ptr); - ASSERT(dptr); /* Must exist in hash due to vmem_alloc() */ + + /* Must exist in hash due to vmem_alloc() */ + ASSERT(dptr); /* Size must match */ ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), " @@ -673,11 +648,13 @@ kmem_alloc_debug(size_t size, int flags, const char *func, int line, void *ptr; SENTRY; - /* Marked unlikely because we should never be doing this, - * we tolerate to up 2 pages but a single page is best. */ + /* + * Marked unlikely because we should never be doing this, + * we tolerate to up 2 pages but a single page is best. 
+ */ if (unlikely((size > PAGE_SIZE * 2) && !(flags & KM_NODEBUG))) { SDEBUG(SD_CONSOLE | SD_WARNING, - "Large kmem_alloc(%llu, 0x%x) at %s:%d (%lld/%llu)\n", + "large kmem_alloc(%llu, 0x%x) at %s:%d (%lld/%llu)\n", (unsigned long long) size, flags, func, line, kmem_alloc_used_read(), kmem_alloc_max); spl_debug_dumpstack(NULL); @@ -693,7 +670,7 @@ kmem_alloc_debug(size_t size, int flags, const char *func, int line, ptr = kmalloc_nofail(size, flags); } - if (ptr == NULL) { + if (unlikely(ptr == NULL)) { SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "kmem_alloc(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n", (unsigned long long) size, flags, func, line, @@ -706,8 +683,9 @@ kmem_alloc_debug(size_t size, int flags, const char *func, int line, SDEBUG_LIMIT(SD_INFO, "kmem_alloc(%llu, 0x%x) at %s:%d = %p (%lld/%llu)\n", (unsigned long long) size, flags, func, line, ptr, - kmem_alloc_used_read(), kmem_alloc_max); + kmem_alloc_used_read(), kmem_alloc_max); } + SRETURN(ptr); } EXPORT_SYMBOL(kmem_alloc_debug); @@ -724,8 +702,6 @@ kmem_free_debug(void *ptr, size_t size) SDEBUG_LIMIT(SD_INFO, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr, (unsigned long long) size, kmem_alloc_used_read(), kmem_alloc_max); - - memset(ptr, 0x5a, size); kfree(ptr); SEXIT; @@ -740,17 +716,19 @@ vmem_alloc_debug(size_t size, int flags, const char *func, int line) ASSERT(flags & KM_SLEEP); - ptr = __vmalloc(size, (flags | __GFP_HIGHMEM) & ~__GFP_ZERO, - PAGE_KERNEL); - if (ptr == NULL) { + /* Use the correct allocator */ + if (flags & __GFP_ZERO) { + ptr = vzalloc_nofail(size, flags & (~__GFP_ZERO)); + } else { + ptr = vmalloc_nofail(size, flags); + } + + if (unlikely(ptr == NULL)) { SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "vmem_alloc(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n", (unsigned long long) size, flags, func, line, vmem_alloc_used_read(), vmem_alloc_max); } else { - if (flags & __GFP_ZERO) - memset(ptr, 0, size); - vmem_alloc_used_add(size); if (unlikely(vmem_alloc_used_read() > vmem_alloc_max)) vmem_alloc_max = vmem_alloc_used_read(); @@ -776,8 +754,6 @@ vmem_free_debug(void *ptr, size_t size) SDEBUG_LIMIT(SD_INFO, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr, (unsigned long long) size, vmem_alloc_used_read(), vmem_alloc_max); - - memset(ptr, 0x5a, size); vfree(ptr); SEXIT; @@ -787,6 +763,68 @@ EXPORT_SYMBOL(vmem_free_debug); # endif /* DEBUG_KMEM_TRACKING */ #endif /* DEBUG_KMEM */ +/* + * Slab allocation interfaces + * + * While the Linux slab implementation was inspired by the Solaris + * implemenation I cannot use it to emulate the Solaris APIs. I + * require two features which are not provided by the Linux slab. + * + * 1) Constructors AND destructors. Recent versions of the Linux + * kernel have removed support for destructors. This is a deal + * breaker for the SPL which contains particularly expensive + * initializers for mutex's, condition variables, etc. We also + * require a minimal level of cleanup for these data types unlike + * many Linux data type which do need to be explicitly destroyed. + * + * 2) Virtual address space backed slab. Callers of the Solaris slab + * expect it to work well for both small are very large allocations. + * Because of memory fragmentation the Linux slab which is backed + * by kmalloc'ed memory performs very badly when confronted with + * large numbers of large allocations. Basing the slab on the + * virtual address space removes the need for contigeous pages + * and greatly improve performance for large allocations. 
+ * + * For these reasons, the SPL has its own slab implementation with + * the needed features. It is not as highly optimized as either the + * Solaris or Linux slabs, but it should get me most of what is + * needed until it can be optimized or obsoleted by another approach. + * + * One serious concern I do have about this method is the relatively + * small virtual address space on 32bit arches. This will seriously + * constrain the size of the slab caches and their performance. + * + * XXX: Improve the partial slab list by carefully maintaining a + * strict ordering of fullest to emptiest slabs based on + * the slab reference count. This gaurentees the when freeing + * slabs back to the system we need only linearly traverse the + * last N slabs in the list to discover all the freeable slabs. + * + * XXX: NUMA awareness for optionally allocating memory close to a + * particular core. This can be adventageous if you know the slab + * object will be short lived and primarily accessed from one core. + * + * XXX: Slab coloring may also yield performance improvements and would + * be desirable to implement. + */ + +struct list_head spl_kmem_cache_list; /* List of caches */ +struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ + +static int spl_cache_flush(spl_kmem_cache_t *skc, + spl_kmem_magazine_t *skm, int flush); + +#ifdef HAVE_SET_SHRINKER +static struct shrinker *spl_kmem_cache_shrinker; +#else +static int spl_kmem_cache_generic_shrinker(int nr_to_scan, + unsigned int gfp_mask); +static struct shrinker spl_kmem_cache_shrinker = { + .shrink = spl_kmem_cache_generic_shrinker, + .seeks = KMC_DEFAULT_SEEKS, +}; +#endif + static void * kv_alloc(spl_kmem_cache_t *skc, int size, int flags) { |
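The interfaces touched above follow Solaris rather than Linux semantics: a KM_SLEEP allocation may block (and, given the retry loops introduced here, may sleep a second at a time) but is never expected to return NULL, and every free must be passed the original allocation size so the DEBUG_KMEM builds can check it against what was recorded at allocation time. Below is a minimal usage sketch of these interfaces; the my_buf_t type, the example() function, and the sizes are hypothetical and only for illustration.

	#include <sys/kmem.h>

	/* Hypothetical structure used only to illustrate the interfaces. */
	typedef struct my_buf {
		char mb_data[128];
	} my_buf_t;

	static void
	example(void)
	{
		my_buf_t *small;
		void *large;
		char *name;

		/* Small allocation, backed by kmalloc_nofail(); with KM_SLEEP
		 * it cannot fail, so no NULL check is required. */
		small = kmem_zalloc(sizeof (my_buf_t), KM_SLEEP);

		/* Large allocation, backed by vmalloc_nofail() so physically
		 * contiguous pages are not required. */
		large = vmem_alloc(1024 * 1024, KM_SLEEP);

		/* strfree() recovers the length with strlen(), so it must only
		 * be used on strings obtained from strdup()/kmem_asprintf(). */
		name = strdup("example");

		strfree(name);
		vmem_free(large, 1024 * 1024);
		kmem_free(small, sizeof (my_buf_t));
	}

Passing the size back to kmem_free()/vmem_free() is what lets the basic DEBUG_KMEM accounting, and the per-allocation DEBUG_KMEM_TRACKING hash, confirm that the freed size matches the allocated size.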