author		Brian Behlendorf <[email protected]>	2012-08-18 12:42:28 -0700
committer	Brian Behlendorf <[email protected]>	2012-08-27 12:00:55 -0700
commit		eb0f407a2b9089113ef6f2402ebd887511315b43 (patch)
tree		d02e28271e7938fc79dd3db92e8404800579358f	/include/sys/kmem.h
parent		500e95c884f711883e094b48b58450ce2d80f2ea (diff)
Add PF_NOFS debugging flag
PF_NOFS is a per-process debug flag which is set in current->flags to
detect when a process is performing an unsafe allocation. All tasks
with PF_NOFS set must strictly use KM_PUSHPAGE for allocations because
if they enter direct reclaim and initiate I/O they may deadlock.

When debugging is disabled, any incorrect usage will be detected and a
call stack with a warning will be printed to the console. The flags
will then be automatically corrected to allow for safe execution. If
debugging is enabled this will be treated as a fatal condition.

To avoid any risk of conflicting with the existing PF_ flags, the
PF_NOFS bit shadows the rarely used PF_MUTEX_TESTER bit. Only when
CONFIG_RT_MUTEX_TESTER is not set, and we know this bit is unused,
will the PF_NOFS bit be valid. Happily, most existing distributions
ship a kernel with CONFIG_RT_MUTEX_TESTER disabled.

Signed-off-by: Brian Behlendorf <[email protected]>
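To make the intended calling convention concrete, here is a minimal
sketch (not part of this patch) of how a task might be marked with
PF_NOFS around a region where direct reclaim must not re-enter the
filesystem. The example_pushpage_section() wrapper is hypothetical;
only PF_NOFS, KM_PUSHPAGE, kmem_alloc()/kmem_free(), and current->flags
come from the SPL and this commit.

#include <sys/kmem.h>	/* PF_NOFS, KM_PUSHPAGE, kmem_alloc() */

/* Hypothetical caller: while PF_NOFS is set in current->flags, every
 * allocation must use KM_PUSHPAGE so direct reclaim cannot re-enter
 * the filesystem or initiate I/O. */
static void
example_pushpage_section(size_t size)
{
	void *buf;

	current->flags |= PF_NOFS;	/* entering the unsafe region */

	buf = kmem_alloc(size, KM_PUSHPAGE);	/* reclaim-safe */
	/* kmem_alloc(size, KM_SLEEP) here would be caught by
	 * sanitize_flags() below, since KM_SLEEP allows __GFP_FS. */
	if (buf != NULL)
		kmem_free(buf, size);

	current->flags &= ~PF_NOFS;	/* leaving the unsafe region */
}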
Diffstat (limited to 'include/sys/kmem.h')
-rw-r--r--	include/sys/kmem.h	49
1 file changed, 49 insertions, 0 deletions
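The heart of the change is the sanitize_flags() helper added below; the
correction it applies is a single mask operation on the GFP flags. As a
rough illustration (assuming the GFP definitions of kernels from this
era, where GFP_KERNEL is __GFP_WAIT | __GFP_IO | __GFP_FS), clearing
the two offending bits demotes a GFP_KERNEL request to the equivalent
of GFP_NOIO, which may still sleep but can no longer initiate I/O or
recurse into the filesystem from reclaim. The helper name below is
hypothetical:

#include <linux/gfp.h>

/* Illustration only: the same masking sanitize_flags() performs when
 * it finds PF_NOFS set together with unsafe GFP bits. */
static gfp_t
example_strip_unsafe_bits(gfp_t flags)
{
	/* GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) becomes the
	 * equivalent of GFP_NOIO (__GFP_WAIT). */
	return (flags & ~(__GFP_IO | __GFP_FS));
}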
diff --git a/include/sys/kmem.h b/include/sys/kmem.h
index aaff6d046..116d6db0d 100644
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -56,6 +56,47 @@
#endif
/*
+ * PF_NOFS is a per-process debug flag which is set in current->flags to
+ * detect when a process is performing an unsafe allocation. All tasks
+ * with PF_NOFS set must strictly use KM_PUSHPAGE for allocations because
+ * if they enter direct reclaim and initiate I/O they may deadlock.
+ *
+ * When debugging is disabled, any incorrect usage will be detected and
+ * a call stack with a warning will be printed to the console. The flags
+ * will then be automatically corrected to allow for safe execution. If
+ * debugging is enabled this will be treated as a fatal condition.
+ *
+ * To avoid any risk of conflicting with the existing PF_ flags, the
+ * PF_NOFS bit shadows the rarely used PF_MUTEX_TESTER bit. Only when
+ * CONFIG_RT_MUTEX_TESTER is not set, and we know this bit is unused,
+ * will the PF_NOFS bit be valid. Happily, most existing distributions
+ * ship a kernel with CONFIG_RT_MUTEX_TESTER disabled.
+ */
+#if !defined(CONFIG_RT_MUTEX_TESTER) && defined(PF_MUTEX_TESTER)
+# define PF_NOFS PF_MUTEX_TESTER
+
+static inline void
+sanitize_flags(struct task_struct *p, gfp_t *flags)
+{
+	if (unlikely((p->flags & PF_NOFS) && (*flags & (__GFP_IO|__GFP_FS)))) {
+# ifdef NDEBUG
+		SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "Fixing allocation for "
+		    "task %s (%d) which used GFP flags 0x%x with PF_NOFS set\n",
+		    p->comm, p->pid, *flags);
+		spl_debug_dumpstack(p);
+		*flags &= ~(__GFP_IO|__GFP_FS);
+# else
+		PANIC("FATAL allocation for task %s (%d) which used GFP "
+		    "flags 0x%x with PF_NOFS set\n", p->comm, p->pid, *flags);
+# endif /* NDEBUG */
+	}
+}
+#else
+# define PF_NOFS 0x00000000
+# define sanitize_flags(p, fl) ((void)0)
+#endif /* !defined(CONFIG_RT_MUTEX_TESTER) && defined(PF_MUTEX_TESTER) */
+
+/*
* __GFP_NOFAIL looks like it will be removed from the kernel perhaps as
* early as 2.6.32. To avoid this issue when it occurs in upstream kernels
 * we retry the allocation here as long as __GFP_WAIT is set (i.e., not GFP_ATOMIC).
@@ -67,6 +108,8 @@ kmalloc_nofail(size_t size, gfp_t flags)
{
	void *ptr;
+	sanitize_flags(current, &flags);
+
	do {
		ptr = kmalloc(size, flags);
	} while (ptr == NULL && (flags & __GFP_WAIT));
@@ -79,6 +122,8 @@ kzalloc_nofail(size_t size, gfp_t flags)
{
	void *ptr;
+	sanitize_flags(current, &flags);
+
	do {
		ptr = kzalloc(size, flags);
	} while (ptr == NULL && (flags & __GFP_WAIT));
@@ -92,6 +137,8 @@ kmalloc_node_nofail(size_t size, gfp_t flags, int node)
#ifdef HAVE_KMALLOC_NODE
	void *ptr;
+	sanitize_flags(current, &flags);
+
	do {
		ptr = kmalloc_node(size, flags, node);
	} while (ptr == NULL && (flags & __GFP_WAIT));
@@ -107,6 +154,8 @@ vmalloc_nofail(size_t size, gfp_t flags)
{
	void *ptr;
+	sanitize_flags(current, &flags);
+
	/*
	 * Retry failed __vmalloc() allocations once every second. The
	 * rationale for the delay is that the likely failure modes are: