extern void mpol_rebind_task(struct task_struct *tsk,
const nodemask_t *new);
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
+extern void mpol_fix_fork_child_flag(struct task_struct *p);
#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
#ifdef CONFIG_CPUSET
{
}
+static inline void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+}
+
#define set_cpuset_being_rebound(x) do {} while (0)
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
#define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x04000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x08000000 /* Spread some slab caches over cpuset */
+#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
/*
* Only the _current_ task can read/write to tsk->flags, but other
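
The new PF_MEMPOLICY value takes a previously unused bit next to the cpuset spread flags above, so all three policy bits can be tested together in one current->flags branch, which the slab allocator hunk further down relies on. If one wanted to assert that at compile time, a sketch could look like the following; it is illustrative only, not part of the patch, and BUILD_BUG_ON would have to live inside some function body:

	/* Illustrative only: the three policy bits are disjoint, so they
	 * can be OR-combined and tested in a single current->flags
	 * branch. */
	BUILD_BUG_ON(PF_SPREAD_PAGE & PF_SPREAD_SLAB);
	BUILD_BUG_ON((PF_SPREAD_PAGE | PF_SPREAD_SLAB) & PF_MEMPOLICY);
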
p->mempolicy = NULL;
goto bad_fork_cleanup_cpuset;
}
+ mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_DEBUG_MUTEXES
return mpol_check_policy(mode, nodes);
}
+
+/*
+ * Update the PF_MEMPOLICY bit in task->flags: set it iff the task
+ * has a non-default mempolicy.  This lets memory allocation hot code
+ * paths check quickly for a non-default mempolicy, perhaps combined
+ * with other PF_* flag bits in a single test.
+ *
+ * If called from outside this file, the task 'p' should -only- be
+ * a newly forked child not yet visible on the task list, because
+ * manipulating the task flags of a visible task is not safe.
+ *
+ * The above limitation is why this routine has the funny name
+ * mpol_fix_fork_child_flag().
+ *
+ * It is also safe to call this with a task pointer of current,
+ * which the static wrapper mpol_set_task_struct_flag() does,
+ * for use within this file.
+ */
+
+void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+ if (p->mempolicy)
+ p->flags |= PF_MEMPOLICY;
+ else
+ p->flags &= ~PF_MEMPOLICY;
+}
+
+static void mpol_set_task_struct_flag(void)
+{
+ mpol_fix_fork_child_flag(current);
+}
+
/* Set the process memory policy */
long do_set_mempolicy(int mode, nodemask_t *nodes)
{
return PTR_ERR(new);
mpol_free(current->mempolicy);
current->mempolicy = new;
+ mpol_set_task_struct_flag();
if (new && new->policy == MPOL_INTERLEAVE)
current->il_next = first_node(new->v.nodes);
return 0;
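
To connect the pieces: both call sites added by this patch follow the rule stated in the comment above mpol_fix_fork_child_flag(). The fork-path hunk earlier (the one with the bad_fork_cleanup_cpuset label) calls it on a newly forked child that is not yet on the task list, while do_set_mempolicy() above updates only current, through the static wrapper. A condensed sketch, not a verbatim copy of either hunk:

	/* Fork path: 'p' is a newly forked child, not yet visible on the
	 * task list, so its flags word may be rewritten directly. */
	mpol_fix_fork_child_flag(p);

	/* do_set_mempolicy(): only current's own flags are touched, via
	 * the static wrapper. */
	current->mempolicy = new;
	mpol_set_task_struct_flag();
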
#ifdef CONFIG_NUMA
static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
+static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
static struct array_cache **alloc_alien_cache(int node, int limit)
{
struct array_cache *ac;
#ifdef CONFIG_NUMA
- if (unlikely(current->mempolicy && !in_interrupt())) {
- int nid = slab_node(current->mempolicy);
-
- if (nid != numa_node_id())
- return __cache_alloc_node(cachep, flags, nid);
- }
- if (unlikely(cpuset_do_slab_mem_spread() &&
- (cachep->flags & SLAB_MEM_SPREAD) &&
- !in_interrupt())) {
- int nid = cpuset_mem_spread_node();
-
- if (nid != numa_node_id())
- return __cache_alloc_node(cachep, flags, nid);
+ if (unlikely(current->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB |
+ PF_MEMPOLICY))) {
+ objp = alternate_node_alloc(cachep, flags);
+ if (objp != NULL)
+ return objp;
}
#endif
}
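
This hunk is the hot-path payoff of the PF_MEMPOLICY bookkeeping: the common case of no mempolicy and no cpuset spreading is now decided by one bit test against current->flags, instead of dereferencing current->mempolicy and evaluating cpuset_do_slab_mem_spread() on every allocation. An annotated restatement of the new test, as a sketch rather than part of the patch:

	/* PF_MEMPOLICY is kept up to date by mpol_fix_fork_child_flag()
	 * at fork time and by mpol_set_task_struct_flag() in
	 * do_set_mempolicy(); PF_SPREAD_PAGE and PF_SPREAD_SLAB are
	 * maintained by the cpuset code (see the flag comments in the
	 * sched.h hunk).  With none of the bits set, the allocation
	 * never leaves the node-local fast path. */
	if (unlikely(current->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB |
					PF_MEMPOLICY))) {
		objp = alternate_node_alloc(cachep, flags);
		if (objp != NULL)
			return objp;
	}
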
#ifdef CONFIG_NUMA
+/*
+ * Try allocating on another node if PF_SPREAD_PAGE, PF_SPREAD_SLAB or
+ * PF_MEMPOLICY is set in current->flags.
+ *
+ * If we are in_interrupt(), process context, including cpusets and
+ * mempolicy, may not apply and should not be used for allocation policy.
+ */
+static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+ int nid_alloc, nid_here;
+
+ if (in_interrupt())
+ return NULL;
+ nid_alloc = nid_here = numa_node_id();
+ if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
+ nid_alloc = cpuset_mem_spread_node();
+ else if (current->mempolicy)
+ nid_alloc = slab_node(current->mempolicy);
+ if (nid_alloc != nid_here)
+ return __cache_alloc_node(cachep, flags, nid_alloc);
+ return NULL;
+}
+
/*
 * An interface to enable slab creation on nodeid
*/
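
Two design points in alternate_node_alloc() are worth making explicit (this note is editorial, not part of the patch): when both apply, cpuset slab spreading takes precedence over the task mempolicy, and a NULL return means no alternate node was chosen, either because the selected node is already the local node or because we are in interrupt context. Seen from the caller, the contract is roughly:

	/* Illustrative caller-side contract; the real caller is the
	 * allocation fast-path hunk earlier in this patch. */
	objp = alternate_node_alloc(cachep, flags);
	if (objp != NULL)
		return objp;	/* allocated on the spread/mempolicy node */
	/* NULL: no alternate node applies (local node chosen, or we are
	 * in interrupt context); fall through to the normal node-local
	 * allocation path. */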