{
if (!pol)
return;
- if (!mpol_store_user_nodemask(pol) &&
+ if (!mpol_store_user_nodemask(pol) && !(pol->flags & MPOL_F_LOCAL) &&
nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
return;
return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
}
+/*
+ * queue_pages_pmd() has three possible return values:
+ * 1 - pages are placed on the right node or queued successfully.
+ * 0 - THP was split.
+ * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
+ * page was already on a node that does not follow the policy.
+ */
static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
unsigned long flags;
if (unlikely(is_pmd_migration_entry(*pmd))) {
- ret = 1;
+ ret = -EIO;
goto unlock;
}
page = pmd_page(*pmd);
ret = 1;
flags = qp->flags;
/* go to thp migration */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(walk->vma)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
migrate_page_add(page, qp->pagelist, flags);
+ } else
+ ret = -EIO;
unlock:
spin_unlock(ptl);
out:
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
- if (ret)
+ if (ret > 0)
return 0;
+ else if (ret < 0)
+ return ret;
}
if (pmd_trans_unstable(pmd))
goto retry;
}
- migrate_page_add(page, qp->pagelist, flags);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(vma))
+ break;
+ migrate_page_add(page, qp->pagelist, flags);
+ } else
+ break;
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
- return 0;
+ return addr != end ? -EIO : 0;
}
static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
unsigned long endvma = vma->vm_end;
unsigned long flags = qp->flags;
- if (!vma_migratable(vma))
+ /*
+ * Need check MPOL_MF_STRICT to return -EIO if possible
+ * regardless of vma_migratable
+ */
+ if (!vma_migratable(vma) &&
+ !(flags & MPOL_MF_STRICT))
return 1;
if (endvma > end)
}
/* queue pages from current vma */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & MPOL_MF_VALID)
return 0;
return 1;
}
nodemask_t *nodes)
{
unsigned long copy = ALIGN(maxnode-1, 64) / 8;
- const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);
+ unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
if (copy > nbytes) {
if (copy > PAGE_SIZE)
int uninitialized_var(pval);
nodemask_t nodes;
- if (nmask != NULL && maxnode < MAX_NUMNODES)
+ if (nmask != NULL && maxnode < nr_node_ids)
return -EINVAL;
err = do_get_mempolicy(&pval, &nodes, addr, flags);
unsigned long nr_bits, alloc_size;
DECLARE_BITMAP(bm, MAX_NUMNODES);
- nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
+ nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids);
alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
if (nmask)
nmask = policy_nodemask(gfp, pol);
if (!nmask || node_isset(hpage_node, *nmask)) {
mpol_cond_put(pol);
- page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE, order);
+ /*
+ * We cannot invoke reclaim if __GFP_THISNODE
+ * is set. Invoking reclaim with
+ * __GFP_THISNODE set, would cause THP
+ * allocations to trigger heavy swapping
+ * despite there may be tons of free memory
+ * (including potentially plenty of THP
+ * already available in the buddy) on all the
+ * other NUMA nodes.
+ *
+ * At most we could invoke compaction when
+ * __GFP_THISNODE is set (but we would need to
+ * refrain from invoking reclaim even if
+ * compaction returned COMPACT_SKIPPED because
+ * there wasn't not enough memory to succeed
+ * compaction). For now just avoid
+ * __GFP_THISNODE instead of limiting the
+ * allocation path to a strict and single
+ * compaction invocation.
+ *
+ * Supposedly if direct reclaim was enabled by
+ * the caller, the app prefers THP regardless
+ * of the node it comes from so this would be
+ * more desiderable behavior than only
+ * providing THP originated from the local
+ * node in such case.
+ */
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ gfp |= __GFP_THISNODE;
+ page = __alloc_pages_node(hpage_node, gfp, order);
goto out;
}
}