*/
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
- return mm->tlb_flush_pending;
+ /*
+ * Must be called with PTL held; such that our PTL acquire will have
+ * observed the increment from inc_tlb_flush_pending().
+ *
+ * Reports true while the tlb_flush_pending counter is positive.
+ */
+ return atomic_read(&mm->tlb_flush_pending) > 0;
+ }
+
+ /*
+ * Returns true if two or more TLB batching threads are running in
+ * parallel, i.e. the tlb_flush_pending counter is greater than one.
+ */
+ static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+ {
+ return atomic_read(&mm->tlb_flush_pending) > 1;
+ }
+
+ static inline void init_tlb_flush_pending(struct mm_struct *mm)
+ {
+ atomic_set(&mm->tlb_flush_pending, 0); /* start with no TLB flush pending */
}
- static inline void set_tlb_flush_pending(struct mm_struct *mm)
+
+ static inline void inc_tlb_flush_pending(struct mm_struct *mm)
{
- mm->tlb_flush_pending = true;
+ atomic_inc(&mm->tlb_flush_pending);
+
/*
- * Guarantee that the tlb_flush_pending increase does not leak into the
- * critical section updating the page tables
+ * Record one more pending TLB flush on @mm; paired with
+ * dec_tlb_flush_pending() once the flush has been done.
+ *
+ * The only time this value is relevant is when there are indeed pages
+ * to flush. And we'll only flush pages after changing them, which
+ * requires the PTL.
+ *
+ * So the ordering here is:
+ *
- * mm->tlb_flush_pending = true;
++ * atomic_inc(&mm->tlb_flush_pending);
+ * spin_lock(&ptl);
+ * ...
+ * set_pte_at();
+ * spin_unlock(&ptl);
+ *
+ * spin_lock(&ptl)
+ * mm_tlb_flush_pending();
+ * ....
+ * spin_unlock(&ptl);
+ *
+ * flush_tlb_range();
- * mm->tlb_flush_pending = false;
++ * atomic_dec(&mm->tlb_flush_pending);
+ *
+ * So the increment is constrained by the PTL unlock, and the
+ * decrement is constrained by the TLB invalidate.
*/
- smp_mb__before_spinlock();
}
+
/* Clearing is done after a TLB flush, which also provides a barrier. */
- static inline void clear_tlb_flush_pending(struct mm_struct *mm)
- {
- /* see set_tlb_flush_pending */
- mm->tlb_flush_pending = false;
- }
- #else
- static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
- {
- return false;
- }
- static inline void set_tlb_flush_pending(struct mm_struct *mm)
- {
- }
- static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+ static inline void dec_tlb_flush_pending(struct mm_struct *mm)
{
+ /*
+ * Guarantee that the tlb_flush_pending decrement does not leak into
+ * the critical section, since we must order the PTE change and changes
+ * to the pending TLB flush indication. We could have relied on TLB
+ * flush as a memory barrier, but this behavior is not clearly
+ * documented.
+ */
+ smp_mb__before_atomic();
+ atomic_dec(&mm->tlb_flush_pending);
}
- #endif
struct vm_fault;
goto clear_pmdnuma;
}
+ /*
+ * The page_table_lock above provides a memory barrier
+ * with change_protection_range.
+ */
+ if (mm_tlb_flush_pending(vma->vm_mm))
+ flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
+ /*
+ * Since we took the NUMA fault, we must have observed the !accessible
+ * bit. Make sure all other CPUs agree with that, to avoid them
+ * modifying the page we're about to migrate.
+ *
+ * Must be done under PTL such that we'll observe the relevant
+ * inc_tlb_flush_pending().
+ */
+ if (mm_tlb_flush_pending(vma->vm_mm))
+ need_flush = true;
+
/*
* Migrate the THP to the requested node, returns with page unlocked
* and access rights restored.