From f412ac08c9861b4791af0145934c22f1458686da Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:41 -0700 Subject: [PATCH] [PATCH] mm: fix rss and mmlist locking A couple of oddities were guarded by page_table_lock, no longer properly guarded when that is split. The mm_counters of file_rss and anon_rss: make those an atomic_t, or an atomic64_t if the architecture supports it, in such a case. Definitions by courtesy of Christoph Lameter: who spent considerable effort on more scalable ways of counting, but found insufficient benefit in practice. And adding an mm with swap to the mmlist for swapoff: the list is well- guarded by its own lock, but the list_empty check now has to be repeated inside it. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 42 ++++++++++++++++++++++++++++++++++++++---- mm/memory.c | 4 +++- mm/rmap.c | 3 ++- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 292cb57ce38..1c30bc308ef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -249,13 +249,47 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, extern void arch_unmap_area(struct mm_struct *, unsigned long); extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS +/* + * The mm counters are not protected by its page_table_lock, + * so must be incremented atomically. + */ +#ifdef ATOMIC64_INIT +#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value) +#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member)) +#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member) +#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member) +#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member) +typedef atomic64_t mm_counter_t; +#else /* !ATOMIC64_INIT */ +/* + * The counters wrap back to 0 at 2^32 * PAGE_SIZE, + * that is, at 16TB if using 4kB page size. + */ +#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value) +#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member)) +#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member) +#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member) +#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member) +typedef atomic_t mm_counter_t; +#endif /* !ATOMIC64_INIT */ + +#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +/* + * The mm counters are protected by its page_table_lock, + * so can be incremented directly. + */ #define set_mm_counter(mm, member, value) (mm)->_##member = (value) #define get_mm_counter(mm, member) ((mm)->_##member) #define add_mm_counter(mm, member, value) (mm)->_##member += (value) #define inc_mm_counter(mm, member) (mm)->_##member++ #define dec_mm_counter(mm, member) (mm)->_##member-- -#define get_mm_rss(mm) ((mm)->_file_rss + (mm)->_anon_rss) +typedef unsigned long mm_counter_t; + +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +#define get_mm_rss(mm) \ + (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) #define update_hiwater_rss(mm) do { \ unsigned long _rss = get_mm_rss(mm); \ if ((mm)->hiwater_rss < _rss) \ @@ -266,8 +300,6 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) -typedef unsigned long mm_counter_t; - struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; @@ -291,7 +323,9 @@ struct mm_struct { * by mmlist_lock */ - /* Special counters protected by the page_table_lock */ + /* Special counters, in some configurations protected by the + * page_table_lock, in other configurations by being atomic. + */ mm_counter_t _file_rss; mm_counter_t _anon_rss; diff --git a/mm/memory.c b/mm/memory.c index e9ef599498b..d68421dd64e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -372,7 +372,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, /* make sure dst_mm is on swapoff's mmlist. */ if (unlikely(list_empty(&dst_mm->mmlist))) { spin_lock(&mmlist_lock); - list_add(&dst_mm->mmlist, &src_mm->mmlist); + if (list_empty(&dst_mm->mmlist)) + list_add(&dst_mm->mmlist, + &src_mm->mmlist); spin_unlock(&mmlist_lock); } } diff --git a/mm/rmap.c b/mm/rmap.c index a33e779d1bd..a7427bbf57e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -559,7 +559,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) swap_duplicate(entry); if (list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); - list_add(&mm->mmlist, &init_mm.mmlist); + if (list_empty(&mm->mmlist)) + list_add(&mm->mmlist, &init_mm.mmlist); spin_unlock(&mmlist_lock); } set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); -- 2.20.1