memcg: make mem_cgroup_split_huge_fixup() more efficient
author    KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
          Fri, 13 Jan 2012 01:18:20 +0000 (17:18 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 13 Jan 2012 04:13:05 +0000 (20:13 -0800)
In split_huge_page(), mem_cgroup_split_huge_fixup() is called to handle
page_cgroup modifications.  It takes move_lock_page_cgroup(), modifies
page_cgroup, and does LRU accounting work, and it is called
HPAGE_PMD_NR - 1 times per hugepage.

But thinking again,
  - compound_lock() is held across move_account(), so it is not
    necessary to take move_lock_page_cgroup().
  - The LRU is locked, and all tail pages will go onto the same LRU
    the head is now on.
  - page_cgroup entries are contiguous across a hugepage's range.

This patch changes mem_cgroup_split_huge_fixup() so that it is called
once per hugepage, reducing the cost of splitting.
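
The call-pattern change, in rough terms (a simplified paraphrase of
__split_huge_page_refcount(), not the literal code; the full diff
follows):

	/* Before: one call per tail page, each taking move_lock_page_cgroup() */
	for (i = 1; i < HPAGE_PMD_NR; i++) {
		struct page *page_tail = page + i;
		...
		mem_cgroup_split_huge_fixup(page, page_tail);
		lru_add_page_tail(zone, page, page_tail);
	}

	/*
	 * After: one call per hugepage, made before any tail page is
	 * added to the LRU; the per-tail loop moves inside the fixup
	 * and runs without extra locking, relying on zone->lru_lock
	 * and compound_lock() already being held by the caller.
	 */
	mem_cgroup_split_huge_fixup(page);
	for (i = 1; i < HPAGE_PMD_NR; i++)
		lru_add_page_tail(zone, page, page + i);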

[akpm@linux-foundation.org: fix typo, per Michal]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/memcontrol.h
mm/huge_memory.c
mm/memcontrol.c

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e2f8e7caf04b217f697f6c8c85afcc26c4510c21..cee3761666f07914348f50336557e266ba78b496 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -163,7 +163,7 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg);
 
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
+void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
 
 #ifdef CONFIG_DEBUG_VM
@@ -379,8 +379,7 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
        return 0;
 }
 
-static inline void mem_cgroup_split_huge_fixup(struct page *head,
-                                               struct page *tail)
+static inline void mem_cgroup_split_huge_fixup(struct page *head)
 {
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 36b3d988b4ef6ac8c263ee0732c1d08513afb04f..db522e160ccaacc1c12c162666d25bee8ebed7e8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1207,6 +1207,8 @@ static void __split_huge_page_refcount(struct page *page)
        /* prevent PageLRU to go away from under us, and freeze lru stats */
        spin_lock_irq(&zone->lru_lock);
        compound_lock(page);
+       /* complete memcg works before add pages to LRU */
+       mem_cgroup_split_huge_fixup(page);
 
        for (i = 1; i < HPAGE_PMD_NR; i++) {
                struct page *page_tail = page + i;
@@ -1278,7 +1280,6 @@ static void __split_huge_page_refcount(struct page *page)
                BUG_ON(!PageDirty(page_tail));
                BUG_ON(!PageSwapBacked(page_tail));
 
-               mem_cgroup_split_huge_fixup(page, page_tail);
 
                lru_add_page_tail(zone, page, page_tail);
        }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 972878b648c2af512c7f586b0c756c8fb0d92fa6..42174612cc0b42deae1108acb5a7c47d354f97e0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2553,39 +2553,39 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                        (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
 /*
  * Because tail pages are not marked as "used", set it. We're under
- * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ * zone->lru_lock, 'splitting on pmd' and compound_lock.
+ * charge/uncharge will be never happen and move_account() is done under
+ * compound_lock(), so we don't have to take care of races.
  */
-void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+void mem_cgroup_split_huge_fixup(struct page *head)
 {
        struct page_cgroup *head_pc = lookup_page_cgroup(head);
-       struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
-       unsigned long flags;
+       struct page_cgroup *pc;
+       int i;
 
        if (mem_cgroup_disabled())
                return;
-       /*
-        * We have no races with charge/uncharge but will have races with
-        * page state accounting.
-        */
-       move_lock_page_cgroup(head_pc, &flags);
+       for (i = 1; i < HPAGE_PMD_NR; i++) {
+               pc = head_pc + i;
+               pc->mem_cgroup = head_pc->mem_cgroup;
+               smp_wmb();/* see __commit_charge() */
+               /*
+                * LRU flags cannot be copied because we need to add tail
+                * page to LRU by generic call and our hooks will be called.
+                */
+               pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+       }
 
-       tail_pc->mem_cgroup = head_pc->mem_cgroup;
-       smp_wmb(); /* see __commit_charge() */
        if (PageCgroupAcctLRU(head_pc)) {
                enum lru_list lru;
                struct mem_cgroup_per_zone *mz;
-
                /*
-                * LRU flags cannot be copied because we need to add tail
-                *.page to LRU by generic call and our hook will be called.
                 * We hold lru_lock, then, reduce counter directly.
                 */
                lru = page_lru(head);
                mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
-               MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+               MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
        }
-       tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
-       move_unlock_page_cgroup(head_pc, &flags);
 }
 #endif
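
The "pc = head_pc + i" stride in the new loop is valid because
page_cgroup entries are allocated as a flat, pfn-indexed array and a
hugepage's subpages are physically contiguous.  For reference, a
simplified sketch of the flatmem variant of lookup_page_cgroup() (the
real mm/page_cgroup.c of this era also handles sparsemem and a
not-yet-allocated base, so this is illustrative, not verbatim):

	struct page_cgroup *lookup_page_cgroup(struct page *page)
	{
		unsigned long pfn = page_to_pfn(page);
		struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));

		/* one page_cgroup per pfn, allocated as one array per node */
		return pgdat->node_page_cgroup + (pfn - pgdat->node_start_pfn);
	}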