mm: soft-offline: check return value in second __get_any_page() call
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ceb0c7f1932f2e97c18cb1971f5b0593d57172ab..37df20faddd59c87841ad6b7c27d92dbc3d87971 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -208,9 +208,9 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
 #endif
        si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
 
-       if ((flags & MF_ACTION_REQUIRED) && t == current) {
+       if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
                si.si_code = BUS_MCEERR_AR;
-               ret = force_sig_info(SIGBUS, &si, t);
+               ret = force_sig_info(SIGBUS, &si, current);
        } else {
                /*
                 * Don't use force here, it's convenient if the signal
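The first hunk hardens kill_proc() for action-required errors. The task "t" comes from the collect_procs() rmap walk, which may hand back any task sharing the faulting mm, so the old "t == current" test could miss and fall through to the queued, non-forced SIGBUS even though the fault happened in the current context; comparing mm pointers and forcing the signal at current keeps BUS_MCEERR_AR delivery synchronous in multi-threaded processes. A minimal userspace sketch of the scenario, assuming CONFIG_MEMORY_FAILURE and root (MADV_HWPOISON needs CAP_SYS_ADMIN); note the synchronous SIGBUS here comes from the page-fault path rather than from kill_proc() itself, so it only demonstrates the userspace-visible contract the hunk preserves for real machine checks. Build with gcc -pthread:

    /*
     * Hedged demo, not part of the patch: two threads share one mm,
     * so the task found by the kernel's rmap walk need not be the
     * thread that trips over the poison.
     */
    #include <pthread.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #ifndef MADV_HWPOISON
    #define MADV_HWPOISON 100
    #endif

    static char *page;
    static long pgsz;

    static void bus_handler(int sig, siginfo_t *si, void *uc)
    {
            (void)sig; (void)uc;
            /* For a synchronous consume, si->si_code is BUS_MCEERR_AR
             * and si->si_addr points into the poisoned page. */
            _exit(si->si_code);
    }

    static void *sibling(void *arg)
    {
            (void)arg;
            memset(page, 0xa5, pgsz);  /* fault the page into the shared mm */
            return NULL;
    }

    int main(void)
    {
            struct sigaction sa = { 0 };
            pthread_t t;

            sa.sa_sigaction = bus_handler;
            sa.sa_flags = SA_SIGINFO;
            sigaction(SIGBUS, &sa, NULL);

            pgsz = sysconf(_SC_PAGESIZE);
            page = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (page == MAP_FAILED)
                    return 1;

            pthread_create(&t, NULL, sibling, NULL);
            pthread_join(t, NULL);

            if (madvise(page, pgsz, MADV_HWPOISON))  /* inject the error */
                    perror("madvise(MADV_HWPOISON)");

            /* Touching the poisoned page now raises a synchronous
             * SIGBUS in this thread, the behavior the hunk keeps
             * intact for multi-threaded consumers. */
            page[0] = 1;
            return 0;
    }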
@@ -382,10 +382,12 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno,
        }
 }
 
-static int task_early_kill(struct task_struct *tsk)
+static int task_early_kill(struct task_struct *tsk, int force_early)
 {
        if (!tsk->mm)
                return 0;
+       if (force_early)
+               return 1;
        if (tsk->flags & PF_MCE_PROCESS)
                return !!(tsk->flags & PF_MCE_EARLY);
        return sysctl_memory_failure_early_kill;
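task_early_kill() is the policy gate deciding which tasks get SIGBUS at error time rather than at next access. Without force_early it honors the per-task PR_MCE_KILL setting (PF_MCE_PROCESS plus PF_MCE_EARLY) and falls back to the vm.memory_failure_early_kill sysctl; the new parameter lets action-required faults bypass both, since deferring the kill makes no sense once a thread has already consumed the poison. A short sketch of the per-task opt-in this function consults, using the real PR_MCE_KILL prctl() interface (the fallback defines only cover older userspace headers):

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_MCE_KILL
    #define PR_MCE_KILL        33
    #define PR_MCE_KILL_SET     1
    #define PR_MCE_KILL_EARLY   1
    #endif

    int main(void)
    {
            /*
             * Sets PF_MCE_PROCESS and PF_MCE_EARLY on the calling
             * thread, so task_early_kill() returns 1 for it regardless
             * of vm.memory_failure_early_kill. With the hunk above,
             * force_early makes that same decision unconditionally,
             * for every task, when the error is action-required.
             */
            if (prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0))
                    perror("prctl(PR_MCE_KILL)");
            return 0;
    }

The system-wide equivalent is the vm.memory_failure_early_kill sysctl checked on the last line of the hunk; the prctl simply overrides it per task.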
@@ -395,7 +397,7 @@ static int task_early_kill(struct task_struct *tsk)
  * Collect processes when the error hit an anonymous page.
  */
 static void collect_procs_anon(struct page *page, struct list_head *to_kill,
-                             struct to_kill **tkc)
+                             struct to_kill **tkc, int force_early)
 {
        struct vm_area_struct *vma;
        struct task_struct *tsk;
@@ -411,7 +413,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
        for_each_process (tsk) {
                struct anon_vma_chain *vmac;
 
-               if (!task_early_kill(tsk))
+               if (!task_early_kill(tsk, force_early))
                        continue;
                anon_vma_interval_tree_foreach(vmac, &av->rb_root,
                                               pgoff, pgoff) {
@@ -430,7 +432,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
  * Collect processes when the error hit a file mapped page.
  */
 static void collect_procs_file(struct page *page, struct list_head *to_kill,
-                             struct to_kill **tkc)
+                             struct to_kill **tkc, int force_early)
 {
        struct vm_area_struct *vma;
        struct task_struct *tsk;
@@ -441,7 +443,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
        for_each_process(tsk) {
                pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 
-               if (!task_early_kill(tsk))
+               if (!task_early_kill(tsk, force_early))
                        continue;
 
                vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
@@ -467,7 +469,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
  * First preallocate one tokill structure outside the spin locks,
  * so that we can kill at least one process reasonably reliable.
  */
-static void collect_procs(struct page *page, struct list_head *tokill)
+static void collect_procs(struct page *page, struct list_head *tokill,
+                               int force_early)
 {
        struct to_kill *tk;
 
@@ -478,9 +481,9 @@ static void collect_procs(struct page *page, struct list_head *tokill)
        if (!tk)
                return;
        if (PageAnon(page))
-               collect_procs_anon(page, tokill, &tk);
+               collect_procs_anon(page, tokill, &tk, force_early);
        else
-               collect_procs_file(page, tokill, &tk);
+               collect_procs_file(page, tokill, &tk, force_early);
        kfree(tk);
 }
 
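The collect_procs() change itself is plumbing, but the preallocation comment above it deserves unpacking: the struct to_kill for the first victim is allocated with GFP_KERNEL before tasklist_lock is taken, because under the lock only GFP_ATOMIC allocations are possible and those may fail; "reasonably reliably" means at least one victim can always be recorded. A stripped-down sketch of that pattern, as it might read inside mm/memory-failure.c; struct victim and collect_victims() are illustrative names, not kernel API:

    /* Hedged sketch of the preallocate-outside-the-lock pattern. */
    struct victim {
            struct list_head list;
            struct task_struct *tsk;
    };

    static void collect_victims(struct list_head *out)
    {
            /* Reliable allocation while we may still sleep... */
            struct victim *spare = kmalloc(sizeof(*spare), GFP_KERNEL);
            struct task_struct *tsk;

            if (!spare)
                    return;         /* cannot even record one victim */

            read_lock(&tasklist_lock);
            for_each_process(tsk) {
                    /* ...atomic, fallible ones under the lock. */
                    struct victim *v = spare ?:
                                       kmalloc(sizeof(*v), GFP_ATOMIC);

                    if (!v)
                            break;  /* atomic allocation failed mid-walk */
                    spare = NULL;
                    get_task_struct(tsk);
                    v->tsk = tsk;
                    list_add_tail(&v->list, out);
            }
            read_unlock(&tasklist_lock);
            kfree(spare);           /* unused spare, like kfree(tk) above */
    }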
@@ -854,14 +857,14 @@ static int page_action(struct page_state *ps, struct page *p,
  * the pages and send SIGBUS to the processes if the data was dirty.
  */
 static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
-                                 int trapno, int flags)
+                                 int trapno, int flags, struct page **hpagep)
 {
        enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
        struct address_space *mapping;
        LIST_HEAD(tokill);
        int ret;
        int kill = 1, forcekill;
-       struct page *hpage = compound_head(p);
+       struct page *hpage = *hpagep;
        struct page *ppage;
 
        if (PageReserved(p) || PageSlab(p))
@@ -936,6 +939,21 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
                                BUG_ON(!PageHWPoison(p));
                                return SWAP_FAIL;
                        }
+                       /*
+                        * We pinned the head page for hwpoison handling,
+                        * now we split the thp and we are interested in
+                        * the hwpoisoned raw page, so move the refcount
+                        * to it. Similarly, page lock is shifted.
+                        */
+                       if (hpage != p) {
+                               if (!(flags & MF_COUNT_INCREASED)) {
+                                       put_page(hpage);
+                                       get_page(p);
+                               }
+                               lock_page(p);
+                               unlock_page(hpage);
+                               *hpagep = p;
+                       }
                        /* THP is split, so ppage should be the real poisoned page. */
                        ppage = p;
                }
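This new block is the reason hwpoison_user_mappings() now takes struct page **hpagep: after the THP split, the caller's pin and page lock still sit on what used to be the head page, while everything downstream (try_to_unmap(), truncation, the final unlock in memory_failure()) must act on the raw error page. The same logic condensed into a standalone helper for clarity, as it might read inside mm/memory-failure.c; hwpoison_shift_page() is an illustrative name, not a kernel function:

    /* Hedged sketch of the pin/lock handoff performed above. */
    static struct page *hwpoison_shift_page(struct page *head,
                                            struct page *raw, int flags)
    {
            if (head == raw)
                    return head;            /* nothing to shift */
            if (!(flags & MF_COUNT_INCREASED)) {
                    put_page(head);         /* drop our pin on the old head */
                    get_page(raw);          /* pin the raw poisoned page */
            }
            lock_page(raw);                 /* take the new lock first... */
            unlock_page(head);              /* ...then release the old one */
            return raw;                     /* stored back via *hpagep */
    }

memory_failure() picks the switch up through the &hpage argument added further down, which is what the "hpage points to the raw page after thp split" comment in that hunk documents.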
@@ -950,19 +968,13 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
         * there's nothing that can be done.
         */
        if (kill)
-               collect_procs(ppage, &tokill);
-
-       if (hpage != ppage)
-               lock_page(ppage);
+               collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED);
 
        ret = try_to_unmap(ppage, ttu);
        if (ret != SWAP_SUCCESS)
                printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
                                pfn, page_mapcount(ppage));
 
-       if (hpage != ppage)
-               unlock_page(ppage);
-
        /*
         * Now that the dirty bit has been propagated to the
         * struct page and all unmaps done we can decide if
@@ -1074,15 +1086,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
                        return 0;
                } else if (PageHuge(hpage)) {
                        /*
-                        * Check "just unpoisoned", "filter hit", and
-                        * "race with other subpage."
+                        * Check "filter hit" and "race with other subpage."
                         */
                        lock_page(hpage);
-                       if (!PageHWPoison(hpage)
-                           || (hwpoison_filter(p) && TestClearPageHWPoison(p))
-                           || (p != hpage && TestSetPageHWPoison(hpage))) {
-                               atomic_long_sub(nr_pages, &num_poisoned_pages);
-                               return 0;
+                       if (PageHWPoison(hpage)) {
+                               if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
+                                   || (p != hpage && TestSetPageHWPoison(hpage))) {
+                                       atomic_long_sub(nr_pages, &num_poisoned_pages);
+                                       unlock_page(hpage);
+                                       return 0;
+                               }
                        }
                        set_page_hwpoison_huge_page(hpage);
                        res = dequeue_hwpoisoned_huge_page(hpage);
@@ -1104,10 +1117,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
         * The check (unnecessarily) ignores LRU pages being isolated and
         * walked by the page reclaim code, however that's not a big loss.
         */
-       if (!PageHuge(p) && !PageTransTail(p)) {
-               if (!PageLRU(p))
-                       shake_page(p, 0);
-               if (!PageLRU(p)) {
+       if (!PageHuge(p)) {
+               if (!PageLRU(hpage))
+                       shake_page(hpage, 0);
+               if (!PageLRU(hpage)) {
                        /*
                         * shake_page could have turned it free.
                         */
@@ -1143,6 +1156,8 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
         */
        if (!PageHWPoison(p)) {
                printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
+               atomic_long_sub(nr_pages, &num_poisoned_pages);
+               put_page(hpage);
                res = 0;
                goto out;
        }
@@ -1179,8 +1194,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
        /*
         * Now take care of user space mappings.
         * Abort on fail: __delete_from_page_cache() assumes unmapped page.
+        *
+        * When the raw error page is thp tail page, hpage points to the raw
+        * page after thp split.
         */
-       if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {
+       if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage)
+           != SWAP_SUCCESS) {
                printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
                res = -EBUSY;
                goto out;
@@ -1410,7 +1429,8 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags)
 
        /*
         * Isolate the page, so that it doesn't get reallocated if it
-        * was free.
+        * was free. This flag should be kept set until the source page
+        * is freed and PG_hwpoison on it is set.
         */
        set_migratetype_isolate(p, true);
        /*
@@ -1433,7 +1453,6 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags)
                /* Not a free page */
                ret = 1;
        }
-       unset_migratetype_isolate(p, MIGRATE_MOVABLE);
        unlock_memory_hotplug();
        return ret;
 }
@@ -1453,7 +1472,9 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
                 * Did it turn free?
                 */
                ret = __get_any_page(page, pfn, 0);
-               if (!PageLRU(page)) {
+               if (ret == 1 && !PageLRU(page)) {
+                       /* Drop page reference which is from __get_any_page() */
+                       put_page(page);
                        pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
                                pfn, page->flags);
                        return -EIO;
@@ -1489,12 +1510,17 @@ static int soft_offline_huge_page(struct page *page, int flags)
                pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
                        pfn, ret, page->flags);
        } else {
-               set_page_hwpoison_huge_page(hpage);
-               dequeue_hwpoisoned_huge_page(hpage);
-               atomic_long_add(1 << compound_trans_order(hpage),
-                               &num_poisoned_pages);
+               /* overcommit hugetlb page will be freed to buddy */
+               if (PageHuge(page)) {
+                       set_page_hwpoison_huge_page(hpage);
+                       dequeue_hwpoisoned_huge_page(hpage);
+                       atomic_long_add(1 << compound_order(hpage),
+                                       &num_poisoned_pages);
+               } else {
+                       SetPageHWPoison(page);
+                       atomic_long_inc(&num_poisoned_pages);
+               }
        }
-       /* keep elevated page count for bad page */
        return ret;
 }
 
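The rewritten success branch re-checks PageHuge() because the migration source may have come from the hugetlb overcommit pool: a surplus hugepage is dissolved straight back into the buddy allocator when its last reference drops, so it must be poisoned as an order-0 page rather than dequeued as a hugepage. A hedged demo of that surplus case, assuming 2MB hugepages, root, MADV_SOFT_OFFLINE support on hugetlb mappings, and the sysctl setup shown in the comment:

    /*
     * Hedged demo. Assumed setup (as root):
     *   echo 0 > /proc/sys/vm/nr_hugepages
     *   echo 1 > /proc/sys/vm/nr_overcommit_hugepages
     * so the mapping below is backed by a surplus hugepage that is
     * freed to buddy once migration drops the last reference,
     * taking the !PageHuge() branch added above.
     */
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    #ifndef MADV_SOFT_OFFLINE
    #define MADV_SOFT_OFFLINE 101
    #endif
    #ifndef MAP_HUGETLB
    #define MAP_HUGETLB 0x40000             /* x86 value; arch-specific */
    #endif

    #define HPAGE_SIZE (2UL << 20)          /* assume 2MB hugepages */

    int main(void)
    {
            char *p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

            if (p == MAP_FAILED) {
                    perror("mmap(MAP_HUGETLB)");
                    return 1;
            }
            memset(p, 0x5a, HPAGE_SIZE);    /* instantiate the hugepage */

            /* Contents are migrated away; the freed surplus source
             * page is poisoned as a raw buddy page. */
            if (madvise(p, HPAGE_SIZE, MADV_SOFT_OFFLINE))
                    perror("madvise(MADV_SOFT_OFFLINE)");
            return 0;
    }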
@@ -1526,7 +1552,7 @@ int soft_offline_page(struct page *page, int flags)
 {
        int ret;
        unsigned long pfn = page_to_pfn(page);
-       struct page *hpage = compound_trans_head(page);
+       struct page *hpage = compound_head(page);
 
        if (PageHWPoison(page)) {
                pr_info("soft offline: %#lx page already poisoned\n", pfn);
@@ -1559,7 +1585,7 @@ int soft_offline_page(struct page *page, int flags)
                        atomic_long_inc(&num_poisoned_pages);
                }
        }
-       /* keep elevated page count for bad page */
+       unset_migratetype_isolate(page, MIGRATE_MOVABLE);
        return ret;
 }
 
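Two hunks cooperate here: __get_any_page() no longer clears the MIGRATE_ISOLATE marking it established, and soft_offline_page() clears it only once the offline attempt is fully resolved. That closes the window in which a migrated-away source page sits free in the buddy lists without PG_hwpoison set and could be handed out again. The resulting lifetime, compressed into one illustrative flow as it might read inside mm/memory-failure.c, with the hugepage branch elided; soft_offline_flow() is not a kernel function:

    /* Hedged sketch of the isolation lifetime after this patch. */
    static int soft_offline_flow(struct page *page, int flags)
    {
            /* get_any_page() -> __get_any_page() marks the pageblock
             * MIGRATE_ISOLATE, so a freed source page cannot be
             * reallocated while we are still working on it. */
            int ret = get_any_page(page, page_to_pfn(page), flags);

            if (ret > 0)                    /* in use: migrate, then poison */
                    ret = __soft_offline_page(page, flags);
            else if (ret == 0)              /* already free: just poison */
                    SetPageHWPoison(page);

            /* Only now, with PG_hwpoison set on the freed source page,
             * is it safe to hand the block back to the allocator. */
            unset_migratetype_isolate(page, MIGRATE_MOVABLE);
            return ret;
    }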
@@ -1625,7 +1651,22 @@ static int __soft_offline_page(struct page *page, int flags)
                        if (ret > 0)
                                ret = -EIO;
                } else {
+                       /*
+                        * After page migration succeeds, the source page can
+                        * be trapped in pagevec and actual freeing is delayed.
+                        * Freeing code works differently based on PG_hwpoison,
+                        * so there's a race. We need to make sure that the
+                        * source page should be freed back to buddy before
+                        * setting PG_hwpoison.
+                        */
+                       if (!is_free_buddy_page(page))
+                               lru_add_drain_all();
+                       if (!is_free_buddy_page(page))
+                               drain_all_pages();
                        SetPageHWPoison(page);
+                       if (!is_free_buddy_page(page))
+                               pr_info("soft offline: %#lx: page leaked\n",
+                                       pfn);
                        atomic_long_inc(&num_poisoned_pages);
                }
        } else {
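The comment added above is the heart of this final hunk: after a successful migration the source page may still sit in an LRU pagevec or on a per-CPU free list, and the page-freeing code branches on PG_hwpoison (a poisoned page hitting the free path is treated as deliberately leaked rather than as a bad page), so setting the flag while the page is still in flight races with its own freeing. The same ordering restated as a self-contained helper with the two drains annotated, as it might read inside mm/memory-failure.c; poison_freed_page() is illustrative, not kernel code:

    /* Hedged sketch of the drain-before-poison ordering above. */
    static void poison_freed_page(struct page *page, unsigned long pfn)
    {
            /* The migration source may be parked in an LRU pagevec... */
            if (!is_free_buddy_page(page))
                    lru_add_drain_all();
            /* ...or on a per-CPU free list; flush that too. */
            if (!is_free_buddy_page(page))
                    drain_all_pages();

            /* Safe now: the page has reached the buddy allocator, so
             * the PG_hwpoison-aware freeing path can no longer run on
             * it, and the MIGRATE_ISOLATE pageblock keeps it from
             * being handed out again. */
            SetPageHWPoison(page);
            if (!is_free_buddy_page(page))
                    pr_info("soft offline: %#lx: page leaked\n", pfn);
            atomic_long_inc(&num_poisoned_pages);
    }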