#include "internal.h"
+#ifdef CONFIG_MTK_EXTMEM
+extern bool extmem_in_mspace(struct vm_area_struct *vma);
+extern unsigned long get_virt_from_mspace(unsigned long pa);
+#endif
+
#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid.
#endif
* tear-down from @mm. The @fullmm argument is used when @mm is without
* users and we're going to destroy the full address space (exit/execve).
*/
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
{
tlb->mm = mm;
- tlb->fullmm = fullmm;
+ /* Is it from 0 to ~0? */
+ tlb->fullmm = !(start | (end+1));
tlb->need_flush_all = 0;
- tlb->start = -1UL;
- tlb->end = 0;
+ tlb->start = start;
+ tlb->end = end;
tlb->need_flush = 0;
tlb->local.next = NULL;
tlb->local.nr = 0;
{
struct mmu_gather_batch *batch, *next;
- tlb->start = start;
- tlb->end = end;
tlb_flush_mmu(tlb);
/* keep the page table cache within bounds */
if (!pte_file(pte)) {
swp_entry_t entry = pte_to_swp_entry(pte);
- if (swap_duplicate(entry) < 0)
- return entry.val;
-
- /* make sure dst_mm is on swapoff's mmlist. */
- if (unlikely(list_empty(&dst_mm->mmlist))) {
- spin_lock(&mmlist_lock);
- if (list_empty(&dst_mm->mmlist))
- list_add(&dst_mm->mmlist,
- &src_mm->mmlist);
- spin_unlock(&mmlist_lock);
- }
- if (likely(!non_swap_entry(entry)))
+ if (likely(!non_swap_entry(entry))) {
+ if (swap_duplicate(entry) < 0)
+ return entry.val;
+
+ /* make sure dst_mm is on swapoff's mmlist. */
+ if (unlikely(list_empty(&dst_mm->mmlist))) {
+ spin_lock(&mmlist_lock);
+ if (list_empty(&dst_mm->mmlist))
+ list_add(&dst_mm->mmlist,
+ &src_mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
rss[MM_SWAPENTS]++;
- else if (is_migration_entry(entry)) {
+ } else if (is_migration_entry(entry)) {
page = migration_entry_to_page(entry);
if (PageAnon(page))
* and page-free while holding it.
*/
if (force_flush) {
+ unsigned long old_end;
+
force_flush = 0;
-#ifdef HAVE_GENERIC_MMU_GATHER
- tlb->start = addr;
- tlb->end = end;
-#endif
+ /*
+ * Flush the TLB just for the previous segment,
+ * then update the range to be the remaining
+ * TLB range.
+ */
+ old_end = tlb->end;
+ tlb->end = addr;
+
tlb_flush_mmu(tlb);
+
+ tlb->start = addr;
+ tlb->end = old_end;
+
if (addr != end)
goto again;
}
unsigned long end = start + size;
lru_add_drain();
- tlb_gather_mmu(&tlb, mm, 0);
+ tlb_gather_mmu(&tlb, mm, start, end);
update_hiwater_rss(mm);
mmu_notifier_invalidate_range_start(mm, start, end);
for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
unsigned long end = address + size;
lru_add_drain();
- tlb_gather_mmu(&tlb, mm, 0);
+ tlb_gather_mmu(&tlb, mm, address, end);
update_hiwater_rss(mm);
mmu_notifier_invalidate_range_start(mm, address, end);
unmap_single_vma(&tlb, vma, address, end, details);
}
EXPORT_SYMBOL_GPL(zap_vma_ptes);
+/*
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+{
+ return pte_write(pte) ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+}
+
/**
* follow_page_mask - look up a page descriptor from a user-virtual address
* @vma: vm_area_struct mapping @address
}
if ((flags & FOLL_NUMA) && pte_numa(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !pte_write(pte))
+ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags))
goto unlock;
page = vm_normal_page(vma, address, pte);
return ERR_PTR(-EFAULT);
return page;
}
+EXPORT_SYMBOL_GPL(follow_page_mask);
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
page_mask = 0;
goto next_page;
}
-
+ #ifdef CONFIG_MTK_EXTMEM
+ if (!vma || !(vm_flags & vma->vm_flags))
+ {
+ return i ? : -EFAULT;
+ }
+
+ if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+ {
+ /*Would pass VM_IO | VM_RESERVED | VM_PFNMAP. (for Reserved Physical Memory PFN Mapping Usage)*/
+ if(!((vma->vm_flags&VM_IO)&&(vma->vm_flags&VM_RESERVED)&&(vma->vm_flags&VM_PFNMAP)))
+ return i ? : -EFAULT;
+ }
+ #else
if (!vma ||
(vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
!(vm_flags & vma->vm_flags))
return i ? : -EFAULT;
+ #endif
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
else
return -EFAULT;
}
- if (ret & VM_FAULT_SIGBUS)
+ if (ret & (VM_FAULT_SIGBUS |
+ VM_FAULT_SIGSEGV))
return i ? i : -EFAULT;
BUG();
}
*/
if ((ret & VM_FAULT_WRITE) &&
!(vma->vm_flags & VM_WRITE))
- foll_flags &= ~FOLL_WRITE;
+ foll_flags |= FOLL_COW;
cond_resched();
}
unsigned long address, unsigned int fault_flags)
{
struct vm_area_struct *vma;
+ vm_flags_t vm_flags;
int ret;
vma = find_extend_vma(mm, address);
if (!vma || address < vma->vm_start)
return -EFAULT;
+ vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+ if (!(vm_flags & vma->vm_flags))
+ return -EFAULT;
+
ret = handle_mm_fault(mm, vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return -ENOMEM;
if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
return -EHWPOISON;
- if (ret & VM_FAULT_SIGBUS)
+ if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
return -EFAULT;
BUG();
}
* un-COW'ed pages by matching them up with "vma->vm_pgoff".
* See vm_normal_page() for details.
*/
+#ifdef CONFIG_MTK_EXTMEM
+ if (addr == vma->vm_start && end == vma->vm_end) {
+ vma->vm_pgoff = pfn;
+ } else if (is_cow_mapping(vma->vm_flags))
+ return -EINVAL;
+#else
if (is_cow_mapping(vma->vm_flags)) {
if (addr != vma->vm_start || end != vma->vm_end)
return -EINVAL;
vma->vm_pgoff = pfn;
}
-
+#endif
err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
if (err)
return -EINVAL;
if (prev && prev->vm_end == address)
return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
- expand_downwards(vma, address - PAGE_SIZE);
+ return expand_downwards(vma, address - PAGE_SIZE);
}
if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
struct vm_area_struct *next = vma->vm_next;
if (next && next->vm_start == address + PAGE_SIZE)
return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
- expand_upwards(vma, address + PAGE_SIZE);
+ return expand_upwards(vma, address + PAGE_SIZE);
}
return 0;
}
pte_unmap(page_table);
+ /* File mapping without ->vm_ops ? */
+ if (vma->vm_flags & VM_SHARED)
+ return VM_FAULT_SIGBUS;
+
/* Check if we need to add a guard page to the stack */
if (check_stack_guard_page(vma, address) < 0)
- return VM_FAULT_SIGBUS;
+ return VM_FAULT_SIGSEGV;
/* Use the zero-page for reads */
if (!(flags & FAULT_FLAG_WRITE)) {
pgoff_t pgoff = (((address & PAGE_MASK)
- vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
+ if (!vma->vm_ops->fault)
+ return VM_FAULT_SIGBUS;
+
pte_unmap(page_table);
return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
}
}
int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
- unsigned long addr, int current_nid)
+ unsigned long addr, int page_nid)
{
get_page(page);
count_vm_numa_event(NUMA_HINT_FAULTS);
- if (current_nid == numa_node_id())
+ if (page_nid == numa_node_id())
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
return mpol_misplaced(page, vma, addr);
{
struct page *page = NULL;
spinlock_t *ptl;
- int current_nid = -1;
+ int page_nid = -1;
int target_nid;
bool migrated = false;
return 0;
}
- current_nid = page_to_nid(page);
- target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+ page_nid = page_to_nid(page);
+ target_nid = numa_migrate_prep(page, vma, addr, page_nid);
pte_unmap_unlock(ptep, ptl);
if (target_nid == -1) {
- /*
- * Account for the fault against the current node if it not
- * being replaced regardless of where the page is located.
- */
- current_nid = numa_node_id();
put_page(page);
goto out;
}
/* Migrate to the requested node */
migrated = migrate_misplaced_page(page, target_nid);
if (migrated)
- current_nid = target_nid;
+ page_nid = target_nid;
out:
- if (current_nid != -1)
- task_numa_fault(current_nid, 1, migrated);
+ if (page_nid != -1)
+ task_numa_fault(page_nid, 1, migrated);
return 0;
}
unsigned long offset;
spinlock_t *ptl;
bool numa = false;
- int local_nid = numa_node_id();
spin_lock(&mm->page_table_lock);
pmd = *pmdp;
for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
pte_t pteval = *pte;
struct page *page;
- int curr_nid = local_nid;
+ int page_nid = -1;
int target_nid;
- bool migrated;
+ bool migrated = false;
+
if (!pte_present(pteval))
continue;
if (!pte_numa(pteval))
if (unlikely(page_mapcount(page) != 1))
continue;
- /*
- * Note that the NUMA fault is later accounted to either
- * the node that is currently running or where the page is
- * migrated to.
- */
- curr_nid = local_nid;
- target_nid = numa_migrate_prep(page, vma, addr,
- page_to_nid(page));
- if (target_nid == -1) {
+ page_nid = page_to_nid(page);
+ target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+ pte_unmap_unlock(pte, ptl);
+ if (target_nid != -1) {
+ migrated = migrate_misplaced_page(page, target_nid);
+ if (migrated)
+ page_nid = target_nid;
+ } else {
put_page(page);
- continue;
}
- /* Migrate to the requested node */
- pte_unmap_unlock(pte, ptl);
- migrated = migrate_misplaced_page(page, target_nid);
- if (migrated)
- curr_nid = target_nid;
- task_numa_fault(curr_nid, 1, migrated);
+ if (page_nid != -1)
+ task_numa_fault(page_nid, 1, migrated);
pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
}
if (!pte_present(entry)) {
if (pte_none(entry)) {
if (vma->vm_ops) {
- if (likely(vma->vm_ops->fault))
- return do_linear_fault(mm, vma, address,
- pte, pmd, flags, entry);
+ return do_linear_fault(mm, vma, address,
+ pte, pmd, flags, entry);
}
return do_anonymous_page(mm, vma, address,
pte, pmd, flags);
/*
* By the time we get here, we already hold the mm semaphore
*/
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, unsigned int flags)
+static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- __set_current_state(TASK_RUNNING);
-
- count_vm_event(PGFAULT);
- mem_cgroup_count_vm_event(mm, PGFAULT);
-
- /* do counter updates before entering really critical section. */
- check_sync_rss_stat(current);
-
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, flags);
return handle_pte_fault(mm, vma, address, pte, pmd, flags);
}
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags)
+{
+ int ret;
+
+ __set_current_state(TASK_RUNNING);
+
+ count_vm_event(PGFAULT);
+ mem_cgroup_count_vm_event(mm, PGFAULT);
+
+ /* do counter updates before entering really critical section. */
+ check_sync_rss_stat(current);
+
+ /*
+ * Enable the memcg OOM handling for faults triggered in user
+ * space. Kernel faults are handled more gracefully.
+ */
+ if (flags & FAULT_FLAG_USER)
+ mem_cgroup_oom_enable();
+
+ ret = __handle_mm_fault(mm, vma, address, flags);
+
+ if (flags & FAULT_FLAG_USER) {
+ mem_cgroup_oom_disable();
+ /*
+ * The task may have entered a memcg OOM situation but
+ * if the allocation error was handled gracefully (no
+ * VM_FAULT_OOM), there is no need to kill anything.
+ * Just clean up the OOM state peacefully.
+ */
+ if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+ mem_cgroup_oom_synchronize(false);
+ }
+
+ return ret;
+}
+
#ifndef __PAGETABLE_PUD_FOLDED
/*
* Allocate page upper directory.
if (follow_phys(vma, addr, write, &prot, &phys_addr))
return -EINVAL;
- maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+ maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
if (write)
memcpy_toio(maddr + offset, buf, len);
else
return len;
}
+EXPORT_SYMBOL_GPL(generic_access_phys);
#endif
/*
ret = get_user_pages(tsk, mm, addr, 1,
write, 1, &page, &vma);
if (ret <= 0) {
+#ifdef CONFIG_MTK_EXTMEM
+ if (!write) {
+ vma = find_vma(mm, addr);
+ if (!vma || vma->vm_start > addr)
+ break;
+ if (vma->vm_end < addr + len)
+ len = vma->vm_end - addr;
+ if (extmem_in_mspace(vma)) {
+ void *extmem_va = (void *)get_virt_from_mspace(vma->vm_pgoff << PAGE_SHIFT) + (addr - vma->vm_start);
+ memcpy(buf, extmem_va, len);
+ buf += len;
+ break;
+ }
+ }
+#endif
/*
* Check if this is a VM_IO | VM_PFNMAP VMA, which
* we can access using slightly different code.