[RAMEN9610-21208]coredump: fix race condition between mmget_not_zero()/get_task_mm...
[GitHub/MotorolaMobilityLLC/kernel-slsi.git] fs/proc/task_mmu.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/mm.h>
3 #include <linux/vmacache.h>
4 #include <linux/hugetlb.h>
5 #include <linux/huge_mm.h>
6 #include <linux/mount.h>
7 #include <linux/seq_file.h>
8 #include <linux/highmem.h>
9 #include <linux/ptrace.h>
10 #include <linux/slab.h>
11 #include <linux/pagemap.h>
12 #include <linux/mempolicy.h>
13 #include <linux/rmap.h>
14 #include <linux/swap.h>
15 #include <linux/sched/mm.h>
16 #include <linux/swapops.h>
17 #include <linux/mmu_notifier.h>
18 #include <linux/page_idle.h>
19 #include <linux/shmem_fs.h>
20 #include <linux/uaccess.h>
21 #include <linux/mm_inline.h>
22
23 #include <asm/elf.h>
24 #include <asm/tlb.h>
25 #include <asm/tlbflush.h>
26 #include "internal.h"
27
28 void task_mem(struct seq_file *m, struct mm_struct *mm)
29 {
30 unsigned long text, lib, swap, ptes, pmds, anon, file, shmem;
31 unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
32
33 anon = get_mm_counter(mm, MM_ANONPAGES);
34 file = get_mm_counter(mm, MM_FILEPAGES);
35 shmem = get_mm_counter(mm, MM_SHMEMPAGES);
36
37 /*
38 * Note: to minimize their overhead, mm maintains hiwater_vm and
39 * hiwater_rss only when about to *lower* total_vm or rss. Any
40 * collector of these hiwater stats must therefore get total_vm
41 * and rss too, which will usually be the higher. Barriers? Not
42 * worth the effort; such snapshots can always be inconsistent.
43 */
44 hiwater_vm = total_vm = mm->total_vm;
45 if (hiwater_vm < mm->hiwater_vm)
46 hiwater_vm = mm->hiwater_vm;
47 hiwater_rss = total_rss = anon + file + shmem;
48 if (hiwater_rss < mm->hiwater_rss)
49 hiwater_rss = mm->hiwater_rss;
50
51 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
52 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
53 swap = get_mm_counter(mm, MM_SWAPENTS);
54 ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
55 pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
56 seq_printf(m,
57 "VmPeak:\t%8lu kB\n"
58 "VmSize:\t%8lu kB\n"
59 "VmLck:\t%8lu kB\n"
60 "VmPin:\t%8lu kB\n"
61 "VmHWM:\t%8lu kB\n"
62 "VmRSS:\t%8lu kB\n"
63 "RssAnon:\t%8lu kB\n"
64 "RssFile:\t%8lu kB\n"
65 "RssShmem:\t%8lu kB\n"
66 "VmData:\t%8lu kB\n"
67 "VmStk:\t%8lu kB\n"
68 "VmExe:\t%8lu kB\n"
69 "VmLib:\t%8lu kB\n"
70 "VmPTE:\t%8lu kB\n"
71 "VmPMD:\t%8lu kB\n"
72 "VmSwap:\t%8lu kB\n",
73 hiwater_vm << (PAGE_SHIFT-10),
74 total_vm << (PAGE_SHIFT-10),
75 mm->locked_vm << (PAGE_SHIFT-10),
76 mm->pinned_vm << (PAGE_SHIFT-10),
77 hiwater_rss << (PAGE_SHIFT-10),
78 total_rss << (PAGE_SHIFT-10),
79 anon << (PAGE_SHIFT-10),
80 file << (PAGE_SHIFT-10),
81 shmem << (PAGE_SHIFT-10),
82 mm->data_vm << (PAGE_SHIFT-10),
83 mm->stack_vm << (PAGE_SHIFT-10), text, lib,
84 ptes >> 10,
85 pmds >> 10,
86 swap << (PAGE_SHIFT-10));
87 hugetlb_report_usage(m, mm);
88 }
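/*
 * Illustrative userspace sketch (not part of this kernel file): the
 * fields printed by task_mem() above are consumed by reading
 * /proc/<pid>/status. A minimal reader, assuming nothing beyond the
 * field names in the format string; error handling is kept minimal.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/proc/self/status", "r");

        if (!f)
                return 1;
        while (fgets(line, sizeof(line), f)) {
                /* e.g. "VmRSS:      1234 kB" / "VmHWM:      5678 kB" */
                if (!strncmp(line, "VmRSS:", 6) || !strncmp(line, "VmHWM:", 6))
                        fputs(line, stdout);
        }
        fclose(f);
        return 0;
}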
89
90 unsigned long task_vsize(struct mm_struct *mm)
91 {
92 return PAGE_SIZE * mm->total_vm;
93 }
94
95 unsigned long task_statm(struct mm_struct *mm,
96 unsigned long *shared, unsigned long *text,
97 unsigned long *data, unsigned long *resident)
98 {
99 *shared = get_mm_counter(mm, MM_FILEPAGES) +
100 get_mm_counter(mm, MM_SHMEMPAGES);
101 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
102 >> PAGE_SHIFT;
103 *data = mm->data_vm + mm->stack_vm;
104 *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
105 return mm->total_vm;
106 }
107
108 #ifdef CONFIG_NUMA
109 /*
110 * Save get_task_policy() for show_numa_map().
111 */
112 static void hold_task_mempolicy(struct proc_maps_private *priv)
113 {
114 struct task_struct *task = priv->task;
115
116 task_lock(task);
117 priv->task_mempolicy = get_task_policy(task);
118 mpol_get(priv->task_mempolicy);
119 task_unlock(task);
120 }
121 static void release_task_mempolicy(struct proc_maps_private *priv)
122 {
123 mpol_put(priv->task_mempolicy);
124 }
125 #else
126 static void hold_task_mempolicy(struct proc_maps_private *priv)
127 {
128 }
129 static void release_task_mempolicy(struct proc_maps_private *priv)
130 {
131 }
132 #endif
133
134 static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
135 {
136 const char __user *name = vma_get_anon_name(vma);
137 struct mm_struct *mm = vma->vm_mm;
138
139 unsigned long page_start_vaddr;
140 unsigned long page_offset;
141 unsigned long num_pages;
142 unsigned long max_len = NAME_MAX;
143 int i;
144
145 page_start_vaddr = (unsigned long)name & PAGE_MASK;
146 page_offset = (unsigned long)name - page_start_vaddr;
147 num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);
148
149 seq_puts(m, "[anon:");
150
151 for (i = 0; i < num_pages; i++) {
152 int len;
153 int write_len;
154 const char *kaddr;
155 long pages_pinned;
156 struct page *page;
157
158 pages_pinned = get_user_pages_remote(current, mm,
159 page_start_vaddr, 1, 0, &page, NULL, NULL);
160 if (pages_pinned < 1) {
161 seq_puts(m, "<fault>]");
162 return;
163 }
164
165 kaddr = (const char *)kmap(page);
166 len = min(max_len, PAGE_SIZE - page_offset);
167 write_len = strnlen(kaddr + page_offset, len);
168 seq_write(m, kaddr + page_offset, write_len);
169 kunmap(page);
170 put_page(page);
171
172 /* if strnlen hit a null terminator then we're done */
173 if (write_len != len)
174 break;
175
176 max_len -= len;
177 page_offset = 0;
178 page_start_vaddr += PAGE_SIZE;
179 }
180
181 seq_putc(m, ']');
182 }
183
184 static void vma_stop(struct proc_maps_private *priv)
185 {
186 struct mm_struct *mm = priv->mm;
187
188 release_task_mempolicy(priv);
189 up_read(&mm->mmap_sem);
190 mmput(mm);
191 }
192
193 static struct vm_area_struct *
194 m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma)
195 {
196 if (vma == priv->tail_vma)
197 return NULL;
198 return vma->vm_next ?: priv->tail_vma;
199 }
200
201 static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma)
202 {
203 if (m->count < m->size) /* vma is copied successfully */
204 m->version = m_next_vma(m->private, vma) ? vma->vm_end : -1UL;
205 }
206
207 static void *m_start(struct seq_file *m, loff_t *ppos)
208 {
209 struct proc_maps_private *priv = m->private;
210 unsigned long last_addr = m->version;
211 struct mm_struct *mm;
212 struct vm_area_struct *vma;
213 unsigned int pos = *ppos;
214
215 /* See m_cache_vma(). Zero at the start or after lseek. */
216 if (last_addr == -1UL)
217 return NULL;
218
219 priv->task = get_proc_task(priv->inode);
220 if (!priv->task)
221 return ERR_PTR(-ESRCH);
222
223 mm = priv->mm;
224 if (!mm || !mmget_not_zero(mm))
225 return NULL;
226
227 down_read(&mm->mmap_sem);
228 hold_task_mempolicy(priv);
229 priv->tail_vma = get_gate_vma(mm);
230
231 if (last_addr) {
232 vma = find_vma(mm, last_addr - 1);
233 if (vma && vma->vm_start <= last_addr)
234 vma = m_next_vma(priv, vma);
235 if (vma)
236 return vma;
237 }
238
239 m->version = 0;
240 if (pos < mm->map_count) {
241 for (vma = mm->mmap; pos; pos--) {
242 m->version = vma->vm_start;
243 vma = vma->vm_next;
244 }
245 return vma;
246 }
247
248 /* we do not bother to update m->version in this case */
249 if (pos == mm->map_count && priv->tail_vma)
250 return priv->tail_vma;
251
252 vma_stop(priv);
253 return NULL;
254 }
255
256 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
257 {
258 struct proc_maps_private *priv = m->private;
259 struct vm_area_struct *next;
260
261 (*pos)++;
262 next = m_next_vma(priv, v);
263 if (!next)
264 vma_stop(priv);
265 return next;
266 }
267
268 static void m_stop(struct seq_file *m, void *v)
269 {
270 struct proc_maps_private *priv = m->private;
271
272 if (!IS_ERR_OR_NULL(v))
273 vma_stop(priv);
274 if (priv->task) {
275 put_task_struct(priv->task);
276 priv->task = NULL;
277 }
278 }
279
280 static int proc_maps_open(struct inode *inode, struct file *file,
281 const struct seq_operations *ops, int psize)
282 {
283 struct proc_maps_private *priv = __seq_open_private(file, ops, psize);
284
285 if (!priv)
286 return -ENOMEM;
287
288 priv->inode = inode;
289 priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
290 if (IS_ERR(priv->mm)) {
291 int err = PTR_ERR(priv->mm);
292
293 seq_release_private(inode, file);
294 return err;
295 }
296
297 return 0;
298 }
299
300 static int proc_map_release(struct inode *inode, struct file *file)
301 {
302 struct seq_file *seq = file->private_data;
303 struct proc_maps_private *priv = seq->private;
304
305 if (priv->mm)
306 mmdrop(priv->mm);
307
308 kfree(priv->rollup);
309 return seq_release_private(inode, file);
310 }
311
312 static int do_maps_open(struct inode *inode, struct file *file,
313 const struct seq_operations *ops)
314 {
315 return proc_maps_open(inode, file, ops,
316 sizeof(struct proc_maps_private));
317 }
318
319 /*
320 * Indicate if the VMA is a stack for the given task; for
321 * /proc/PID/maps that is the stack of the main task.
322 */
323 static int is_stack(struct vm_area_struct *vma)
324 {
325 /*
326 * We make no effort to guess what a given thread considers to be
327 * its "stack". It's not even well-defined for programs written
328 * languages like Go.
329 */
330 return vma->vm_start <= vma->vm_mm->start_stack &&
331 vma->vm_end >= vma->vm_mm->start_stack;
332 }
333
334 static void show_vma_header_prefix(struct seq_file *m,
335 unsigned long start, unsigned long end,
336 vm_flags_t flags, unsigned long long pgoff,
337 dev_t dev, unsigned long ino)
338 {
339 seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
340 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
341 start,
342 end,
343 flags & VM_READ ? 'r' : '-',
344 flags & VM_WRITE ? 'w' : '-',
345 flags & VM_EXEC ? 'x' : '-',
346 flags & VM_MAYSHARE ? 's' : 'p',
347 pgoff,
348 MAJOR(dev), MINOR(dev), ino);
349 }
350
351 static void
352 show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
353 {
354 struct mm_struct *mm = vma->vm_mm;
355 struct file *file = vma->vm_file;
356 vm_flags_t flags = vma->vm_flags;
357 unsigned long ino = 0;
358 unsigned long long pgoff = 0;
359 unsigned long start, end;
360 dev_t dev = 0;
361 const char *name = NULL;
362
363 if (file) {
364 struct inode *inode = file_inode(vma->vm_file);
365 dev = inode->i_sb->s_dev;
366 ino = inode->i_ino;
367 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
368 }
369
370 start = vma->vm_start;
371 end = vma->vm_end;
372 show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
373
374 /*
375 * Print the dentry name for named mappings, and a
376 * special [heap] marker for the heap:
377 */
378 if (file) {
379 seq_pad(m, ' ');
380 seq_file_path(m, file, "\n");
381 goto done;
382 }
383
384 if (vma->vm_ops && vma->vm_ops->name) {
385 name = vma->vm_ops->name(vma);
386 if (name)
387 goto done;
388 }
389
390 name = arch_vma_name(vma);
391 if (!name) {
392 if (!mm) {
393 name = "[vdso]";
394 goto done;
395 }
396
397 if (vma->vm_start <= mm->brk &&
398 vma->vm_end >= mm->start_brk) {
399 name = "[heap]";
400 goto done;
401 }
402
403 if (is_stack(vma)) {
404 name = "[stack]";
405 goto done;
406 }
407
408 if (vma_get_anon_name(vma)) {
409 seq_pad(m, ' ');
410 seq_print_vma_name(m, vma);
411 }
412 }
413
414 done:
415 if (name) {
416 seq_pad(m, ' ');
417 seq_puts(m, name);
418 }
419 seq_putc(m, '\n');
420 }
421
422 static int show_map(struct seq_file *m, void *v, int is_pid)
423 {
424 show_map_vma(m, v, is_pid);
425 m_cache_vma(m, v);
426 return 0;
427 }
428
429 static int show_pid_map(struct seq_file *m, void *v)
430 {
431 return show_map(m, v, 1);
432 }
433
434 static int show_tid_map(struct seq_file *m, void *v)
435 {
436 return show_map(m, v, 0);
437 }
438
439 static const struct seq_operations proc_pid_maps_op = {
440 .start = m_start,
441 .next = m_next,
442 .stop = m_stop,
443 .show = show_pid_map
444 };
445
446 static const struct seq_operations proc_tid_maps_op = {
447 .start = m_start,
448 .next = m_next,
449 .stop = m_stop,
450 .show = show_tid_map
451 };
452
453 static int pid_maps_open(struct inode *inode, struct file *file)
454 {
455 return do_maps_open(inode, file, &proc_pid_maps_op);
456 }
457
458 static int tid_maps_open(struct inode *inode, struct file *file)
459 {
460 return do_maps_open(inode, file, &proc_tid_maps_op);
461 }
462
463 const struct file_operations proc_pid_maps_operations = {
464 .open = pid_maps_open,
465 .read = seq_read,
466 .llseek = seq_lseek,
467 .release = proc_map_release,
468 };
469
470 const struct file_operations proc_tid_maps_operations = {
471 .open = tid_maps_open,
472 .read = seq_read,
473 .llseek = seq_lseek,
474 .release = proc_map_release,
475 };
476
477 /*
478 * Proportional Set Size (PSS): my share of RSS.
479 *
480 * PSS of a process is the count of pages it has in memory, where each
481 * page is divided by the number of processes sharing it. So if a
482 * process has 1000 pages all to itself, and 1000 shared with one other
483 * process, its PSS will be 1500.
484 *
485 * To keep accumulated division errors low, we adopt a 64-bit
486 * fixed-point pss counter, so (pss >> PSS_SHIFT) is the real
487 * byte count.
488 *
489 * A shift of 12 before division means (assuming 4K page size):
490 * - 1M 3-user-pages add up to 8KB errors;
491 * - supports mapcount up to 2^24, or 16M;
492 * - supports PSS up to 2^52 bytes, or 4PB.
493 */
494 #define PSS_SHIFT 12
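/*
 * Illustrative sketch (userspace arithmetic, not part of this kernel
 * file): the fixed-point accumulation described above, for the example
 * in the comment (1000 private pages plus 1000 pages shared with one
 * other process; a 4K page size is assumed).
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        const uint64_t page_size = 4096, pss_shift = 12;
        uint64_t pss = 0;

        /* mapcount == 1: the full page is charged to us */
        pss += 1000 * (page_size << pss_shift);
        /* mapcount == 2: half of each page is charged to us */
        pss += 1000 * ((page_size << pss_shift) / 2);

        /* (pss >> PSS_SHIFT) is the real byte count: 1500 pages == 6000 kB */
        printf("Pss: %llu kB\n",
               (unsigned long long)((pss >> pss_shift) >> 10));
        return 0;
}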
495
496 #ifdef CONFIG_PROC_PAGE_MONITOR
497 struct mem_size_stats {
498 bool first;
499 unsigned long resident;
500 unsigned long shared_clean;
501 unsigned long shared_dirty;
502 unsigned long private_clean;
503 unsigned long private_dirty;
504 unsigned long referenced;
505 unsigned long anonymous;
506 unsigned long lazyfree;
507 unsigned long anonymous_thp;
508 unsigned long shmem_thp;
509 unsigned long swap;
510 unsigned long shared_hugetlb;
511 unsigned long private_hugetlb;
512 unsigned long first_vma_start;
513 u64 pss;
514 u64 pss_locked;
515 u64 swap_pss;
516 bool check_shmem_swap;
517 };
518
519 static void smaps_account(struct mem_size_stats *mss, struct page *page,
520 bool compound, bool young, bool dirty, bool locked)
521 {
522 int i, nr = compound ? 1 << compound_order(page) : 1;
523 unsigned long size = nr * PAGE_SIZE;
524
525 if (PageAnon(page)) {
526 mss->anonymous += size;
527 if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
528 mss->lazyfree += size;
529 }
530
531 mss->resident += size;
532 /* Accumulate the size in pages that have been accessed. */
533 if (young || page_is_young(page) || PageReferenced(page))
534 mss->referenced += size;
535
536 /*
537 * page_count(page) == 1 guarantees the page is mapped exactly once.
538 * If any subpage of the compound page were mapped by a PTE, it
539 * would elevate page_count().
540 */
541 if (page_count(page) == 1) {
542 if (dirty || PageDirty(page))
543 mss->private_dirty += size;
544 else
545 mss->private_clean += size;
546 mss->pss += (u64)size << PSS_SHIFT;
547 if (locked)
548 mss->pss_locked += (u64)size << PSS_SHIFT;
549 return;
550 }
551
552 for (i = 0; i < nr; i++, page++) {
553 int mapcount = page_mapcount(page);
554 unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
555
556 if (mapcount >= 2) {
557 if (dirty || PageDirty(page))
558 mss->shared_dirty += PAGE_SIZE;
559 else
560 mss->shared_clean += PAGE_SIZE;
561 mss->pss += pss / mapcount;
562 if (locked)
563 mss->pss_locked += pss / mapcount;
564 } else {
565 if (dirty || PageDirty(page))
566 mss->private_dirty += PAGE_SIZE;
567 else
568 mss->private_clean += PAGE_SIZE;
569 mss->pss += pss;
570 if (locked)
571 mss->pss_locked += pss;
572 }
573 }
574 }
575
576 #ifdef CONFIG_SHMEM
577 static int smaps_pte_hole(unsigned long addr, unsigned long end,
578 struct mm_walk *walk)
579 {
580 struct mem_size_stats *mss = walk->private;
581
582 mss->swap += shmem_partial_swap_usage(
583 walk->vma->vm_file->f_mapping, addr, end);
584
585 return 0;
586 }
587 #endif
588
589 static void smaps_pte_entry(pte_t *pte, unsigned long addr,
590 struct mm_walk *walk)
591 {
592 struct mem_size_stats *mss = walk->private;
593 struct vm_area_struct *vma = walk->vma;
594 bool locked = !!(vma->vm_flags & VM_LOCKED);
595 struct page *page = NULL;
596
597 if (pte_present(*pte)) {
598 page = vm_normal_page(vma, addr, *pte);
599 } else if (is_swap_pte(*pte)) {
600 swp_entry_t swpent = pte_to_swp_entry(*pte);
601
602 if (!non_swap_entry(swpent)) {
603 int mapcount;
604
605 mss->swap += PAGE_SIZE;
606 mapcount = swp_swapcount(swpent);
607 if (mapcount >= 2) {
608 u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;
609
610 do_div(pss_delta, mapcount);
611 mss->swap_pss += pss_delta;
612 } else {
613 mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
614 }
615 } else if (is_migration_entry(swpent))
616 page = migration_entry_to_page(swpent);
617 else if (is_device_private_entry(swpent))
618 page = device_private_entry_to_page(swpent);
619 } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
620 && pte_none(*pte))) {
621 page = find_get_entry(vma->vm_file->f_mapping,
622 linear_page_index(vma, addr));
623 if (!page)
624 return;
625
626 if (radix_tree_exceptional_entry(page))
627 mss->swap += PAGE_SIZE;
628 else
629 put_page(page);
630
631 return;
632 }
633
634 if (!page)
635 return;
636
637 smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
638 }
639
640 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
641 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
642 struct mm_walk *walk)
643 {
644 struct mem_size_stats *mss = walk->private;
645 struct vm_area_struct *vma = walk->vma;
646 bool locked = !!(vma->vm_flags & VM_LOCKED);
647 struct page *page;
648
649 /* FOLL_DUMP will return -EFAULT on huge zero page */
650 page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
651 if (IS_ERR_OR_NULL(page))
652 return;
653 if (PageAnon(page))
654 mss->anonymous_thp += HPAGE_PMD_SIZE;
655 else if (PageSwapBacked(page))
656 mss->shmem_thp += HPAGE_PMD_SIZE;
657 else if (is_zone_device_page(page))
658 /* pass */;
659 else
660 VM_BUG_ON_PAGE(1, page);
661 smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
662 }
663 #else
664 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
665 struct mm_walk *walk)
666 {
667 }
668 #endif
669
670 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
671 struct mm_walk *walk)
672 {
673 struct vm_area_struct *vma = walk->vma;
674 pte_t *pte;
675 spinlock_t *ptl;
676
677 ptl = pmd_trans_huge_lock(pmd, vma);
678 if (ptl) {
679 if (pmd_present(*pmd))
680 smaps_pmd_entry(pmd, addr, walk);
681 spin_unlock(ptl);
682 goto out;
683 }
684
685 if (pmd_trans_unstable(pmd))
686 goto out;
687 /*
688 * The mmap_sem held all the way back in m_start() is what
689 * keeps khugepaged out of here and from collapsing things
690 * in here.
691 */
692 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
693 for (; addr != end; pte++, addr += PAGE_SIZE)
694 smaps_pte_entry(pte, addr, walk);
695 pte_unmap_unlock(pte - 1, ptl);
696 out:
697 cond_resched();
698 return 0;
699 }
700
701 static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
702 {
703 /*
704 * Don't forget to update Documentation/ on changes.
705 */
706 static const char mnemonics[BITS_PER_LONG][2] = {
707 /*
708 * In case we meet a flag we don't know about.
709 */
710 [0 ... (BITS_PER_LONG-1)] = "??",
711
712 [ilog2(VM_READ)] = "rd",
713 [ilog2(VM_WRITE)] = "wr",
714 [ilog2(VM_EXEC)] = "ex",
715 [ilog2(VM_SHARED)] = "sh",
716 [ilog2(VM_MAYREAD)] = "mr",
717 [ilog2(VM_MAYWRITE)] = "mw",
718 [ilog2(VM_MAYEXEC)] = "me",
719 [ilog2(VM_MAYSHARE)] = "ms",
720 [ilog2(VM_GROWSDOWN)] = "gd",
721 [ilog2(VM_PFNMAP)] = "pf",
722 [ilog2(VM_DENYWRITE)] = "dw",
723 #ifdef CONFIG_X86_INTEL_MPX
724 [ilog2(VM_MPX)] = "mp",
725 #endif
726 [ilog2(VM_LOCKED)] = "lo",
727 [ilog2(VM_IO)] = "io",
728 [ilog2(VM_SEQ_READ)] = "sr",
729 [ilog2(VM_RAND_READ)] = "rr",
730 [ilog2(VM_DONTCOPY)] = "dc",
731 [ilog2(VM_DONTEXPAND)] = "de",
732 [ilog2(VM_ACCOUNT)] = "ac",
733 [ilog2(VM_NORESERVE)] = "nr",
734 [ilog2(VM_HUGETLB)] = "ht",
735 [ilog2(VM_ARCH_1)] = "ar",
736 [ilog2(VM_WIPEONFORK)] = "wf",
737 [ilog2(VM_DONTDUMP)] = "dd",
738 #ifdef CONFIG_MEM_SOFT_DIRTY
739 [ilog2(VM_SOFTDIRTY)] = "sd",
740 #endif
741 [ilog2(VM_MIXEDMAP)] = "mm",
742 [ilog2(VM_HUGEPAGE)] = "hg",
743 [ilog2(VM_NOHUGEPAGE)] = "nh",
744 [ilog2(VM_MERGEABLE)] = "mg",
745 [ilog2(VM_UFFD_MISSING)]= "um",
746 [ilog2(VM_UFFD_WP)] = "uw",
747 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
748 /* These come out via ProtectionKey: */
749 [ilog2(VM_PKEY_BIT0)] = "",
750 [ilog2(VM_PKEY_BIT1)] = "",
751 [ilog2(VM_PKEY_BIT2)] = "",
752 [ilog2(VM_PKEY_BIT3)] = "",
753 #endif
754 };
755 size_t i;
756
757 seq_puts(m, "VmFlags: ");
758 for (i = 0; i < BITS_PER_LONG; i++) {
759 if (!mnemonics[i][0])
760 continue;
761 if (vma->vm_flags & (1UL << i)) {
762 seq_printf(m, "%c%c ",
763 mnemonics[i][0], mnemonics[i][1]);
764 }
765 }
766 seq_putc(m, '\n');
767 }
768
769 #ifdef CONFIG_HUGETLB_PAGE
770 static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
771 unsigned long addr, unsigned long end,
772 struct mm_walk *walk)
773 {
774 struct mem_size_stats *mss = walk->private;
775 struct vm_area_struct *vma = walk->vma;
776 struct page *page = NULL;
777
778 if (pte_present(*pte)) {
779 page = vm_normal_page(vma, addr, *pte);
780 } else if (is_swap_pte(*pte)) {
781 swp_entry_t swpent = pte_to_swp_entry(*pte);
782
783 if (is_migration_entry(swpent))
784 page = migration_entry_to_page(swpent);
785 else if (is_device_private_entry(swpent))
786 page = device_private_entry_to_page(swpent);
787 }
788 if (page) {
789 int mapcount = page_mapcount(page);
790
791 if (mapcount >= 2)
792 mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
793 else
794 mss->private_hugetlb += huge_page_size(hstate_vma(vma));
795 }
796 return 0;
797 }
798 #endif /* HUGETLB_PAGE */
799
800 void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
801 {
802 }
803
804 static int show_smap(struct seq_file *m, void *v, int is_pid)
805 {
806 struct proc_maps_private *priv = m->private;
807 struct vm_area_struct *vma = v;
808 struct mem_size_stats mss_stack;
809 struct mem_size_stats *mss;
810 struct mm_walk smaps_walk = {
811 .pmd_entry = smaps_pte_range,
812 #ifdef CONFIG_HUGETLB_PAGE
813 .hugetlb_entry = smaps_hugetlb_range,
814 #endif
815 .mm = vma->vm_mm,
816 };
817 int ret = 0;
818 bool rollup_mode;
819 bool last_vma;
820
821 if (priv->rollup) {
822 rollup_mode = true;
823 mss = priv->rollup;
824 if (mss->first) {
825 mss->first_vma_start = vma->vm_start;
826 mss->first = false;
827 }
828 last_vma = !m_next_vma(priv, vma);
829 } else {
830 rollup_mode = false;
831 memset(&mss_stack, 0, sizeof(mss_stack));
832 mss = &mss_stack;
833 }
834
835 smaps_walk.private = mss;
836
837 #ifdef CONFIG_SHMEM
838 /* In case of smaps_rollup, reset the value from previous vma */
839 mss->check_shmem_swap = false;
840 if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
841 /*
842 * For shared or readonly shmem mappings we know that all
843 * swapped out pages belong to the shmem object, and we can
844 * obtain the swap value much more efficiently. For private
845 * writable mappings, we might have COW pages that are
846 * not affected by the parent swapped out pages of the shmem
847 * object, so we have to distinguish them during the page walk.
848 * Unless we know that the shmem object (or the part mapped by
849 * our VMA) has no swapped out pages at all.
850 */
851 unsigned long shmem_swapped = shmem_swap_usage(vma);
852
853 if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
854 !(vma->vm_flags & VM_WRITE)) {
855 mss->swap += shmem_swapped;
856 } else {
857 mss->check_shmem_swap = true;
858 smaps_walk.pte_hole = smaps_pte_hole;
859 }
860 }
861 #endif
862 /* mmap_sem is held in m_start */
863 walk_page_vma(vma, &smaps_walk);
864
865 if (!rollup_mode) {
866 show_map_vma(m, vma, is_pid);
867 if (vma_get_anon_name(vma)) {
868 seq_puts(m, "Name: ");
869 seq_print_vma_name(m, vma);
870 seq_putc(m, '\n');
871 }
872 } else if (last_vma) {
873 show_vma_header_prefix(
874 m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
875 seq_pad(m, ' ');
876 seq_puts(m, "[rollup]\n");
877 } else {
878 ret = SEQ_SKIP;
879 }
880
881 if (!rollup_mode)
882 seq_printf(m,
883 "Size: %8lu kB\n"
884 "KernelPageSize: %8lu kB\n"
885 "MMUPageSize: %8lu kB\n",
886 (vma->vm_end - vma->vm_start) >> 10,
887 vma_kernel_pagesize(vma) >> 10,
888 vma_mmu_pagesize(vma) >> 10);
889
890
891 if (!rollup_mode || last_vma)
892 seq_printf(m,
893 "Rss: %8lu kB\n"
894 "Pss: %8lu kB\n"
895 "Shared_Clean: %8lu kB\n"
896 "Shared_Dirty: %8lu kB\n"
897 "Private_Clean: %8lu kB\n"
898 "Private_Dirty: %8lu kB\n"
899 "Referenced: %8lu kB\n"
900 "Anonymous: %8lu kB\n"
901 "LazyFree: %8lu kB\n"
902 "AnonHugePages: %8lu kB\n"
903 "ShmemPmdMapped: %8lu kB\n"
904 "Shared_Hugetlb: %8lu kB\n"
905 "Private_Hugetlb: %7lu kB\n"
906 "Swap: %8lu kB\n"
907 "SwapPss: %8lu kB\n"
908 "Locked: %8lu kB\n",
909 mss->resident >> 10,
910 (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
911 mss->shared_clean >> 10,
912 mss->shared_dirty >> 10,
913 mss->private_clean >> 10,
914 mss->private_dirty >> 10,
915 mss->referenced >> 10,
916 mss->anonymous >> 10,
917 mss->lazyfree >> 10,
918 mss->anonymous_thp >> 10,
919 mss->shmem_thp >> 10,
920 mss->shared_hugetlb >> 10,
921 mss->private_hugetlb >> 10,
922 mss->swap >> 10,
923 (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
924 (unsigned long)(mss->pss_locked >> (10 + PSS_SHIFT)));
925
926 if (!rollup_mode) {
927 arch_show_smap(m, vma);
928 show_smap_vma_flags(m, vma);
929 }
930 m_cache_vma(m, vma);
931 return ret;
932 }
933
934 static int show_pid_smap(struct seq_file *m, void *v)
935 {
936 return show_smap(m, v, 1);
937 }
938
939 static int show_tid_smap(struct seq_file *m, void *v)
940 {
941 return show_smap(m, v, 0);
942 }
943
944 static const struct seq_operations proc_pid_smaps_op = {
945 .start = m_start,
946 .next = m_next,
947 .stop = m_stop,
948 .show = show_pid_smap
949 };
950
951 static const struct seq_operations proc_tid_smaps_op = {
952 .start = m_start,
953 .next = m_next,
954 .stop = m_stop,
955 .show = show_tid_smap
956 };
957
958 static int pid_smaps_open(struct inode *inode, struct file *file)
959 {
960 return do_maps_open(inode, file, &proc_pid_smaps_op);
961 }
962
963 static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
964 {
965 struct seq_file *seq;
966 struct proc_maps_private *priv;
967 int ret = do_maps_open(inode, file, &proc_pid_smaps_op);
968
969 if (ret < 0)
970 return ret;
971 seq = file->private_data;
972 priv = seq->private;
973 priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
974 if (!priv->rollup) {
975 proc_map_release(inode, file);
976 return -ENOMEM;
977 }
978 priv->rollup->first = true;
979 return 0;
980 }
981
982 static int tid_smaps_open(struct inode *inode, struct file *file)
983 {
984 return do_maps_open(inode, file, &proc_tid_smaps_op);
985 }
986
987 const struct file_operations proc_pid_smaps_operations = {
988 .open = pid_smaps_open,
989 .read = seq_read,
990 .llseek = seq_lseek,
991 .release = proc_map_release,
992 };
993
994 const struct file_operations proc_pid_smaps_rollup_operations = {
995 .open = pid_smaps_rollup_open,
996 .read = seq_read,
997 .llseek = seq_lseek,
998 .release = proc_map_release,
999 };
1000
1001 const struct file_operations proc_tid_smaps_operations = {
1002 .open = tid_smaps_open,
1003 .read = seq_read,
1004 .llseek = seq_lseek,
1005 .release = proc_map_release,
1006 };
1007
1008 enum clear_refs_types {
1009 CLEAR_REFS_ALL = 1,
1010 CLEAR_REFS_ANON,
1011 CLEAR_REFS_MAPPED,
1012 CLEAR_REFS_SOFT_DIRTY,
1013 CLEAR_REFS_MM_HIWATER_RSS,
1014 CLEAR_REFS_LAST,
1015 };
1016
1017 struct clear_refs_private {
1018 enum clear_refs_types type;
1019 };
1020
1021 #ifdef CONFIG_MEM_SOFT_DIRTY
1022 static inline void clear_soft_dirty(struct vm_area_struct *vma,
1023 unsigned long addr, pte_t *pte)
1024 {
1025 /*
1026 * The soft-dirty tracker uses #PF-s to catch writes
1027 * to pages, so write-protect the pte as well. See the
1028 * Documentation/vm/soft-dirty.txt for full description
1029 * of how soft-dirty works.
1030 */
1031 pte_t ptent = *pte;
1032
1033 if (pte_present(ptent)) {
1034 ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
1035 ptent = pte_wrprotect(ptent);
1036 ptent = pte_clear_soft_dirty(ptent);
1037 ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
1038 } else if (is_swap_pte(ptent)) {
1039 ptent = pte_swp_clear_soft_dirty(ptent);
1040 set_pte_at(vma->vm_mm, addr, pte, ptent);
1041 }
1042 }
1043 #else
1044 static inline void clear_soft_dirty(struct vm_area_struct *vma,
1045 unsigned long addr, pte_t *pte)
1046 {
1047 }
1048 #endif
1049
1050 #if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
1051 static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
1052 unsigned long addr, pmd_t *pmdp)
1053 {
1054 pmd_t pmd = *pmdp;
1055
1056 if (pmd_present(pmd)) {
1057 /* See comment in change_huge_pmd() */
1058 pmdp_invalidate(vma, addr, pmdp);
1059 if (pmd_dirty(*pmdp))
1060 pmd = pmd_mkdirty(pmd);
1061 if (pmd_young(*pmdp))
1062 pmd = pmd_mkyoung(pmd);
1063
1064 pmd = pmd_wrprotect(pmd);
1065 pmd = pmd_clear_soft_dirty(pmd);
1066
1067 set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
1068 } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
1069 pmd = pmd_swp_clear_soft_dirty(pmd);
1070 set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
1071 }
1072 }
1073 #else
1074 static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
1075 unsigned long addr, pmd_t *pmdp)
1076 {
1077 }
1078 #endif
1079
1080 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
1081 unsigned long end, struct mm_walk *walk)
1082 {
1083 struct clear_refs_private *cp = walk->private;
1084 struct vm_area_struct *vma = walk->vma;
1085 pte_t *pte, ptent;
1086 spinlock_t *ptl;
1087 struct page *page;
1088
1089 ptl = pmd_trans_huge_lock(pmd, vma);
1090 if (ptl) {
1091 if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
1092 clear_soft_dirty_pmd(vma, addr, pmd);
1093 goto out;
1094 }
1095
1096 if (!pmd_present(*pmd))
1097 goto out;
1098
1099 page = pmd_page(*pmd);
1100
1101 /* Clear accessed and referenced bits. */
1102 pmdp_test_and_clear_young(vma, addr, pmd);
1103 test_and_clear_page_young(page);
1104 ClearPageReferenced(page);
1105 out:
1106 spin_unlock(ptl);
1107 return 0;
1108 }
1109
1110 if (pmd_trans_unstable(pmd))
1111 return 0;
1112
1113 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
1114 for (; addr != end; pte++, addr += PAGE_SIZE) {
1115 ptent = *pte;
1116
1117 if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
1118 clear_soft_dirty(vma, addr, pte);
1119 continue;
1120 }
1121
1122 if (!pte_present(ptent))
1123 continue;
1124
1125 page = vm_normal_page(vma, addr, ptent);
1126 if (!page)
1127 continue;
1128
1129 /* Clear accessed and referenced bits. */
1130 ptep_test_and_clear_young(vma, addr, pte);
1131 test_and_clear_page_young(page);
1132 ClearPageReferenced(page);
1133 }
1134 pte_unmap_unlock(pte - 1, ptl);
1135 cond_resched();
1136 return 0;
1137 }
1138
1139 static int clear_refs_test_walk(unsigned long start, unsigned long end,
1140 struct mm_walk *walk)
1141 {
1142 struct clear_refs_private *cp = walk->private;
1143 struct vm_area_struct *vma = walk->vma;
1144
1145 if (vma->vm_flags & VM_PFNMAP)
1146 return 1;
1147
1148 /*
1149 * Writing 1 to /proc/pid/clear_refs affects all pages.
1150 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
1151 * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
1152 * Writing 4 to /proc/pid/clear_refs affects all pages.
1153 */
1154 if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
1155 return 1;
1156 if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
1157 return 1;
1158 return 0;
1159 }
1160
1161 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
1162 size_t count, loff_t *ppos)
1163 {
1164 struct task_struct *task;
1165 char buffer[PROC_NUMBUF];
1166 struct mm_struct *mm;
1167 struct vm_area_struct *vma;
1168 enum clear_refs_types type;
1169 struct mmu_gather tlb;
1170 int itype;
1171 int rv;
1172
1173 memset(buffer, 0, sizeof(buffer));
1174 if (count > sizeof(buffer) - 1)
1175 count = sizeof(buffer) - 1;
1176 if (copy_from_user(buffer, buf, count))
1177 return -EFAULT;
1178 rv = kstrtoint(strstrip(buffer), 10, &itype);
1179 if (rv < 0)
1180 return rv;
1181 type = (enum clear_refs_types)itype;
1182 if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
1183 return -EINVAL;
1184
1185 task = get_proc_task(file_inode(file));
1186 if (!task)
1187 return -ESRCH;
1188 mm = get_task_mm(task);
1189 if (mm) {
1190 struct clear_refs_private cp = {
1191 .type = type,
1192 };
1193 struct mm_walk clear_refs_walk = {
1194 .pmd_entry = clear_refs_pte_range,
1195 .test_walk = clear_refs_test_walk,
1196 .mm = mm,
1197 .private = &cp,
1198 };
1199
1200 if (type == CLEAR_REFS_MM_HIWATER_RSS) {
1201 if (down_write_killable(&mm->mmap_sem)) {
1202 count = -EINTR;
1203 goto out_mm;
1204 }
1205
1206 /*
1207 * Writing 5 to /proc/pid/clear_refs resets the peak
1208 * resident set size to this mm's current rss value.
1209 */
1210 reset_mm_hiwater_rss(mm);
1211 up_write(&mm->mmap_sem);
1212 goto out_mm;
1213 }
1214
1215 down_read(&mm->mmap_sem);
1216 tlb_gather_mmu(&tlb, mm, 0, -1);
1217 if (type == CLEAR_REFS_SOFT_DIRTY) {
1218 for (vma = mm->mmap; vma; vma = vma->vm_next) {
1219 if (!(vma->vm_flags & VM_SOFTDIRTY))
1220 continue;
1221 up_read(&mm->mmap_sem);
1222 if (down_write_killable(&mm->mmap_sem)) {
1223 count = -EINTR;
1224 goto out_mm;
1225 }
1226 /*
1227 * Avoid modifying vma->vm_flags
1228 * without locked ops while the
1229 * coredump reads the vm_flags.
1230 */
1231 if (!mmget_still_valid(mm)) {
1232 /*
1233 * Silently return "count"
1234 * as if get_task_mm() had
1235 * failed. FIXME: should this
1236 * function return -ESRCH
1237 * when get_task_mm() fails,
1238 * as it does when
1239 * get_proc_task() fails?
1240 */
1241 up_write(&mm->mmap_sem);
1242 goto out_mm;
1243 }
1244 for (vma = mm->mmap; vma; vma = vma->vm_next) {
1245 vma->vm_flags &= ~VM_SOFTDIRTY;
1246 vma_set_page_prot(vma);
1247 }
1248 downgrade_write(&mm->mmap_sem);
1249 break;
1250 }
1251 mmu_notifier_invalidate_range_start(mm, 0, -1);
1252 }
1253 walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
1254 if (type == CLEAR_REFS_SOFT_DIRTY)
1255 mmu_notifier_invalidate_range_end(mm, 0, -1);
1256 tlb_finish_mmu(&tlb, 0, -1);
1257 up_read(&mm->mmap_sem);
1258 out_mm:
1259 mmput(mm);
1260 }
1261 put_task_struct(task);
1262
1263 return count;
1264 }
1265
1266 const struct file_operations proc_clear_refs_operations = {
1267 .write = clear_refs_write,
1268 .llseek = noop_llseek,
1269 };
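/*
 * Illustrative userspace sketch (not part of this kernel file): arming
 * soft-dirty tracking for a task by writing "4" to clear_refs, as
 * handled by clear_refs_write() above ("1"-"3" clear referenced bits,
 * "5" resets the RSS high-water mark). Error handling is kept minimal.
 */
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>

static int clear_soft_dirty_bits(pid_t pid)
{
        char path[64];
        int fd;

        snprintf(path, sizeof(path), "/proc/%d/clear_refs", (int)pid);
        fd = open(path, O_WRONLY);
        if (fd < 0)
                return -1;
        /* CLEAR_REFS_SOFT_DIRTY == 4 */
        if (write(fd, "4", 1) != 1) {
                close(fd);
                return -1;
        }
        return close(fd);
}

int main(void)
{
        return clear_soft_dirty_bits(getpid()) ? 1 : 0;
}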
1270
1271 typedef struct {
1272 u64 pme;
1273 } pagemap_entry_t;
1274
1275 struct pagemapread {
1276 int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
1277 pagemap_entry_t *buffer;
1278 bool show_pfn;
1279 };
1280
1281 #define PAGEMAP_WALK_SIZE (PMD_SIZE)
1282 #define PAGEMAP_WALK_MASK (PMD_MASK)
1283
1284 #define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
1285 #define PM_PFRAME_BITS 55
1286 #define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
1287 #define PM_SOFT_DIRTY BIT_ULL(55)
1288 #define PM_MMAP_EXCLUSIVE BIT_ULL(56)
1289 #define PM_FILE BIT_ULL(61)
1290 #define PM_SWAP BIT_ULL(62)
1291 #define PM_PRESENT BIT_ULL(63)
1292
1293 #define PM_END_OF_BUFFER 1
1294
1295 static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
1296 {
1297 return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
1298 }
1299
1300 static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
1301 struct pagemapread *pm)
1302 {
1303 pm->buffer[pm->pos++] = *pme;
1304 if (pm->pos >= pm->len)
1305 return PM_END_OF_BUFFER;
1306 return 0;
1307 }
1308
1309 static int pagemap_pte_hole(unsigned long start, unsigned long end,
1310 struct mm_walk *walk)
1311 {
1312 struct pagemapread *pm = walk->private;
1313 unsigned long addr = start;
1314 int err = 0;
1315
1316 while (addr < end) {
1317 struct vm_area_struct *vma = find_vma(walk->mm, addr);
1318 pagemap_entry_t pme = make_pme(0, 0);
1319 /* End of address space hole, which we mark as non-present. */
1320 unsigned long hole_end;
1321
1322 if (vma)
1323 hole_end = min(end, vma->vm_start);
1324 else
1325 hole_end = end;
1326
1327 for (; addr < hole_end; addr += PAGE_SIZE) {
1328 err = add_to_pagemap(addr, &pme, pm);
1329 if (err)
1330 goto out;
1331 }
1332
1333 if (!vma)
1334 break;
1335
1336 /* Addresses in the VMA. */
1337 if (vma->vm_flags & VM_SOFTDIRTY)
1338 pme = make_pme(0, PM_SOFT_DIRTY);
1339 for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
1340 err = add_to_pagemap(addr, &pme, pm);
1341 if (err)
1342 goto out;
1343 }
1344 }
1345 out:
1346 return err;
1347 }
1348
1349 static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
1350 struct vm_area_struct *vma, unsigned long addr, pte_t pte)
1351 {
1352 u64 frame = 0, flags = 0;
1353 struct page *page = NULL;
1354
1355 if (pte_present(pte)) {
1356 if (pm->show_pfn)
1357 frame = pte_pfn(pte);
1358 flags |= PM_PRESENT;
1359 page = _vm_normal_page(vma, addr, pte, true);
1360 if (pte_soft_dirty(pte))
1361 flags |= PM_SOFT_DIRTY;
1362 } else if (is_swap_pte(pte)) {
1363 swp_entry_t entry;
1364 if (pte_swp_soft_dirty(pte))
1365 flags |= PM_SOFT_DIRTY;
1366 entry = pte_to_swp_entry(pte);
1367 if (pm->show_pfn)
1368 frame = swp_type(entry) |
1369 (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
1370 flags |= PM_SWAP;
1371 if (is_migration_entry(entry))
1372 page = migration_entry_to_page(entry);
1373
1374 if (is_device_private_entry(entry))
1375 page = device_private_entry_to_page(entry);
1376 }
1377
1378 if (page && !PageAnon(page))
1379 flags |= PM_FILE;
1380 if (page && page_mapcount(page) == 1)
1381 flags |= PM_MMAP_EXCLUSIVE;
1382 if (vma->vm_flags & VM_SOFTDIRTY)
1383 flags |= PM_SOFT_DIRTY;
1384
1385 return make_pme(frame, flags);
1386 }
1387
1388 static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
1389 struct mm_walk *walk)
1390 {
1391 struct vm_area_struct *vma = walk->vma;
1392 struct pagemapread *pm = walk->private;
1393 spinlock_t *ptl;
1394 pte_t *pte, *orig_pte;
1395 int err = 0;
1396
1397 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1398 ptl = pmd_trans_huge_lock(pmdp, vma);
1399 if (ptl) {
1400 u64 flags = 0, frame = 0;
1401 pmd_t pmd = *pmdp;
1402 struct page *page = NULL;
1403
1404 if (vma->vm_flags & VM_SOFTDIRTY)
1405 flags |= PM_SOFT_DIRTY;
1406
1407 if (pmd_present(pmd)) {
1408 page = pmd_page(pmd);
1409
1410 flags |= PM_PRESENT;
1411 if (pmd_soft_dirty(pmd))
1412 flags |= PM_SOFT_DIRTY;
1413 if (pm->show_pfn)
1414 frame = pmd_pfn(pmd) +
1415 ((addr & ~PMD_MASK) >> PAGE_SHIFT);
1416 }
1417 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1418 else if (is_swap_pmd(pmd)) {
1419 swp_entry_t entry = pmd_to_swp_entry(pmd);
1420 unsigned long offset;
1421
1422 if (pm->show_pfn) {
1423 offset = swp_offset(entry) +
1424 ((addr & ~PMD_MASK) >> PAGE_SHIFT);
1425 frame = swp_type(entry) |
1426 (offset << MAX_SWAPFILES_SHIFT);
1427 }
1428 flags |= PM_SWAP;
1429 if (pmd_swp_soft_dirty(pmd))
1430 flags |= PM_SOFT_DIRTY;
1431 VM_BUG_ON(!is_pmd_migration_entry(pmd));
1432 page = migration_entry_to_page(entry);
1433 }
1434 #endif
1435
1436 if (page && page_mapcount(page) == 1)
1437 flags |= PM_MMAP_EXCLUSIVE;
1438
1439 for (; addr != end; addr += PAGE_SIZE) {
1440 pagemap_entry_t pme = make_pme(frame, flags);
1441
1442 err = add_to_pagemap(addr, &pme, pm);
1443 if (err)
1444 break;
1445 if (pm->show_pfn) {
1446 if (flags & PM_PRESENT)
1447 frame++;
1448 else if (flags & PM_SWAP)
1449 frame += (1 << MAX_SWAPFILES_SHIFT);
1450 }
1451 }
1452 spin_unlock(ptl);
1453 return err;
1454 }
1455
1456 if (pmd_trans_unstable(pmdp))
1457 return 0;
1458 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1459
1460 /*
1461 * We can assume that @vma always points to a valid VMA and @end never
1462 * goes beyond vma->vm_end.
1463 */
1464 orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
1465 for (; addr < end; pte++, addr += PAGE_SIZE) {
1466 pagemap_entry_t pme;
1467
1468 pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
1469 err = add_to_pagemap(addr, &pme, pm);
1470 if (err)
1471 break;
1472 }
1473 pte_unmap_unlock(orig_pte, ptl);
1474
1475 cond_resched();
1476
1477 return err;
1478 }
1479
1480 #ifdef CONFIG_HUGETLB_PAGE
1481 /* This function walks within one hugetlb entry in a single call */
1482 static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
1483 unsigned long addr, unsigned long end,
1484 struct mm_walk *walk)
1485 {
1486 struct pagemapread *pm = walk->private;
1487 struct vm_area_struct *vma = walk->vma;
1488 u64 flags = 0, frame = 0;
1489 int err = 0;
1490 pte_t pte;
1491
1492 if (vma->vm_flags & VM_SOFTDIRTY)
1493 flags |= PM_SOFT_DIRTY;
1494
1495 pte = huge_ptep_get(ptep);
1496 if (pte_present(pte)) {
1497 struct page *page = pte_page(pte);
1498
1499 if (!PageAnon(page))
1500 flags |= PM_FILE;
1501
1502 if (page_mapcount(page) == 1)
1503 flags |= PM_MMAP_EXCLUSIVE;
1504
1505 flags |= PM_PRESENT;
1506 if (pm->show_pfn)
1507 frame = pte_pfn(pte) +
1508 ((addr & ~hmask) >> PAGE_SHIFT);
1509 }
1510
1511 for (; addr != end; addr += PAGE_SIZE) {
1512 pagemap_entry_t pme = make_pme(frame, flags);
1513
1514 err = add_to_pagemap(addr, &pme, pm);
1515 if (err)
1516 return err;
1517 if (pm->show_pfn && (flags & PM_PRESENT))
1518 frame++;
1519 }
1520
1521 cond_resched();
1522
1523 return err;
1524 }
1525 #endif /* HUGETLB_PAGE */
1526
1527 /*
1528 * /proc/pid/pagemap - an array mapping virtual pages to pfns
1529 *
1530 * For each page in the address space, this file contains one 64-bit entry
1531 * consisting of the following:
1532 *
1533 * Bits 0-54 page frame number (PFN) if present
1534 * Bits 0-4 swap type if swapped
1535 * Bits 5-54 swap offset if swapped
1536 * Bit 55 pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
1537 * Bit 56 page exclusively mapped
1538 * Bits 57-60 zero
1539 * Bit 61 page is file-page or shared-anon
1540 * Bit 62 page swapped
1541 * Bit 63 page present
1542 *
1543 * If the page is not present but in swap, then the PFN contains an
1544 * encoding of the swap file number and the page's offset into the
1545 * swap. Unmapped pages return a null PFN. This allows determining
1546 * precisely which pages are mapped (or in swap) and comparing mapped
1547 * pages between processes.
1548 *
1549 * Efficient users of this interface will use /proc/pid/maps to
1550 * determine which areas of memory are actually mapped and llseek to
1551 * skip over unmapped regions.
1552 */
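/*
 * Illustrative userspace sketch (not part of this kernel file):
 * decoding one entry of the format documented above, which
 * pagemap_read() below produces. The address is taken from argv and
 * must belong to the calling process; note that the PFN field reads
 * back as zero unless the caller has CAP_SYS_ADMIN, as enforced by
 * the show_pfn check in pagemap_read(). Error handling is minimal.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>

int main(int argc, char **argv)
{
        uint64_t entry;
        unsigned long vaddr;
        long page_size = sysconf(_SC_PAGESIZE);
        int fd;

        if (argc < 2)
                return 1;
        vaddr = strtoul(argv[1], NULL, 0);
        fd = open("/proc/self/pagemap", O_RDONLY);
        if (fd < 0)
                return 1;
        if (pread(fd, &entry, sizeof(entry),
                  (off_t)(vaddr / page_size) * sizeof(entry)) != sizeof(entry)) {
                close(fd);
                return 1;
        }
        printf("present=%d swap=%d file/shared-anon=%d exclusive=%d soft-dirty=%d pfn=0x%llx\n",
               (int)(entry >> 63 & 1), (int)(entry >> 62 & 1),
               (int)(entry >> 61 & 1), (int)(entry >> 56 & 1),
               (int)(entry >> 55 & 1),
               (unsigned long long)(entry & ((1ULL << 55) - 1)));
        close(fd);
        return 0;
}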
1553 static ssize_t pagemap_read(struct file *file, char __user *buf,
1554 size_t count, loff_t *ppos)
1555 {
1556 struct mm_struct *mm = file->private_data;
1557 struct pagemapread pm;
1558 struct mm_walk pagemap_walk = {};
1559 unsigned long src;
1560 unsigned long svpfn;
1561 unsigned long start_vaddr;
1562 unsigned long end_vaddr;
1563 int ret = 0, copied = 0;
1564
1565 if (!mm || !mmget_not_zero(mm))
1566 goto out;
1567
1568 ret = -EINVAL;
1569 /* file position must be aligned */
1570 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
1571 goto out_mm;
1572
1573 ret = 0;
1574 if (!count)
1575 goto out_mm;
1576
1577 /* do not disclose physical addresses: attack vector */
1578 pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
1579
1580 pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
1581 pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_KERNEL);
1582 ret = -ENOMEM;
1583 if (!pm.buffer)
1584 goto out_mm;
1585
1586 pagemap_walk.pmd_entry = pagemap_pmd_range;
1587 pagemap_walk.pte_hole = pagemap_pte_hole;
1588 #ifdef CONFIG_HUGETLB_PAGE
1589 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
1590 #endif
1591 pagemap_walk.mm = mm;
1592 pagemap_walk.private = &pm;
1593
1594 src = *ppos;
1595 svpfn = src / PM_ENTRY_BYTES;
1596 start_vaddr = svpfn << PAGE_SHIFT;
1597 end_vaddr = mm->task_size;
1598
1599 /* watch out for wraparound */
1600 if (svpfn > mm->task_size >> PAGE_SHIFT)
1601 start_vaddr = end_vaddr;
1602
1603 /*
1604 * The odds are that this will stop walking way
1605 * before end_vaddr, because the length of the
1606 * user buffer is tracked in "pm", and the walk
1607 * will stop when we hit the end of the buffer.
1608 */
1609 ret = 0;
1610 while (count && (start_vaddr < end_vaddr)) {
1611 int len;
1612 unsigned long end;
1613
1614 pm.pos = 0;
1615 end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
1616 /* overflow ? */
1617 if (end < start_vaddr || end > end_vaddr)
1618 end = end_vaddr;
1619 down_read(&mm->mmap_sem);
1620 ret = walk_page_range(start_vaddr, end, &pagemap_walk);
1621 up_read(&mm->mmap_sem);
1622 start_vaddr = end;
1623
1624 len = min(count, PM_ENTRY_BYTES * pm.pos);
1625 if (copy_to_user(buf, pm.buffer, len)) {
1626 ret = -EFAULT;
1627 goto out_free;
1628 }
1629 copied += len;
1630 buf += len;
1631 count -= len;
1632 }
1633 *ppos += copied;
1634 if (!ret || ret == PM_END_OF_BUFFER)
1635 ret = copied;
1636
1637 out_free:
1638 kfree(pm.buffer);
1639 out_mm:
1640 mmput(mm);
1641 out:
1642 return ret;
1643 }
1644
1645 static int pagemap_open(struct inode *inode, struct file *file)
1646 {
1647 struct mm_struct *mm;
1648
1649 mm = proc_mem_open(inode, PTRACE_MODE_READ);
1650 if (IS_ERR(mm))
1651 return PTR_ERR(mm);
1652 file->private_data = mm;
1653 return 0;
1654 }
1655
1656 static int pagemap_release(struct inode *inode, struct file *file)
1657 {
1658 struct mm_struct *mm = file->private_data;
1659
1660 if (mm)
1661 mmdrop(mm);
1662 return 0;
1663 }
1664
1665 const struct file_operations proc_pagemap_operations = {
1666 .llseek = mem_lseek, /* borrow this */
1667 .read = pagemap_read,
1668 .open = pagemap_open,
1669 .release = pagemap_release,
1670 };
1671 #endif /* CONFIG_PROC_PAGE_MONITOR */
1672
1673 #ifdef CONFIG_PROCESS_RECLAIM
1674 enum reclaim_type {
1675 RECLAIM_FILE,
1676 RECLAIM_ANON,
1677 RECLAIM_ALL,
1678 };
1679
1680 static int reclaim_pmd_range(pmd_t *pmd, unsigned long addr,
1681 unsigned long end, struct mm_walk *walk)
1682 {
1683 pte_t *orig_pte, *pte, ptent;
1684 spinlock_t *ptl;
1685 LIST_HEAD(page_list);
1686 struct page *page;
1687 int isolated = 0;
1688 struct vm_area_struct *vma = walk->vma;
1689
1690 orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
1691 for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
1692 ptent = *pte;
1693 if (!pte_present(ptent))
1694 continue;
1695
1696 page = vm_normal_page(vma, addr, ptent);
1697 if (!page)
1698 continue;
1699 /*
1700 * XXX: we don't handle compound pages at the moment, but this
1701 * should be revisited for THP pages before going upstream.
1702 */
1703 if (PageCompound(page)) {
1704 unsigned int order = compound_order(page);
1705 unsigned int nr_pages = (1 << order) - 1;
1706
1707 addr += (nr_pages * PAGE_SIZE);
1708 pte += nr_pages;
1709 continue;
1710 }
1711
1712 if (!PageLRU(page))
1713 continue;
1714
1715 if (page_mapcount(page) > 1)
1716 continue;
1717
1718 if (isolate_lru_page(page))
1719 continue;
1720
1721 isolated++;
1722 list_add(&page->lru, &page_list);
1723 if (isolated >= SWAP_CLUSTER_MAX) {
1724 pte_unmap_unlock(orig_pte, ptl);
1725 reclaim_pages(&page_list);
1726 isolated = 0;
1727 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
1728 orig_pte = pte;
1729 }
1730 }
1731
1732 pte_unmap_unlock(orig_pte, ptl);
1733 reclaim_pages(&page_list);
1734
1735 cond_resched();
1736 return 0;
1737 }
1738
1739 static ssize_t reclaim_write(struct file *file, const char __user *buf,
1740 size_t count, loff_t *ppos)
1741 {
1742 struct task_struct *task;
1743 char buffer[PROC_NUMBUF];
1744 struct mm_struct *mm;
1745 struct vm_area_struct *vma;
1746 enum reclaim_type type;
1747 char *type_buf;
1748
1749 if (!capable(CAP_SYS_NICE))
1750 return -EPERM;
1751
1752 memset(buffer, 0, sizeof(buffer));
1753 if (count > sizeof(buffer) - 1)
1754 count = sizeof(buffer) - 1;
1755
1756 if (copy_from_user(buffer, buf, count))
1757 return -EFAULT;
1758
1759 type_buf = strstrip(buffer);
1760 if (!strcmp(type_buf, "file"))
1761 type = RECLAIM_FILE;
1762 else if (!strcmp(type_buf, "anon"))
1763 type = RECLAIM_ANON;
1764 else if (!strcmp(type_buf, "all"))
1765 type = RECLAIM_ALL;
1766 else
1767 return -EINVAL;
1768
1769 task = get_proc_task(file->f_path.dentry->d_inode);
1770 if (!task)
1771 return -ESRCH;
1772
1773 mm = get_task_mm(task);
1774 if (mm) {
1775 struct mm_walk reclaim_walk = {
1776 .pmd_entry = reclaim_pmd_range,
1777 .mm = mm,
1778 };
1779
1780 down_read(&mm->mmap_sem);
1781 for (vma = mm->mmap; vma; vma = vma->vm_next) {
1782 if (is_vm_hugetlb_page(vma))
1783 continue;
1784
1785 if (vma->vm_flags & VM_LOCKED)
1786 continue;
1787
1788 if (type == RECLAIM_ANON && !vma_is_anonymous(vma))
1789 continue;
1790 if (type == RECLAIM_FILE && vma_is_anonymous(vma))
1791 continue;
1792
1793 walk_page_range(vma->vm_start, vma->vm_end,
1794 &reclaim_walk);
1795 }
1796 flush_tlb_mm(mm);
1797 up_read(&mm->mmap_sem);
1798 mmput(mm);
1799 }
1800 put_task_struct(task);
1801
1802 return count;
1803 }
1804
1805 const struct file_operations proc_reclaim_operations = {
1806 .write = reclaim_write,
1807 .llseek = noop_llseek,
1808 };
1809 #endif
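/*
 * Illustrative userspace sketch (not part of this kernel file): using
 * the CONFIG_PROCESS_RECLAIM interface above. reclaim_write() accepts
 * the strings "file", "anon" or "all" and requires CAP_SYS_NICE; the
 * target pid is taken from argv. Error handling is kept minimal.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

int main(int argc, char **argv)
{
        char path[64];
        int fd;

        if (argc < 2)
                return 1;
        snprintf(path, sizeof(path), "/proc/%s/reclaim", argv[1]);
        fd = open(path, O_WRONLY);
        if (fd < 0)
                return 1;
        if (write(fd, "file", strlen("file")) < 0) {
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}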
1810
1811 #ifdef CONFIG_NUMA
1812
1813 struct numa_maps {
1814 unsigned long pages;
1815 unsigned long anon;
1816 unsigned long active;
1817 unsigned long writeback;
1818 unsigned long mapcount_max;
1819 unsigned long dirty;
1820 unsigned long swapcache;
1821 unsigned long node[MAX_NUMNODES];
1822 };
1823
1824 struct numa_maps_private {
1825 struct proc_maps_private proc_maps;
1826 struct numa_maps md;
1827 };
1828
1829 static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
1830 unsigned long nr_pages)
1831 {
1832 int count = page_mapcount(page);
1833
1834 md->pages += nr_pages;
1835 if (pte_dirty || PageDirty(page))
1836 md->dirty += nr_pages;
1837
1838 if (PageSwapCache(page))
1839 md->swapcache += nr_pages;
1840
1841 if (PageActive(page) || PageUnevictable(page))
1842 md->active += nr_pages;
1843
1844 if (PageWriteback(page))
1845 md->writeback += nr_pages;
1846
1847 if (PageAnon(page))
1848 md->anon += nr_pages;
1849
1850 if (count > md->mapcount_max)
1851 md->mapcount_max = count;
1852
1853 md->node[page_to_nid(page)] += nr_pages;
1854 }
1855
1856 static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
1857 unsigned long addr)
1858 {
1859 struct page *page;
1860 int nid;
1861
1862 if (!pte_present(pte))
1863 return NULL;
1864
1865 page = vm_normal_page(vma, addr, pte);
1866 if (!page)
1867 return NULL;
1868
1869 if (PageReserved(page))
1870 return NULL;
1871
1872 nid = page_to_nid(page);
1873 if (!node_isset(nid, node_states[N_MEMORY]))
1874 return NULL;
1875
1876 return page;
1877 }
1878
1879 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1880 static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
1881 struct vm_area_struct *vma,
1882 unsigned long addr)
1883 {
1884 struct page *page;
1885 int nid;
1886
1887 if (!pmd_present(pmd))
1888 return NULL;
1889
1890 page = vm_normal_page_pmd(vma, addr, pmd);
1891 if (!page)
1892 return NULL;
1893
1894 if (PageReserved(page))
1895 return NULL;
1896
1897 nid = page_to_nid(page);
1898 if (!node_isset(nid, node_states[N_MEMORY]))
1899 return NULL;
1900
1901 return page;
1902 }
1903 #endif
1904
1905 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
1906 unsigned long end, struct mm_walk *walk)
1907 {
1908 struct numa_maps *md = walk->private;
1909 struct vm_area_struct *vma = walk->vma;
1910 spinlock_t *ptl;
1911 pte_t *orig_pte;
1912 pte_t *pte;
1913
1914 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1915 ptl = pmd_trans_huge_lock(pmd, vma);
1916 if (ptl) {
1917 struct page *page;
1918
1919 page = can_gather_numa_stats_pmd(*pmd, vma, addr);
1920 if (page)
1921 gather_stats(page, md, pmd_dirty(*pmd),
1922 HPAGE_PMD_SIZE/PAGE_SIZE);
1923 spin_unlock(ptl);
1924 return 0;
1925 }
1926
1927 if (pmd_trans_unstable(pmd))
1928 return 0;
1929 #endif
1930 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
1931 do {
1932 struct page *page = can_gather_numa_stats(*pte, vma, addr);
1933 if (!page)
1934 continue;
1935 gather_stats(page, md, pte_dirty(*pte), 1);
1936
1937 } while (pte++, addr += PAGE_SIZE, addr != end);
1938 pte_unmap_unlock(orig_pte, ptl);
1939 cond_resched();
1940 return 0;
1941 }
1942 #ifdef CONFIG_HUGETLB_PAGE
1943 static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
1944 unsigned long addr, unsigned long end, struct mm_walk *walk)
1945 {
1946 pte_t huge_pte = huge_ptep_get(pte);
1947 struct numa_maps *md;
1948 struct page *page;
1949
1950 if (!pte_present(huge_pte))
1951 return 0;
1952
1953 page = pte_page(huge_pte);
1954 if (!page)
1955 return 0;
1956
1957 md = walk->private;
1958 gather_stats(page, md, pte_dirty(huge_pte), 1);
1959 return 0;
1960 }
1961
1962 #else
1963 static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
1964 unsigned long addr, unsigned long end, struct mm_walk *walk)
1965 {
1966 return 0;
1967 }
1968 #endif
1969
1970 /*
1971 * Display pages allocated per node and memory policy via /proc.
1972 */
1973 static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1974 {
1975 struct numa_maps_private *numa_priv = m->private;
1976 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
1977 struct vm_area_struct *vma = v;
1978 struct numa_maps *md = &numa_priv->md;
1979 struct file *file = vma->vm_file;
1980 struct mm_struct *mm = vma->vm_mm;
1981 struct mm_walk walk = {
1982 .hugetlb_entry = gather_hugetlb_stats,
1983 .pmd_entry = gather_pte_stats,
1984 .private = md,
1985 .mm = mm,
1986 };
1987 struct mempolicy *pol;
1988 char buffer[64];
1989 int nid;
1990
1991 if (!mm)
1992 return 0;
1993
1994 /* Ensure we start with an empty set of numa_maps statistics. */
1995 memset(md, 0, sizeof(*md));
1996
1997 pol = __get_vma_policy(vma, vma->vm_start);
1998 if (pol) {
1999 mpol_to_str(buffer, sizeof(buffer), pol);
2000 mpol_cond_put(pol);
2001 } else {
2002 mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy);
2003 }
2004
2005 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
2006
2007 if (file) {
2008 seq_puts(m, " file=");
2009 seq_file_path(m, file, "\n\t= ");
2010 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
2011 seq_puts(m, " heap");
2012 } else if (is_stack(vma)) {
2013 seq_puts(m, " stack");
2014 }
2015
2016 if (is_vm_hugetlb_page(vma))
2017 seq_puts(m, " huge");
2018
2019 /* mmap_sem is held by m_start */
2020 walk_page_vma(vma, &walk);
2021
2022 if (!md->pages)
2023 goto out;
2024
2025 if (md->anon)
2026 seq_printf(m, " anon=%lu", md->anon);
2027
2028 if (md->dirty)
2029 seq_printf(m, " dirty=%lu", md->dirty);
2030
2031 if (md->pages != md->anon && md->pages != md->dirty)
2032 seq_printf(m, " mapped=%lu", md->pages);
2033
2034 if (md->mapcount_max > 1)
2035 seq_printf(m, " mapmax=%lu", md->mapcount_max);
2036
2037 if (md->swapcache)
2038 seq_printf(m, " swapcache=%lu", md->swapcache);
2039
2040 if (md->active < md->pages && !is_vm_hugetlb_page(vma))
2041 seq_printf(m, " active=%lu", md->active);
2042
2043 if (md->writeback)
2044 seq_printf(m, " writeback=%lu", md->writeback);
2045
2046 for_each_node_state(nid, N_MEMORY)
2047 if (md->node[nid])
2048 seq_printf(m, " N%d=%lu", nid, md->node[nid]);
2049
2050 seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
2051 out:
2052 seq_putc(m, '\n');
2053 m_cache_vma(m, vma);
2054 return 0;
2055 }
2056
2057 static int show_pid_numa_map(struct seq_file *m, void *v)
2058 {
2059 return show_numa_map(m, v, 1);
2060 }
2061
2062 static int show_tid_numa_map(struct seq_file *m, void *v)
2063 {
2064 return show_numa_map(m, v, 0);
2065 }
2066
2067 static const struct seq_operations proc_pid_numa_maps_op = {
2068 .start = m_start,
2069 .next = m_next,
2070 .stop = m_stop,
2071 .show = show_pid_numa_map,
2072 };
2073
2074 static const struct seq_operations proc_tid_numa_maps_op = {
2075 .start = m_start,
2076 .next = m_next,
2077 .stop = m_stop,
2078 .show = show_tid_numa_map,
2079 };
2080
2081 static int numa_maps_open(struct inode *inode, struct file *file,
2082 const struct seq_operations *ops)
2083 {
2084 return proc_maps_open(inode, file, ops,
2085 sizeof(struct numa_maps_private));
2086 }
2087
2088 static int pid_numa_maps_open(struct inode *inode, struct file *file)
2089 {
2090 return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
2091 }
2092
2093 static int tid_numa_maps_open(struct inode *inode, struct file *file)
2094 {
2095 return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
2096 }
2097
2098 const struct file_operations proc_pid_numa_maps_operations = {
2099 .open = pid_numa_maps_open,
2100 .read = seq_read,
2101 .llseek = seq_lseek,
2102 .release = proc_map_release,
2103 };
2104
2105 const struct file_operations proc_tid_numa_maps_operations = {
2106 .open = tid_numa_maps_open,
2107 .read = seq_read,
2108 .llseek = seq_lseek,
2109 .release = proc_map_release,
2110 };
2111 #endif /* CONFIG_NUMA */