1 /*
2 * linux/kernel/power/snapshot.c
3 *
4 * This file provides system snapshot/restore functionality for swsusp.
5 *
6 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
8 *
9 * This file is released under the GPLv2.
10 *
11 */
12
13 #include <linux/version.h>
14 #include <linux/module.h>
15 #include <linux/mm.h>
16 #include <linux/suspend.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/spinlock.h>
20 #include <linux/kernel.h>
21 #include <linux/pm.h>
22 #include <linux/device.h>
23 #include <linux/init.h>
24 #include <linux/bootmem.h>
25 #include <linux/syscalls.h>
26 #include <linux/console.h>
27 #include <linux/highmem.h>
28 #include <linux/list.h>
29 #include <linux/slab.h>
30
31 #include <asm/uaccess.h>
32 #include <asm/mmu_context.h>
33 #include <asm/pgtable.h>
34 #include <asm/tlbflush.h>
35 #include <asm/io.h>
36
37 #include "power.h"
38 #include "tuxonice_builtin.h"
39 #include "tuxonice_pagedir.h"
40
41 static int swsusp_page_is_free(struct page *);
42 static void swsusp_set_page_forbidden(struct page *);
43 static void swsusp_unset_page_forbidden(struct page *);
44
45 /*
46 * Number of bytes to reserve for memory allocations made by device drivers
47 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
48 * cause image creation to fail (tunable via /sys/power/reserved_size).
49 */
50 unsigned long reserved_size;
51
52 void __init hibernate_reserved_size_init(void)
53 {
54 reserved_size = SPARE_PAGES * PAGE_SIZE;
55 }
56
57 /*
58 * Preferred image size in bytes (tunable via /sys/power/image_size).
59 * When it is set to N, swsusp will do its best to ensure the image
60 * size will not exceed N bytes, but if that is impossible, it will
61 * try to create the smallest image possible.
62 */
63 unsigned long image_size;
64
65 void __init hibernate_image_size_init(void)
66 {
67 image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
68 }
69
70 /* List of PBEs needed for restoring the pages that were allocated before
71 * the suspend and included in the suspend image, but have also been
72 * allocated by the "resume" kernel, so their contents cannot be written
73 * directly to their "original" page frames.
74 */
75 struct pbe *restore_pblist;
76 EXPORT_SYMBOL_GPL(restore_pblist);
77
78 int resume_attempted;
79 EXPORT_SYMBOL_GPL(resume_attempted);
80
81 /* Pointer to an auxiliary buffer (1 page) */
82 static void *buffer;
83
84 /**
85 * @safe_needed - on resume, for storing the PBE list and the image,
86 * we can only use memory pages that do not conflict with the pages
87 * used before suspend. The unsafe pages have PageNosaveFree set
88 * and we count them using unsafe_pages.
89 *
90 * Each allocated image page is marked as PageNosave and PageNosaveFree
91 * so that swsusp_free() can release it.
92 */
93
94 #define PG_ANY 0
95 #define PG_SAFE 1
96 #define PG_UNSAFE_CLEAR 1
97 #define PG_UNSAFE_KEEP 0
98
99 static unsigned int allocated_unsafe_pages;
100
101 static void *get_image_page(gfp_t gfp_mask, int safe_needed)
102 {
103 void *res;
104
105 res = (void *)get_zeroed_page(gfp_mask);
106 if (safe_needed)
107 while (res && swsusp_page_is_free(virt_to_page(res))) {
108 /* The page is unsafe, mark it for swsusp_free() */
109 swsusp_set_page_forbidden(virt_to_page(res));
110 allocated_unsafe_pages++;
111 res = (void *)get_zeroed_page(gfp_mask);
112 }
113 if (res) {
114 swsusp_set_page_forbidden(virt_to_page(res));
115 swsusp_set_page_free(virt_to_page(res));
116 }
117 return res;
118 }
119
120 unsigned long get_safe_page(gfp_t gfp_mask)
121 {
122 if (toi_running)
123 return toi_get_nonconflicting_page();
124
125 return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
126 }
127
128 static struct page *alloc_image_page(gfp_t gfp_mask)
129 {
130 struct page *page;
131
132 page = alloc_page(gfp_mask);
133 if (page) {
134 swsusp_set_page_forbidden(page);
135 swsusp_set_page_free(page);
136 }
137 return page;
138 }
139
140 /**
141 * free_image_page - free page represented by @addr, allocated with
142 * get_image_page (page flags set by it must be cleared)
143 */
144
145 static inline void free_image_page(void *addr, int clear_nosave_free)
146 {
147 struct page *page;
148
149 BUG_ON(!virt_addr_valid(addr));
150
151 page = virt_to_page(addr);
152
153 swsusp_unset_page_forbidden(page);
154 if (clear_nosave_free)
155 swsusp_unset_page_free(page);
156
157 __free_page(page);
158 }
159
160 /* struct linked_page is used to build chains of pages */
161
162 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *))
163
164 struct linked_page {
165 struct linked_page *next;
166 char data[LINKED_PAGE_DATA_SIZE];
167 } __attribute__((packed));
168
169 static inline void
170 free_list_of_pages(struct linked_page *list, int clear_page_nosave)
171 {
172 while (list) {
173 struct linked_page *lp = list->next;
174
175 free_image_page(list, clear_page_nosave);
176 list = lp;
177 }
178 }
179
180 /**
181 * struct chain_allocator is used for allocating small objects out of
182 * a linked list of pages called 'the chain'.
183 *
184  * The chain grows whenever there is no room for a new object in
185 * the current page. The allocated objects cannot be freed individually.
186 * It is only possible to free them all at once, by freeing the entire
187 * chain.
188 *
189 * NOTE: The chain allocator may be inefficient if the allocated objects
190 * are not much smaller than PAGE_SIZE.
191 */
192
193 struct chain_allocator {
194 struct linked_page *chain; /* the chain */
195 unsigned int used_space; /* total size of objects allocated out
196 * of the current page
197 */
198 gfp_t gfp_mask; /* mask for allocating pages */
199 int safe_needed; /* if set, only "safe" pages are allocated */
200 };
201
202 static void
203 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
204 {
205 ca->chain = NULL;
206 ca->used_space = LINKED_PAGE_DATA_SIZE;
207 ca->gfp_mask = gfp_mask;
208 ca->safe_needed = safe_needed;
209 }
210
211 static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
212 {
213 void *ret;
214
215 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
216 struct linked_page *lp;
217
218 lp = get_image_page(ca->gfp_mask, ca->safe_needed);
219 if (!lp)
220 return NULL;
221
222 lp->next = ca->chain;
223 ca->chain = lp;
224 ca->used_space = 0;
225 }
226 ret = ca->chain->data + ca->used_space;
227 ca->used_space += size;
228 return ret;
229 }
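
/*
 * Usage sketch (illustrative, not part of the original source): callers in
 * this file drive the chain allocator roughly as follows, for example when
 * building the bm_block list of a memory bitmap:
 *
 *	struct chain_allocator ca;
 *	struct bm_block *bb;
 *
 *	chain_init(&ca, GFP_KERNEL, PG_ANY);
 *	bb = chain_alloc(&ca, sizeof(struct bm_block));
 *	if (!bb)
 *		return -ENOMEM;
 *	...
 *	free_list_of_pages(ca.chain, PG_UNSAFE_KEEP);
 *
 * Note that objects are never freed individually; the whole chain is
 * released in one call to free_list_of_pages().
 */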
230
231 /**
232 * Data types related to memory bitmaps.
233 *
234  * A memory bitmap is a structure consisting of many linked lists of
235  * objects. The main list's elements are of type struct zone_bitmap
236  * and each of them corresponds to one zone. For each zone bitmap
237  * object there is a list of objects of type struct bm_block, each of
238  * which represents one block of the bitmap in which information is stored.
239 *
240 * struct memory_bitmap contains a pointer to the main list of zone
241 * bitmap objects, a struct bm_position used for browsing the bitmap,
242 * and a pointer to the list of pages used for allocating all of the
243 * zone bitmap objects and bitmap block objects.
244 *
245 * NOTE: It has to be possible to lay out the bitmap in memory
246 * using only allocations of order 0. Additionally, the bitmap is
247  * designed to work with an arbitrary number of zones (this is over the
248 * top for now, but let's avoid making unnecessary assumptions ;-).
249 *
250 * struct zone_bitmap contains a pointer to a list of bitmap block
251 * objects and a pointer to the bitmap block object that has been
252 * most recently used for setting bits. Additionally, it contains the
253 * pfns that correspond to the start and end of the represented zone.
254 *
255 * struct bm_block contains a pointer to the memory page in which
256  * information is stored (in the form of a block of the bitmap).
257 * It also contains the pfns that correspond to the start and end of
258 * the represented memory area.
259 */
260
261 static inline unsigned long bm_block_bits(struct bm_block *bb)
262 {
263 return bb->end_pfn - bb->start_pfn;
264 }
265
266 /* Functions that operate on memory bitmaps */
267
268 void memory_bm_position_reset_index(struct memory_bitmap *bm, int index)
269 {
270 bm->states[index].block = list_entry(bm->blocks.next,
271 struct bm_block, hook);
272 bm->states[index].bit = 0;
273 }
274 EXPORT_SYMBOL_GPL(memory_bm_position_reset_index);
275
276 void memory_bm_position_reset(struct memory_bitmap *bm)
277 {
278 int i;
279
280 for (i = 0; i < bm->num_states; i++) {
281 bm->states[i].block = list_entry(bm->blocks.next,
282 struct bm_block, hook);
283 bm->states[i].bit = 0;
284 }
285 }
286 EXPORT_SYMBOL_GPL(memory_bm_position_reset);
287
288 int memory_bm_set_iterators(struct memory_bitmap *bm, int number)
289 {
290 int bytes = number * sizeof(struct bm_position);
291 struct bm_position *new_states;
292
293 if (number < bm->num_states)
294 return 0;
295
296 new_states = kmalloc(bytes, GFP_KERNEL);
297 if (!new_states)
298 return -ENOMEM;
299
300 if (bm->states)
301 kfree(bm->states);
302
303 bm->states = new_states;
304 bm->num_states = number;
305 return 0;
306 }
307 EXPORT_SYMBOL_GPL(memory_bm_set_iterators);
308
309 /**
310 * create_bm_block_list - create a list of block bitmap objects
311 * @pages - number of pages to track
312 * @list - list to put the allocated blocks into
313 * @ca - chain allocator to be used for allocating memory
314 */
315 static int create_bm_block_list(unsigned long pages,
316 struct list_head *list,
317 struct chain_allocator *ca)
318 {
319 unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
320
321 while (nr_blocks-- > 0) {
322 struct bm_block *bb;
323
324 bb = chain_alloc(ca, sizeof(struct bm_block));
325 if (!bb)
326 return -ENOMEM;
327 list_add(&bb->hook, list);
328 }
329
330 return 0;
331 }
332
333 struct mem_extent {
334 struct list_head hook;
335 unsigned long start;
336 unsigned long end;
337 };
338
339 /**
340 * free_mem_extents - free a list of memory extents
341 * @list - list of extents to empty
342 */
343 static void free_mem_extents(struct list_head *list)
344 {
345 struct mem_extent *ext, *aux;
346
347 list_for_each_entry_safe(ext, aux, list, hook) {
348 list_del(&ext->hook);
349 kfree(ext);
350 }
351 }
352
353 /**
354 * create_mem_extents - create a list of memory extents representing
355 * contiguous ranges of PFNs
356 * @list - list to put the extents into
357 * @gfp_mask - mask to use for memory allocations
358 */
359 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
360 {
361 struct zone *zone;
362
363 INIT_LIST_HEAD(list);
364
365 for_each_populated_zone(zone) {
366 unsigned long zone_start, zone_end;
367 struct mem_extent *ext, *cur, *aux;
368
369 zone_start = zone->zone_start_pfn;
370 zone_end = zone->zone_start_pfn + zone->spanned_pages;
371
372 list_for_each_entry(ext, list, hook)
373 if (zone_start <= ext->end)
374 break;
375
376 if (&ext->hook == list || zone_end < ext->start) {
377 /* New extent is necessary */
378 struct mem_extent *new_ext;
379
380 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
381 if (!new_ext) {
382 free_mem_extents(list);
383 return -ENOMEM;
384 }
385 new_ext->start = zone_start;
386 new_ext->end = zone_end;
387 list_add_tail(&new_ext->hook, &ext->hook);
388 continue;
389 }
390
391 /* Merge this zone's range of PFNs with the existing one */
392 if (zone_start < ext->start)
393 ext->start = zone_start;
394 if (zone_end > ext->end)
395 ext->end = zone_end;
396
397 /* More merging may be possible */
398 cur = ext;
399 list_for_each_entry_safe_continue(cur, aux, list, hook) {
400 if (zone_end < cur->start)
401 break;
402 if (zone_end < cur->end)
403 ext->end = cur->end;
404 list_del(&cur->hook);
405 kfree(cur);
406 }
407 }
408
409 return 0;
410 }
411
412 /**
413 * memory_bm_create - allocate memory for a memory bitmap
414 */
415 int memory_bm_create_index(struct memory_bitmap *bm, gfp_t gfp_mask,
416 int safe_needed, int states)
417 {
418 struct chain_allocator ca;
419 struct list_head mem_extents;
420 struct mem_extent *ext;
421 int error;
422
423 chain_init(&ca, gfp_mask, safe_needed);
424 INIT_LIST_HEAD(&bm->blocks);
425
426 error = create_mem_extents(&mem_extents, gfp_mask);
427 if (error)
428 return error;
429
430 list_for_each_entry(ext, &mem_extents, hook) {
431 struct bm_block *bb;
432 unsigned long pfn = ext->start;
433 unsigned long pages = ext->end - ext->start;
434
435 bb = list_entry(bm->blocks.prev, struct bm_block, hook);
436
437 error = create_bm_block_list(pages, bm->blocks.prev, &ca);
438 if (error)
439 goto Error;
440
441 list_for_each_entry_continue(bb, &bm->blocks, hook) {
442 bb->data = get_image_page(gfp_mask, safe_needed);
443 if (!bb->data) {
444 error = -ENOMEM;
445 goto Error;
446 }
447
448 bb->start_pfn = pfn;
449 if (pages >= BM_BITS_PER_BLOCK) {
450 pfn += BM_BITS_PER_BLOCK;
451 pages -= BM_BITS_PER_BLOCK;
452 } else {
453 /* This is executed only once in the loop */
454 pfn += pages;
455 }
456 bb->end_pfn = pfn;
457 }
458 }
459
460 if (!error)
461 error = memory_bm_set_iterators(bm, states);
462
463 bm->p_list = ca.chain;
464 memory_bm_position_reset(bm);
465 Exit:
466 free_mem_extents(&mem_extents);
467 return error;
468
469 Error:
470 bm->p_list = ca.chain;
471 memory_bm_free(bm, PG_UNSAFE_CLEAR);
472 goto Exit;
473 }
474 EXPORT_SYMBOL_GPL(memory_bm_create_index);
475
476 int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
477 {
478 return memory_bm_create_index(bm, gfp_mask, safe_needed, 1);
479 }
480 EXPORT_SYMBOL_GPL(memory_bm_create);
481
482 /**
483 * memory_bm_free - free memory occupied by the memory bitmap @bm
484 */
485 void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
486 {
487 struct bm_block *bb;
488
489 list_for_each_entry(bb, &bm->blocks, hook)
490 if (bb->data)
491 free_image_page(bb->data, clear_nosave_free);
492
493 free_list_of_pages(bm->p_list, clear_nosave_free);
494
495 INIT_LIST_HEAD(&bm->blocks);
496
497 if (bm->states) {
498 kfree(bm->states);
499 bm->states = NULL;
500 bm->num_states = 0;
501 }
502 }
503 EXPORT_SYMBOL_GPL(memory_bm_free);
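
/*
 * Illustrative sketch (not taken from the original source; some_pfn and
 * handle_pfn() are hypothetical placeholders): the typical lifecycle of a
 * memory bitmap, as used further down in this file, e.g. in
 * create_basic_memory_bitmaps() and hibernate_preallocate_memory():
 *
 *	static struct memory_bitmap bm;
 *	unsigned long pfn;
 *
 *	if (memory_bm_create(&bm, GFP_KERNEL, PG_ANY))
 *		return -ENOMEM;
 *
 *	memory_bm_set_bit(&bm, some_pfn);
 *	memory_bm_position_reset(&bm);
 *	while ((pfn = memory_bm_next_pfn(&bm)) != BM_END_OF_MAP)
 *		handle_pfn(pfn);
 *
 *	memory_bm_free(&bm, PG_UNSAFE_CLEAR);
 */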
504
505 /**
506 * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
507  * to the given pfn. The block and bit members of @bm->states[state]
508  * are updated accordingly.
509 */
510 static int memory_bm_find_bit_index(struct memory_bitmap *bm, unsigned long pfn,
511 void **addr, unsigned int *bit_nr, int state)
512 {
513 struct bm_block *bb;
514
515 /*
516 * Check if the pfn corresponds to the current bitmap block and find
517 * the block where it fits if this is not the case.
518 */
519 bb = bm->states[state].block;
520 if (pfn < bb->start_pfn)
521 list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
522 if (pfn >= bb->start_pfn)
523 break;
524
525 if (pfn >= bb->end_pfn)
526 list_for_each_entry_continue(bb, &bm->blocks, hook)
527 if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
528 break;
529
530 if (&bb->hook == &bm->blocks)
531 return -EFAULT;
532
533 /* The block has been found */
534 bm->states[state].block = bb;
535 pfn -= bb->start_pfn;
536 bm->states[state].bit = pfn + 1;
537 *bit_nr = pfn;
538 *addr = bb->data;
539 return 0;
540 }
541
542 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
543 void **addr, unsigned int *bit_nr)
544 {
545 return memory_bm_find_bit_index(bm, pfn, addr, bit_nr, 0);
546 }
547
548 void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
549 {
550 void *addr;
551 unsigned int bit;
552 int error;
553
554 error = memory_bm_find_bit(bm, pfn, &addr, &bit);
555 BUG_ON(error);
556 set_bit(bit, addr);
557 }
558 EXPORT_SYMBOL_GPL(memory_bm_set_bit);
559
560 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
561 {
562 void *addr;
563 unsigned int bit;
564 int error;
565
566 error = memory_bm_find_bit(bm, pfn, &addr, &bit);
567 if (!error)
568 set_bit(bit, addr);
569 return error;
570 }
571
572 void memory_bm_clear_bit_index(struct memory_bitmap *bm, unsigned long pfn,
573 int index)
574 {
575 void *addr;
576 unsigned int bit;
577 int error;
578
579 error = memory_bm_find_bit_index(bm, pfn, &addr, &bit, index);
580 BUG_ON(error);
581 clear_bit(bit, addr);
582 }
583 EXPORT_SYMBOL_GPL(memory_bm_clear_bit_index);
584
585 void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
586 {
587 memory_bm_clear_bit_index(bm, pfn, 0);
588 }
589 EXPORT_SYMBOL_GPL(memory_bm_clear_bit);
590
591 int memory_bm_test_bit_index(struct memory_bitmap *bm, unsigned long pfn,
592 int index)
593 {
594 void *addr;
595 unsigned int bit;
596 int error;
597
598 error = memory_bm_find_bit_index(bm, pfn, &addr, &bit, index);
599 BUG_ON(error);
600 return test_bit(bit, addr);
601 }
602 EXPORT_SYMBOL_GPL(memory_bm_test_bit_index);
603
604 int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
605 {
606 return memory_bm_test_bit_index(bm, pfn, 0);
607 }
608 EXPORT_SYMBOL_GPL(memory_bm_test_bit);
609
610 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
611 {
612 void *addr;
613 unsigned int bit;
614
615 return !memory_bm_find_bit(bm, pfn, &addr, &bit);
616 }
617
618 /**
619 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit
620 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
621 * returned.
622 *
623 * It is required to run memory_bm_position_reset() before the first call to
624 * this function.
625 */
626
627 unsigned long memory_bm_next_pfn_index(struct memory_bitmap *bm, int index)
628 {
629 struct bm_block *bb;
630 int bit;
631
632 bb = bm->states[index].block;
633 do {
634 bit = bm->states[index].bit;
635 bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
636 if (bit < bm_block_bits(bb))
637 goto Return_pfn;
638
639 bb = list_entry(bb->hook.next, struct bm_block, hook);
640 bm->states[index].block = bb;
641 bm->states[index].bit = 0;
642 } while (&bb->hook != &bm->blocks);
643
644 memory_bm_position_reset_index(bm, index);
645 return BM_END_OF_MAP;
646
647 Return_pfn:
648 bm->states[index].bit = bit + 1;
649 return bb->start_pfn + bit;
650 }
651 EXPORT_SYMBOL_GPL(memory_bm_next_pfn_index);
652
653 unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
654 {
655 return memory_bm_next_pfn_index(bm, 0);
656 }
657 EXPORT_SYMBOL_GPL(memory_bm_next_pfn);
658
659 void memory_bm_clear(struct memory_bitmap *bm)
660 {
661 unsigned long pfn;
662
663 memory_bm_position_reset(bm);
664 pfn = memory_bm_next_pfn(bm);
665 while (pfn != BM_END_OF_MAP) {
666 memory_bm_clear_bit(bm, pfn);
667 pfn = memory_bm_next_pfn(bm);
668 }
669 }
670 EXPORT_SYMBOL_GPL(memory_bm_clear);
671
672 void memory_bm_copy(struct memory_bitmap *source, struct memory_bitmap *dest)
673 {
674 unsigned long pfn;
675
676 memory_bm_position_reset(source);
677 pfn = memory_bm_next_pfn(source);
678 while (pfn != BM_END_OF_MAP) {
679 memory_bm_set_bit(dest, pfn);
680 pfn = memory_bm_next_pfn(source);
681 }
682 }
683 EXPORT_SYMBOL_GPL(memory_bm_copy);
684
685 void memory_bm_dup(struct memory_bitmap *source, struct memory_bitmap *dest)
686 {
687 memory_bm_clear(dest);
688 memory_bm_copy(source, dest);
689 }
690 EXPORT_SYMBOL_GPL(memory_bm_dup);
691
692 #ifdef CONFIG_TOI
693 #define DEFINE_MEMORY_BITMAP(name) \
694 struct memory_bitmap *name; \
695 EXPORT_SYMBOL_GPL(name)
696
697 DEFINE_MEMORY_BITMAP(pageset1_map);
698 DEFINE_MEMORY_BITMAP(pageset1_copy_map);
699 DEFINE_MEMORY_BITMAP(pageset2_map);
700 DEFINE_MEMORY_BITMAP(page_resave_map);
701 DEFINE_MEMORY_BITMAP(io_map);
702 DEFINE_MEMORY_BITMAP(nosave_map);
703 DEFINE_MEMORY_BITMAP(free_map);
704
705 int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
706 (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
707 {
708 int result = 0;
709 unsigned int nr = 0;
710 struct bm_block *bb;
711
712 if (!bm)
713 return result;
714
715 list_for_each_entry(bb, &bm->blocks, hook)
716 nr++;
717
718 result = (*rw_chunk)(WRITE, NULL, (char *) &nr, sizeof(unsigned int));
719 if (result)
720 return result;
721
722 list_for_each_entry(bb, &bm->blocks, hook) {
723 result = (*rw_chunk)(WRITE, NULL, (char *) &bb->start_pfn,
724 2 * sizeof(unsigned long));
725 if (result)
726 return result;
727
728 result = (*rw_chunk)(WRITE, NULL, (char *) bb->data, PAGE_SIZE);
729 if (result)
730 return result;
731 }
732
733 return 0;
734 }
735 EXPORT_SYMBOL_GPL(memory_bm_write);
736
737 int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
738 (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
739 {
740 int result = 0;
741 unsigned int nr, i;
742 struct bm_block *bb;
743
744 if (!bm)
745 return result;
746
747 result = memory_bm_create(bm, GFP_KERNEL, 0);
748
749 if (result)
750 return result;
751
752 result = (*rw_chunk)(READ, NULL, (char *) &nr, sizeof(unsigned int));
753 if (result)
754 goto Free;
755
756 for (i = 0; i < nr; i++) {
757 unsigned long pfn;
758
759 result = (*rw_chunk)(READ, NULL, (char *) &pfn,
760 sizeof(unsigned long));
761 if (result)
762 goto Free;
763
764 list_for_each_entry(bb, &bm->blocks, hook)
765 if (bb->start_pfn == pfn)
766 break;
767
768 if (&bb->hook == &bm->blocks) {
769 printk(KERN_ERR
770 "TuxOnIce: Failed to load memory bitmap.\n");
771 result = -EINVAL;
772 goto Free;
773 }
774
775 result = (*rw_chunk)(READ, NULL, (char *) &pfn,
776 sizeof(unsigned long));
777 if (result)
778 goto Free;
779
780 if (pfn != bb->end_pfn) {
781 printk(KERN_ERR
782 "TuxOnIce: Failed to load memory bitmap. "
783 "End PFN doesn't match what was saved.\n");
784 result = -EINVAL;
785 goto Free;
786 }
787
788 result = (*rw_chunk)(READ, NULL, (char *) bb->data, PAGE_SIZE);
789
790 if (result)
791 goto Free;
792 }
793
794 return 0;
795
796 Free:
797 memory_bm_free(bm, PG_ANY);
798 return result;
799 }
800 EXPORT_SYMBOL_GPL(memory_bm_read);
801 #endif
802
803 LIST_HEAD(nosave_regions);
804 EXPORT_SYMBOL_GPL(nosave_regions);
805
806 /**
807 * register_nosave_region - register a range of page frames the contents
808 * of which should not be saved during the suspend (to be used in the early
809 * initialization code)
810 */
811
812 void __init
813 __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
814 int use_kmalloc)
815 {
816 struct nosave_region *region;
817
818 if (start_pfn >= end_pfn)
819 return;
820
821 if (!list_empty(&nosave_regions)) {
822 /* Try to extend the previous region (they should be sorted) */
823 region = list_entry(nosave_regions.prev,
824 struct nosave_region, list);
825 if (region->end_pfn == start_pfn) {
826 region->end_pfn = end_pfn;
827 goto Report;
828 }
829 }
830 if (use_kmalloc) {
831 /* during init, this shouldn't fail */
832 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
833 BUG_ON(!region);
834 } else
835 /* This allocation cannot fail */
836 region = alloc_bootmem(sizeof(struct nosave_region));
837 region->start_pfn = start_pfn;
838 region->end_pfn = end_pfn;
839 list_add_tail(&region->list, &nosave_regions);
840 Report:
841 printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
842 start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
843 }
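
/*
 * Illustrative sketch (an assumption, not code from this file): early
 * architecture/platform setup code normally reaches this function through
 * the register_nosave_region()/register_nosave_region_late() wrappers
 * declared in <linux/suspend.h>, e.g. to keep a firmware-owned range of
 * page frames out of the image:
 *
 *	register_nosave_region(PFN_DOWN(fw_start), PFN_UP(fw_end));
 *
 * where fw_start and fw_end are hypothetical physical addresses bounding
 * the region that must not be saved.
 */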
844
845 /*
846 * Set bits in this map correspond to the page frames the contents of which
847 * should not be saved during the suspend.
848 */
849 static struct memory_bitmap *forbidden_pages_map;
850
851 /* Set bits in this map correspond to free page frames. */
852 static struct memory_bitmap *free_pages_map;
853
854 /*
855 * Each page frame allocated for creating the image is marked by setting the
856 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
857 */
858
859 void swsusp_set_page_free(struct page *page)
860 {
861 if (free_pages_map)
862 memory_bm_set_bit(free_pages_map, page_to_pfn(page));
863 }
864
865 static int swsusp_page_is_free(struct page *page)
866 {
867 return free_pages_map ?
868 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
869 }
870
871 void swsusp_unset_page_free(struct page *page)
872 {
873 if (free_pages_map)
874 memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
875 }
876
877 static void swsusp_set_page_forbidden(struct page *page)
878 {
879 if (forbidden_pages_map)
880 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
881 }
882
883 int swsusp_page_is_forbidden(struct page *page)
884 {
885 return forbidden_pages_map ?
886 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
887 }
888
889 static void swsusp_unset_page_forbidden(struct page *page)
890 {
891 if (forbidden_pages_map)
892 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
893 }
894
895 /**
896  * mark_nosave_pages - set, in the given bitmap @bm, the bits corresponding
897  * to the page frames whose contents should not be saved.
898 */
899
900 static void mark_nosave_pages(struct memory_bitmap *bm)
901 {
902 struct nosave_region *region;
903
904 if (list_empty(&nosave_regions))
905 return;
906
907 list_for_each_entry(region, &nosave_regions, list) {
908 unsigned long pfn;
909
910 pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n",
911 (unsigned long long) region->start_pfn << PAGE_SHIFT,
912 ((unsigned long long) region->end_pfn << PAGE_SHIFT)
913 - 1);
914
915 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
916 if (pfn_valid(pfn)) {
917 /*
918 * It is safe to ignore the result of
919 * mem_bm_set_bit_check() here, since we won't
920 * touch the PFNs for which the error is
921 * returned anyway.
922 */
923 mem_bm_set_bit_check(bm, pfn);
924 }
925 }
926 }
927
928 /**
929 * create_basic_memory_bitmaps - create bitmaps needed for marking page
930 * frames that should not be saved and free page frames. The pointers
931 * forbidden_pages_map and free_pages_map are only modified if everything
932 * goes well, because we don't want the bits to be used before both bitmaps
933 * are set up.
934 */
935
936 int create_basic_memory_bitmaps(void)
937 {
938 struct memory_bitmap *bm1, *bm2;
939 int error = 0;
940
941 BUG_ON(forbidden_pages_map || free_pages_map);
942
943 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
944 if (!bm1)
945 return -ENOMEM;
946
947 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
948 if (error)
949 goto Free_first_object;
950
951 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
952 if (!bm2)
953 goto Free_first_bitmap;
954
955 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
956 if (error)
957 goto Free_second_object;
958
959 forbidden_pages_map = bm1;
960 free_pages_map = bm2;
961 mark_nosave_pages(forbidden_pages_map);
962
963 pr_debug("PM: Basic memory bitmaps created\n");
964
965 return 0;
966
967 Free_second_object:
968 kfree(bm2);
969 Free_first_bitmap:
970 memory_bm_free(bm1, PG_UNSAFE_CLEAR);
971 Free_first_object:
972 kfree(bm1);
973 return -ENOMEM;
974 }
975
976 /**
977 * free_basic_memory_bitmaps - free memory bitmaps allocated by
978 * create_basic_memory_bitmaps(). The auxiliary pointers are necessary
979 * so that the bitmaps themselves are not referred to while they are being
980 * freed.
981 */
982
983 void free_basic_memory_bitmaps(void)
984 {
985 struct memory_bitmap *bm1, *bm2;
986
987 BUG_ON(!(forbidden_pages_map && free_pages_map));
988
989 bm1 = forbidden_pages_map;
990 bm2 = free_pages_map;
991 forbidden_pages_map = NULL;
992 free_pages_map = NULL;
993 memory_bm_free(bm1, PG_UNSAFE_CLEAR);
994 kfree(bm1);
995 memory_bm_free(bm2, PG_UNSAFE_CLEAR);
996 kfree(bm2);
997
998 pr_debug("PM: Basic memory bitmaps freed\n");
999 }
1000
1001 /**
1002 * snapshot_additional_pages - estimate the number of additional pages
1003  * that will be needed for setting up the suspend image data structures for
1004  * a given zone (usually the returned value is greater than the exact number)
1005 */
1006
1007 unsigned int snapshot_additional_pages(struct zone *zone)
1008 {
1009 unsigned int res;
1010
1011 res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
1012 res += DIV_ROUND_UP(res * sizeof(struct bm_block),
1013 LINKED_PAGE_DATA_SIZE);
1014 return 2 * res;
1015 }
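
/*
 * Worked example (illustrative, assuming 4 KB pages and a 64-bit kernel):
 * for a zone spanning 2 GiB, i.e. 524288 page frames,
 *
 *	DIV_ROUND_UP(524288, BM_BITS_PER_BLOCK = 32768) = 16 bitmap data pages
 *	+ DIV_ROUND_UP(16 * sizeof(struct bm_block), LINKED_PAGE_DATA_SIZE) = 1
 *
 * giving res = 17 and a return value of 2 * 17 = 34, i.e. roughly 34 extra
 * page frames are reserved for such a zone to cover the two memory bitmaps
 * used while creating the image.
 */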
1016
1017 #ifdef CONFIG_HIGHMEM
1018 /**
1019 * count_free_highmem_pages - compute the total number of free highmem
1020 * pages, system-wide.
1021 */
1022
1023 static unsigned int count_free_highmem_pages(void)
1024 {
1025 struct zone *zone;
1026 unsigned int cnt = 0;
1027
1028 for_each_populated_zone(zone)
1029 if (is_highmem(zone))
1030 cnt += zone_page_state(zone, NR_FREE_PAGES);
1031
1032 return cnt;
1033 }
1034
1035 /**
1036 * saveable_highmem_page - Determine whether a highmem page should be
1037 * included in the suspend image.
1038 *
1039  * We should save the page if it isn't Nosave, NosaveFree, or Reserved,
1040  * and it isn't part of a free chunk of pages.
1041 */
1042 struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
1043 {
1044 struct page *page;
1045
1046 if (!pfn_valid(pfn))
1047 return NULL;
1048
1049 page = pfn_to_page(pfn);
1050 if (page_zone(page) != zone)
1051 return NULL;
1052
1053 BUG_ON(!PageHighMem(page));
1054
1055 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) ||
1056 PageReserved(page))
1057 return NULL;
1058
1059 if (page_is_guard(page))
1060 return NULL;
1061
1062 return page;
1063 }
1064 EXPORT_SYMBOL_GPL(saveable_highmem_page);
1065
1066 /**
1067 * count_highmem_pages - compute the total number of saveable highmem
1068 * pages.
1069 */
1070
1071 static unsigned int count_highmem_pages(void)
1072 {
1073 struct zone *zone;
1074 unsigned int n = 0;
1075
1076 for_each_populated_zone(zone) {
1077 unsigned long pfn, max_zone_pfn;
1078
1079 if (!is_highmem(zone))
1080 continue;
1081
1082 mark_free_pages(zone);
1083 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1084 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1085 if (saveable_highmem_page(zone, pfn))
1086 n++;
1087 }
1088 return n;
1089 }
1090 #endif /* CONFIG_HIGHMEM */
1091
1092 /**
1093 * saveable_page - Determine whether a non-highmem page should be included
1094 * in the suspend image.
1095 *
1096 * We should save the page if it isn't Nosave, and is not in the range
1097 * of pages statically defined as 'unsaveable', and it isn't a part of
1098 * a free chunk of pages.
1099 */
1100 struct page *saveable_page(struct zone *zone, unsigned long pfn)
1101 {
1102 struct page *page;
1103
1104 if (!pfn_valid(pfn))
1105 return NULL;
1106
1107 page = pfn_to_page(pfn);
1108 if (page_zone(page) != zone)
1109 return NULL;
1110
1111 BUG_ON(PageHighMem(page));
1112
1113 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
1114 return NULL;
1115
1116 if (PageReserved(page)
1117 && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
1118 return NULL;
1119
1120 if (page_is_guard(page))
1121 return NULL;
1122
1123 return page;
1124 }
1125 EXPORT_SYMBOL_GPL(saveable_page);
1126
1127 /**
1128 * count_data_pages - compute the total number of saveable non-highmem
1129 * pages.
1130 */
1131
1132 static unsigned int count_data_pages(void)
1133 {
1134 struct zone *zone;
1135 unsigned long pfn, max_zone_pfn;
1136 unsigned int n = 0;
1137
1138 for_each_populated_zone(zone) {
1139 if (is_highmem(zone))
1140 continue;
1141
1142 mark_free_pages(zone);
1143 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1144 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1145 if (saveable_page(zone, pfn))
1146 n++;
1147 }
1148 return n;
1149 }
1150
1151 /* This is needed because copy_page and memcpy are not usable for copying
1152 * task structs.
1153 */
1154 static inline void do_copy_page(long *dst, long *src)
1155 {
1156 int n;
1157
1158 for (n = PAGE_SIZE / sizeof(long); n; n--)
1159 *dst++ = *src++;
1160 }
1161
1162
1163 /**
1164 * safe_copy_page - check if the page we are going to copy is marked as
1165 * present in the kernel page tables (this always is the case if
1166 * CONFIG_DEBUG_PAGEALLOC is not set and in that case
1167 * kernel_page_present() always returns 'true').
1168 */
1169 static void safe_copy_page(void *dst, struct page *s_page)
1170 {
1171 if (kernel_page_present(s_page)) {
1172 do_copy_page(dst, page_address(s_page));
1173 } else {
1174 kernel_map_pages(s_page, 1, 1);
1175 do_copy_page(dst, page_address(s_page));
1176 kernel_map_pages(s_page, 1, 0);
1177 }
1178 }
1179
1180
1181 #ifdef CONFIG_HIGHMEM
1182 static inline struct page *
1183 page_is_saveable(struct zone *zone, unsigned long pfn)
1184 {
1185 return is_highmem(zone) ?
1186 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
1187 }
1188
1189 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1190 {
1191 struct page *s_page, *d_page;
1192 void *src, *dst;
1193
1194 s_page = pfn_to_page(src_pfn);
1195 d_page = pfn_to_page(dst_pfn);
1196 if (PageHighMem(s_page)) {
1197 src = kmap_atomic(s_page);
1198 dst = kmap_atomic(d_page);
1199 do_copy_page(dst, src);
1200 kunmap_atomic(dst);
1201 kunmap_atomic(src);
1202 } else {
1203 if (PageHighMem(d_page)) {
1204 /* Page pointed to by src may contain some kernel
1205 * data modified by kmap_atomic()
1206 */
1207 safe_copy_page(buffer, s_page);
1208 dst = kmap_atomic(d_page);
1209 copy_page(dst, buffer);
1210 kunmap_atomic(dst);
1211 } else {
1212 safe_copy_page(page_address(d_page), s_page);
1213 }
1214 }
1215 }
1216 #else
1217 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
1218
1219 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1220 {
1221 safe_copy_page(page_address(pfn_to_page(dst_pfn)),
1222 pfn_to_page(src_pfn));
1223 }
1224 #endif /* CONFIG_HIGHMEM */
1225
1226 static void
1227 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1228 {
1229 struct zone *zone;
1230 unsigned long pfn;
1231
1232 for_each_populated_zone(zone) {
1233 unsigned long max_zone_pfn;
1234
1235 mark_free_pages(zone);
1236 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1237 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1238 if (page_is_saveable(zone, pfn))
1239 memory_bm_set_bit(orig_bm, pfn);
1240 }
1241 memory_bm_position_reset(orig_bm);
1242 memory_bm_position_reset(copy_bm);
1243 for(;;) {
1244 pfn = memory_bm_next_pfn(orig_bm);
1245 if (unlikely(pfn == BM_END_OF_MAP))
1246 break;
1247 copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1248 }
1249 }
1250
1251 /* Total number of image pages */
1252 static unsigned int nr_copy_pages;
1253 /* Number of pages needed for saving the original pfns of the image pages */
1254 static unsigned int nr_meta_pages;
1255 /*
1256 * Numbers of normal and highmem page frames allocated for hibernation image
1257 * before suspending devices.
1258 */
1259 unsigned int alloc_normal, alloc_highmem;
1260 /*
1261 * Memory bitmap used for marking saveable pages (during hibernation) or
1262 * hibernation image pages (during restore)
1263 */
1264 static struct memory_bitmap orig_bm;
1265 /*
1266 * Memory bitmap used during hibernation for marking allocated page frames that
1267 * will contain copies of saveable pages. During restore it is initially used
1268 * for marking hibernation image pages, but then the set bits from it are
1269 * duplicated in @orig_bm and it is released. On highmem systems it is next
1270 * used for marking "safe" highmem pages, but it has to be reinitialized for
1271 * this purpose.
1272 */
1273 static struct memory_bitmap copy_bm;
1274
1275 /**
1276 * swsusp_free - free pages allocated for the suspend.
1277 *
1278  * Suspend pages are allocated before the atomic copy is made, so we
1279 * need to release them after the resume.
1280 */
1281
1282 void swsusp_free(void)
1283 {
1284 struct zone *zone;
1285 unsigned long pfn, max_zone_pfn;
1286
1287 for_each_populated_zone(zone) {
1288 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1289 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1290 if (pfn_valid(pfn)) {
1291 struct page *page = pfn_to_page(pfn);
1292
1293 if (swsusp_page_is_forbidden(page) &&
1294 swsusp_page_is_free(page)) {
1295 swsusp_unset_page_forbidden(page);
1296 swsusp_unset_page_free(page);
1297 __free_page(page);
1298 }
1299 }
1300 }
1301 nr_copy_pages = 0;
1302 nr_meta_pages = 0;
1303 restore_pblist = NULL;
1304 buffer = NULL;
1305 alloc_normal = 0;
1306 alloc_highmem = 0;
1307 }
1308
1309 /* Helper functions used for the shrinking of memory. */
1310
1311 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN)
1312
1313 /**
1314 * preallocate_image_pages - Allocate a number of pages for hibernation image
1315 * @nr_pages: Number of page frames to allocate.
1316 * @mask: GFP flags to use for the allocation.
1317 *
1318 * Return value: Number of page frames actually allocated
1319 */
1320 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1321 {
1322 unsigned long nr_alloc = 0;
1323
1324 while (nr_pages > 0) {
1325 struct page *page;
1326
1327 page = alloc_image_page(mask);
1328 if (!page)
1329 break;
1330 memory_bm_set_bit(&copy_bm, page_to_pfn(page));
1331 if (PageHighMem(page))
1332 alloc_highmem++;
1333 else
1334 alloc_normal++;
1335 nr_pages--;
1336 nr_alloc++;
1337 }
1338
1339 return nr_alloc;
1340 }
1341
1342 static unsigned long preallocate_image_memory(unsigned long nr_pages,
1343 unsigned long avail_normal)
1344 {
1345 unsigned long alloc;
1346
1347 if (avail_normal <= alloc_normal)
1348 return 0;
1349
1350 alloc = avail_normal - alloc_normal;
1351 if (nr_pages < alloc)
1352 alloc = nr_pages;
1353
1354 return preallocate_image_pages(alloc, GFP_IMAGE);
1355 }
1356
1357 #ifdef CONFIG_HIGHMEM
1358 static unsigned long preallocate_image_highmem(unsigned long nr_pages)
1359 {
1360 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
1361 }
1362
1363 /**
1364 * __fraction - Compute (an approximation of) x * (multiplier / base)
1365 */
1366 static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
1367 {
1368 x *= multiplier;
1369 do_div(x, base);
1370 return (unsigned long)x;
1371 }
1372
1373 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1374 unsigned long highmem,
1375 unsigned long total)
1376 {
1377 unsigned long alloc = __fraction(nr_pages, highmem, total);
1378
1379 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
1380 }
1381 #else /* CONFIG_HIGHMEM */
1382 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
1383 {
1384 return 0;
1385 }
1386
1387 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1388 unsigned long highmem,
1389 unsigned long total)
1390 {
1391 return 0;
1392 }
1393 #endif /* CONFIG_HIGHMEM */
1394
1395 /**
1396 * free_unnecessary_pages - Release preallocated pages not needed for the image
1397 */
1398 static void free_unnecessary_pages(void)
1399 {
1400 unsigned long save, to_free_normal, to_free_highmem;
1401
1402 save = count_data_pages();
1403 if (alloc_normal >= save) {
1404 to_free_normal = alloc_normal - save;
1405 save = 0;
1406 } else {
1407 to_free_normal = 0;
1408 save -= alloc_normal;
1409 }
1410 save += count_highmem_pages();
1411 if (alloc_highmem >= save) {
1412 to_free_highmem = alloc_highmem - save;
1413 } else {
1414 to_free_highmem = 0;
1415 save -= alloc_highmem;
1416 if (to_free_normal > save)
1417 to_free_normal -= save;
1418 else
1419 to_free_normal = 0;
1420 }
1421
1422 memory_bm_position_reset(&copy_bm);
1423
1424 while (to_free_normal > 0 || to_free_highmem > 0) {
1425 unsigned long pfn = memory_bm_next_pfn(&copy_bm);
1426 struct page *page = pfn_to_page(pfn);
1427
1428 if (PageHighMem(page)) {
1429 if (!to_free_highmem)
1430 continue;
1431 to_free_highmem--;
1432 alloc_highmem--;
1433 } else {
1434 if (!to_free_normal)
1435 continue;
1436 to_free_normal--;
1437 alloc_normal--;
1438 }
1439 memory_bm_clear_bit(&copy_bm, pfn);
1440 swsusp_unset_page_forbidden(page);
1441 swsusp_unset_page_free(page);
1442 __free_page(page);
1443 }
1444 }
1445
1446 /**
1447 * minimum_image_size - Estimate the minimum acceptable size of an image
1448 * @saveable: Number of saveable pages in the system.
1449 *
1450 * We want to avoid attempting to free too much memory too hard, so estimate the
1451 * minimum acceptable size of a hibernation image to use as the lower limit for
1452 * preallocating memory.
1453 *
1454 * We assume that the minimum image size should be proportional to
1455 *
1456 * [number of saveable pages] - [number of pages that can be freed in theory]
1457 *
1458 * where the second term is the sum of (1) reclaimable slab pages, (2) active
1459  * and (3) inactive anonymous pages, (4) active and (5) inactive file pages,
1460 * minus mapped file pages.
1461 */
1462 static unsigned long minimum_image_size(unsigned long saveable)
1463 {
1464 unsigned long size;
1465
1466 size = global_page_state(NR_SLAB_RECLAIMABLE)
1467 + global_page_state(NR_ACTIVE_ANON)
1468 + global_page_state(NR_INACTIVE_ANON)
1469 + global_page_state(NR_ACTIVE_FILE)
1470 + global_page_state(NR_INACTIVE_FILE)
1471 - global_page_state(NR_FILE_MAPPED);
1472
1473 return saveable <= size ? 0 : saveable - size;
1474 }
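
/*
 * Worked example (illustrative numbers only): with 200000 saveable pages in
 * the system and reclaimable slab + anonymous + file LRU pages minus mapped
 * file pages adding up to 150000 pages, the estimated minimum image size is
 * 200000 - 150000 = 50000 pages.  If that sum were to exceed the number of
 * saveable pages, the function would return 0 instead.
 */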
1475
1476 /**
1477 * hibernate_preallocate_memory - Preallocate memory for hibernation image
1478 *
1479 * To create a hibernation image it is necessary to make a copy of every page
1480 * frame in use. We also need a number of page frames to be free during
1481 * hibernation for allocations made while saving the image and for device
1482 * drivers, in case they need to allocate memory from their hibernation
1483 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
1484  * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
1485  * /sys/power/reserved_size), respectively). To make this happen, we compute the
1486 * total number of available page frames and allocate at least
1487 *
1488 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
1489 * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
1490 *
1491 * of them, which corresponds to the maximum size of a hibernation image.
1492 *
1493 * If image_size is set below the number following from the above formula,
1494 * the preallocation of memory is continued until the total number of saveable
1495 * pages in the system is below the requested image size or the minimum
1496 * acceptable image size returned by minimum_image_size(), whichever is greater.
1497 */
1498 int hibernate_preallocate_memory(void)
1499 {
1500 struct zone *zone;
1501 unsigned long saveable, size, max_size, count, highmem, pages = 0;
1502 unsigned long alloc, save_highmem, pages_highmem, avail_normal;
1503 struct timeval start, stop;
1504 int error;
1505
1506 printk(KERN_INFO "PM: Preallocating image memory... ");
1507 do_gettimeofday(&start);
1508
1509 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
1510 if (error)
1511 goto err_out;
1512
1513 error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
1514 if (error)
1515 goto err_out;
1516
1517 alloc_normal = 0;
1518 alloc_highmem = 0;
1519
1520 /* Count the number of saveable data pages. */
1521 save_highmem = count_highmem_pages();
1522 saveable = count_data_pages();
1523
1524 /*
1525 * Compute the total number of page frames we can use (count) and the
1526 * number of pages needed for image metadata (size).
1527 */
1528 count = saveable;
1529 saveable += save_highmem;
1530 highmem = save_highmem;
1531 size = 0;
1532 for_each_populated_zone(zone) {
1533 size += snapshot_additional_pages(zone);
1534 if (is_highmem(zone))
1535 highmem += zone_page_state(zone, NR_FREE_PAGES);
1536 else
1537 count += zone_page_state(zone, NR_FREE_PAGES);
1538 }
1539 avail_normal = count;
1540 count += highmem;
1541 count -= totalreserve_pages;
1542
1543 /* Add number of pages required for page keys (s390 only). */
1544 size += page_key_additional_pages(saveable);
1545
1546 /* Compute the maximum number of saveable pages to leave in memory. */
1547 max_size = (count - (size + PAGES_FOR_IO)) / 2
1548 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
1549 /* Compute the desired number of image pages specified by image_size. */
1550 size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1551 if (size > max_size)
1552 size = max_size;
1553 /*
1554 * If the desired number of image pages is at least as large as the
1555 * current number of saveable pages in memory, allocate page frames for
1556 * the image and we're done.
1557 */
1558 if (size >= saveable) {
1559 pages = preallocate_image_highmem(save_highmem);
1560 pages += preallocate_image_memory(saveable - pages, avail_normal);
1561 goto out;
1562 }
1563
1564 /* Estimate the minimum size of the image. */
1565 pages = minimum_image_size(saveable);
1566 /*
1567 * To avoid excessive pressure on the normal zone, leave room in it to
1568 * accommodate an image of the minimum size (unless it's already too
1569 * small, in which case don't preallocate pages from it at all).
1570 */
1571 if (avail_normal > pages)
1572 avail_normal -= pages;
1573 else
1574 avail_normal = 0;
1575 if (size < pages)
1576 size = min_t(unsigned long, pages, max_size);
1577
1578 /*
1579 * Let the memory management subsystem know that we're going to need a
1580 * large number of page frames to allocate and make it free some memory.
1581 * NOTE: If this is not done, performance will be hurt badly in some
1582 * test cases.
1583 */
1584 shrink_all_memory(saveable - size);
1585
1586 /*
1587 * The number of saveable pages in memory was too high, so apply some
1588 * pressure to decrease it. First, make room for the largest possible
1589 * image and fail if that doesn't work. Next, try to decrease the size
1590 * of the image as much as indicated by 'size' using allocations from
1591 * highmem and non-highmem zones separately.
1592 */
1593 pages_highmem = preallocate_image_highmem(highmem / 2);
1594 alloc = count - max_size;
1595 if (alloc > pages_highmem)
1596 alloc -= pages_highmem;
1597 else
1598 alloc = 0;
1599 pages = preallocate_image_memory(alloc, avail_normal);
1600 if (pages < alloc) {
1601 /* We have exhausted non-highmem pages, try highmem. */
1602 alloc -= pages;
1603 pages += pages_highmem;
1604 pages_highmem = preallocate_image_highmem(alloc);
1605 if (pages_highmem < alloc)
1606 goto err_out;
1607 pages += pages_highmem;
1608 /*
1609 * size is the desired number of saveable pages to leave in
1610 * memory, so try to preallocate (all memory - size) pages.
1611 */
1612 alloc = (count - pages) - size;
1613 pages += preallocate_image_highmem(alloc);
1614 } else {
1615 /*
1616 * There are approximately max_size saveable pages at this point
1617 * and we want to reduce this number down to size.
1618 */
1619 alloc = max_size - size;
1620 size = preallocate_highmem_fraction(alloc, highmem, count);
1621 pages_highmem += size;
1622 alloc -= size;
1623 size = preallocate_image_memory(alloc, avail_normal);
1624 pages_highmem += preallocate_image_highmem(alloc - size);
1625 pages += pages_highmem + size;
1626 }
1627
1628 /*
1629 * We only need as many page frames for the image as there are saveable
1630 * pages in memory, but we have allocated more. Release the excessive
1631 * ones now.
1632 */
1633 free_unnecessary_pages();
1634
1635 out:
1636 do_gettimeofday(&stop);
1637 printk(KERN_CONT "done (allocated %lu pages)\n", pages);
1638 swsusp_show_speed(&start, &stop, pages, "Allocated");
1639
1640 return 0;
1641
1642 err_out:
1643 printk(KERN_CONT "\n");
1644 swsusp_free();
1645 return -ENOMEM;
1646 }
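
/*
 * Worked example for the max_size computation above (illustrative numbers,
 * assuming 4 KB pages, PAGES_FOR_IO = 1024 and reserved_size = 1 MiB, i.e.
 * 256 pages): with count = 250000 usable page frames and size = 600 pages
 * of metadata,
 *
 *	max_size = (250000 - (600 + 1024)) / 2 - 2 * 256
 *	         = 124188 - 512 = 123676 pages
 *
 * so at most 123676 saveable pages may be left in memory; preallocation
 * then continues until no more than the desired number of image pages
 * ('size' above, derived from image_size) worth of saveable pages remains.
 */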
1647
1648 #ifdef CONFIG_HIGHMEM
1649 /**
1650 * count_pages_for_highmem - compute the number of non-highmem pages
1651 * that will be necessary for creating copies of highmem pages.
1652 */
1653
1654 static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1655 {
1656 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
1657
1658 if (free_highmem >= nr_highmem)
1659 nr_highmem = 0;
1660 else
1661 nr_highmem -= free_highmem;
1662
1663 return nr_highmem;
1664 }
1665 #else
1666 static unsigned int
1667 count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
1668 #endif /* CONFIG_HIGHMEM */
1669
1670 /**
1671 * enough_free_mem - Make sure we have enough free memory for the
1672 * snapshot image.
1673 */
1674
1675 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1676 {
1677 struct zone *zone;
1678 unsigned int free = alloc_normal;
1679
1680 for_each_populated_zone(zone)
1681 if (!is_highmem(zone))
1682 free += zone_page_state(zone, NR_FREE_PAGES);
1683
1684 nr_pages += count_pages_for_highmem(nr_highmem);
1685 pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n",
1686 nr_pages, PAGES_FOR_IO, free);
1687
1688 return free > nr_pages + PAGES_FOR_IO;
1689 }
1690
1691 #ifdef CONFIG_HIGHMEM
1692 /**
1693 * get_highmem_buffer - if there are some highmem pages in the suspend
1694 * image, we may need the buffer to copy them and/or load their data.
1695 */
1696
1697 static inline int get_highmem_buffer(int safe_needed)
1698 {
1699 buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
1700 return buffer ? 0 : -ENOMEM;
1701 }
1702
1703 /**
1704  * alloc_highmem_pages - allocate some highmem pages for the image.
1705  * Try to allocate as many pages as needed, but if the number of free
1706  * highmem pages is less than that, allocate them all.
1707 */
1708
1709 static inline unsigned int
1710 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1711 {
1712 unsigned int to_alloc = count_free_highmem_pages();
1713
1714 if (to_alloc > nr_highmem)
1715 to_alloc = nr_highmem;
1716
1717 nr_highmem -= to_alloc;
1718 while (to_alloc-- > 0) {
1719 struct page *page;
1720
1721 page = alloc_image_page(__GFP_HIGHMEM);
1722 memory_bm_set_bit(bm, page_to_pfn(page));
1723 }
1724 return nr_highmem;
1725 }
1726 #else
1727 static inline int get_highmem_buffer(int safe_needed) { return 0; }
1728
1729 static inline unsigned int
1730 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
1731 #endif /* CONFIG_HIGHMEM */
1732
1733 /**
1734 * swsusp_alloc - allocate memory for the suspend image
1735 *
1736 * We first try to allocate as many highmem pages as there are
1737 * saveable highmem pages in the system. If that fails, we allocate
1738 * non-highmem pages for the copies of the remaining highmem ones.
1739 *
1740 * In this approach it is likely that the copies of highmem pages will
1741 * also be located in the high memory, because of the way in which
1742 * copy_data_pages() works.
1743 */
1744
1745 static int
1746 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1747 unsigned int nr_pages, unsigned int nr_highmem)
1748 {
1749 if (nr_highmem > 0) {
1750 if (get_highmem_buffer(PG_ANY))
1751 goto err_out;
1752 if (nr_highmem > alloc_highmem) {
1753 nr_highmem -= alloc_highmem;
1754 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
1755 }
1756 }
1757 if (nr_pages > alloc_normal) {
1758 nr_pages -= alloc_normal;
1759 while (nr_pages-- > 0) {
1760 struct page *page;
1761
1762 page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1763 if (!page)
1764 goto err_out;
1765 memory_bm_set_bit(copy_bm, page_to_pfn(page));
1766 }
1767 }
1768
1769 return 0;
1770
1771 err_out:
1772 swsusp_free();
1773 return -ENOMEM;
1774 }
1775
1776 asmlinkage int swsusp_save(void)
1777 {
1778 unsigned int nr_pages, nr_highmem;
1779
1780 if (toi_running)
1781 return toi_post_context_save();
1782
1783 printk(KERN_INFO "PM: Creating hibernation image:\n");
1784
1785 drain_local_pages(NULL);
1786 nr_pages = count_data_pages();
1787 nr_highmem = count_highmem_pages();
1788 printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1789
1790 if (!enough_free_mem(nr_pages, nr_highmem)) {
1791 printk(KERN_ERR "PM: Not enough free memory\n");
1792 return -ENOMEM;
1793 }
1794
1795 if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1796 printk(KERN_ERR "PM: Memory allocation failed\n");
1797 return -ENOMEM;
1798 }
1799
1800 /* During allocating of suspend pagedir, new cold pages may appear.
1801 /* While allocating the suspend pagedir, new cold pages may appear.
1802 */
1803 drain_local_pages(NULL);
1804 copy_data_pages(&copy_bm, &orig_bm);
1805
1806 /*
1807 * End of critical section. From now on, we can write to memory,
1808  * but we should not touch disk. In particular, this means we must _not_
1809 * touch swap space! Except we must write out our image of course.
1810 */
1811
1812 nr_pages += nr_highmem;
1813 nr_copy_pages = nr_pages;
1814 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1815
1816 printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1817 nr_pages);
1818
1819 return 0;
1820 }
1821
1822 #ifndef CONFIG_ARCH_HIBERNATION_HEADER
1823 int init_header_complete(struct swsusp_info *info)
1824 {
1825 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1826 info->version_code = LINUX_VERSION_CODE;
1827 return 0;
1828 }
1829
1830 char *check_image_kernel(struct swsusp_info *info)
1831 {
1832 if (info->version_code != LINUX_VERSION_CODE)
1833 return "kernel version";
1834 if (strcmp(info->uts.sysname,init_utsname()->sysname))
1835 return "system type";
1836 if (strcmp(info->uts.release,init_utsname()->release))
1837 return "kernel release";
1838 if (strcmp(info->uts.version,init_utsname()->version))
1839 return "version";
1840 if (strcmp(info->uts.machine,init_utsname()->machine))
1841 return "machine";
1842 return NULL;
1843 }
1844 EXPORT_SYMBOL_GPL(check_image_kernel);
1845 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1846
1847 unsigned long snapshot_get_image_size(void)
1848 {
1849 return nr_copy_pages + nr_meta_pages + 1;
1850 }
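
/*
 * Illustrative arithmetic (assuming 4 KB pages and 8-byte PFN entries): an
 * image with nr_copy_pages = 100000 data pages needs
 * nr_meta_pages = DIV_ROUND_UP(100000 * 8, 4096) = 196 pages of packed PFNs,
 * so snapshot_get_image_size() reports 100000 + 196 + 1 (header) = 100197
 * pages, i.e. roughly 391 MiB.
 */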
1851
1852 int init_header(struct swsusp_info *info)
1853 {
1854 memset(info, 0, sizeof(struct swsusp_info));
1855 info->num_physpages = num_physpages;
1856 info->image_pages = nr_copy_pages;
1857 info->pages = snapshot_get_image_size();
1858 info->size = info->pages;
1859 info->size <<= PAGE_SHIFT;
1860 return init_header_complete(info);
1861 }
1862 EXPORT_SYMBOL_GPL(init_header);
1863
1864 /**
1865 * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1866 * are stored in the array @buf[] (1 page at a time)
1867 */
1868
1869 static inline void
1870 pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1871 {
1872 int j;
1873
1874 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1875 buf[j] = memory_bm_next_pfn(bm);
1876 if (unlikely(buf[j] == BM_END_OF_MAP))
1877 break;
1878 /* Save page key for data page (s390 only). */
1879 page_key_read(buf + j);
1880 }
1881 }
1882
1883 /**
1884 * snapshot_read_next - used for reading the system memory snapshot.
1885 *
1886 * On the first call to it @handle should point to a zeroed
1887  * snapshot_handle structure. The structure gets updated and a pointer
1888  * to it should be passed to this function on every subsequent call.
1889 *
1890 * On success the function returns a positive number. Then, the caller
1891 * is allowed to read up to the returned number of bytes from the memory
1892 * location computed by the data_of() macro.
1893 *
1894 * The function returns 0 to indicate the end of data stream condition,
1895 * and a negative number is returned on error. In such cases the
1896 * structure pointed to by @handle is not updated and should not be used
1897 * any more.
1898 */
1899
1900 int snapshot_read_next(struct snapshot_handle *handle)
1901 {
1902 if (handle->cur > nr_meta_pages + nr_copy_pages)
1903 return 0;
1904
1905 if (!buffer) {
1906 /* This makes the buffer be freed by swsusp_free() */
1907 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1908 if (!buffer)
1909 return -ENOMEM;
1910 }
1911 if (!handle->cur) {
1912 int error;
1913
1914 error = init_header((struct swsusp_info *)buffer);
1915 if (error)
1916 return error;
1917 handle->buffer = buffer;
1918 memory_bm_position_reset(&orig_bm);
1919 memory_bm_position_reset(&copy_bm);
1920 } else if (handle->cur <= nr_meta_pages) {
1921 clear_page(buffer);
1922 pack_pfns(buffer, &orig_bm);
1923 } else {
1924 struct page *page;
1925
1926 page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1927 if (PageHighMem(page)) {
1928 /* Highmem pages are copied to the buffer,
1929 * because we can't return with a kmapped
1930 * highmem page (we may not be called again).
1931 */
1932 void *kaddr;
1933
1934 kaddr = kmap_atomic(page);
1935 copy_page(buffer, kaddr);
1936 kunmap_atomic(kaddr);
1937 handle->buffer = buffer;
1938 } else {
1939 handle->buffer = page_address(page);
1940 }
1941 }
1942 handle->cur++;
1943 return PAGE_SIZE;
1944 }
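
/*
 * Illustrative caller sketch (an assumption, not code from this file;
 * write_out() is a hypothetical output routine): the code that writes the
 * image out drives snapshot_read_next() one PAGE_SIZE chunk at a time,
 * roughly like this, using the data_of() helper from power.h:
 *
 *	struct snapshot_handle handle;
 *	int nbytes;
 *
 *	memset(&handle, 0, sizeof(handle));
 *	while ((nbytes = snapshot_read_next(&handle)) > 0) {
 *		if (write_out(data_of(handle), nbytes))
 *			break;
 *	}
 *	if (nbytes < 0)
 *		return nbytes;
 *
 * On error the structure pointed to by the handle must not be used any
 * further, as noted above.
 */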
1945
1946 /**
1947 * mark_unsafe_pages - mark the pages that cannot be used for storing
1948 * the image during resume, because they conflict with the pages that
1949 * had been used before suspend
1950 */
1951
1952 static int mark_unsafe_pages(struct memory_bitmap *bm)
1953 {
1954 struct zone *zone;
1955 unsigned long pfn, max_zone_pfn;
1956
1957 /* Clear page flags */
1958 for_each_populated_zone(zone) {
1959 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1960 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1961 if (pfn_valid(pfn))
1962 swsusp_unset_page_free(pfn_to_page(pfn));
1963 }
1964
1965 /* Mark pages that correspond to the "original" pfns as "unsafe" */
1966 memory_bm_position_reset(bm);
1967 do {
1968 pfn = memory_bm_next_pfn(bm);
1969 if (likely(pfn != BM_END_OF_MAP)) {
1970 if (likely(pfn_valid(pfn)))
1971 swsusp_set_page_free(pfn_to_page(pfn));
1972 else
1973 return -EFAULT;
1974 }
1975 } while (pfn != BM_END_OF_MAP);
1976
1977 allocated_unsafe_pages = 0;
1978
1979 return 0;
1980 }
1981
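/**
 * duplicate_memory_bitmap - set in @dst every bit that is set in @src.
 */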
1982 static void
1983 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1984 {
1985 unsigned long pfn;
1986
1987 memory_bm_position_reset(src);
1988 pfn = memory_bm_next_pfn(src);
1989 while (pfn != BM_END_OF_MAP) {
1990 memory_bm_set_bit(dst, pfn);
1991 pfn = memory_bm_next_pfn(src);
1992 }
1993 }
1994
1995 static int check_header(struct swsusp_info *info)
1996 {
1997 char *reason;
1998
1999 reason = check_image_kernel(info);
2000 if (!reason && info->num_physpages != num_physpages)
2001 reason = "memory size";
2002 if (reason) {
2003 printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
2004 return -EPERM;
2005 }
2006 return 0;
2007 }
2008
2009 /**
2010 * load_header - check the image header and copy data from it
2011 */
2012
2013 static int
2014 load_header(struct swsusp_info *info)
2015 {
2016 int error;
2017
2018 restore_pblist = NULL;
2019 error = check_header(info);
2020 if (!error) {
2021 nr_copy_pages = info->image_pages;
2022 nr_meta_pages = info->pages - info->image_pages - 1;
2023 }
2024 return error;
2025 }
2026
2027 /**
2028 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
2029 * the corresponding bit in the memory bitmap @bm
2030 */
2031 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
2032 {
2033 int j;
2034
2035 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
2036 if (unlikely(buf[j] == BM_END_OF_MAP))
2037 break;
2038
2039 /* Extract and buffer page key for data page (s390 only). */
2040 page_key_memorize(buf + j);
2041
2042 if (memory_bm_pfn_present(bm, buf[j]))
2043 memory_bm_set_bit(bm, buf[j]);
2044 else
2045 return -EFAULT;
2046 }
2047
2048 return 0;
2049 }
2050
2051 /* List of "safe" pages that may be used to store data loaded from the suspend
2052 * image
2053 */
2054 static struct linked_page *safe_pages_list;
2055
2056 #ifdef CONFIG_HIGHMEM
2057 /* struct highmem_pbe is used for creating the list of highmem pages that
2058 * should be restored atomically during the resume from disk, because the page
2059 * frames they have occupied before the suspend are in use.
2060 */
2061 struct highmem_pbe {
2062 struct page *copy_page; /* data is here now */
2063 struct page *orig_page; /* data was here before the suspend */
2064 struct highmem_pbe *next;
2065 };
2066
2067 /* List of highmem PBEs needed for restoring the highmem pages that were
2068 * allocated before the suspend and included in the suspend image, but have
2069 * also been allocated by the "resume" kernel, so their contents cannot be
2070 * written directly to their "original" page frames.
2071 */
2072 static struct highmem_pbe *highmem_pblist;
2073
2074 /**
2075 * count_highmem_image_pages - compute the number of highmem pages in the
2076 * suspend image. The bits in the memory bitmap @bm that correspond to the
2077 * image pages are assumed to be set.
2078 */
2079
2080 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
2081 {
2082 unsigned long pfn;
2083 unsigned int cnt = 0;
2084
2085 memory_bm_position_reset(bm);
2086 pfn = memory_bm_next_pfn(bm);
2087 while (pfn != BM_END_OF_MAP) {
2088 if (PageHighMem(pfn_to_page(pfn)))
2089 cnt++;
2090
2091 pfn = memory_bm_next_pfn(bm);
2092 }
2093 return cnt;
2094 }
2095
2096 /**
2097 * prepare_highmem_image - try to allocate as many highmem pages as
2098 * there are highmem image pages (@nr_highmem_p points to the variable
2099 * containing the number of highmem image pages). The pages that are
2100 * "safe" (ie. will not be overwritten when the suspend image is
2101 * restored) have the corresponding bits set in @bm (it must be
2102 * uninitialized).
2103 *
2104 * NOTE: This function should not be called if there are no highmem
2105 * image pages.
2106 */
2107
2108 static unsigned int safe_highmem_pages;
2109
2110 static struct memory_bitmap *safe_highmem_bm;
2111
2112 static int
2113 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
2114 {
2115 unsigned int to_alloc;
2116
2117 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
2118 return -ENOMEM;
2119
2120 if (get_highmem_buffer(PG_SAFE))
2121 return -ENOMEM;
2122
2123 to_alloc = count_free_highmem_pages();
2124 if (to_alloc > *nr_highmem_p)
2125 to_alloc = *nr_highmem_p;
2126 else
2127 *nr_highmem_p = to_alloc;
2128
2129 safe_highmem_pages = 0;
2130 while (to_alloc-- > 0) {
2131 struct page *page;
2132
2133 page = alloc_page(__GFP_HIGHMEM);
2134 if (!swsusp_page_is_free(page)) {
2135 /* The page is "safe", set its bit in the bitmap */
2136 memory_bm_set_bit(bm, page_to_pfn(page));
2137 safe_highmem_pages++;
2138 }
2139 /* Mark the page as allocated */
2140 swsusp_set_page_forbidden(page);
2141 swsusp_set_page_free(page);
2142 }
2143 memory_bm_position_reset(bm);
2144 safe_highmem_bm = bm;
2145 return 0;
2146 }
2147
2148 /**
2149 * get_highmem_page_buffer - for a given highmem image page, find the buffer
2150 * that suspend_write_next() should set for its caller to write to.
2151 *
2152 * If the page is to be saved to its "original" page frame or a copy of
2153 * the page is to be made in the highmem, @buffer is returned. Otherwise,
2154 * the copy of the page is to be made in normal memory, so the address of
2155 * the copy is returned.
2156 *
2157 * If @buffer is returned, the caller of suspend_write_next() will write
2158 * the page's contents to @buffer, so they will have to be copied to the
2159 * right location on the next call to suspend_write_next() and it is done
2160 * with the help of copy_last_highmem_page(). For this purpose, if
2161 * @buffer is returned, @last_highmem_page is set to the page to which
2162 * the data will have to be copied from @buffer.
2163 */
2164
2165 static struct page *last_highmem_page;
2166
2167 static void *
2168 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
2169 {
2170 struct highmem_pbe *pbe;
2171 void *kaddr;
2172
2173 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
2174 /* We have allocated the "original" page frame and we can
2175 * use it directly to store the loaded page.
2176 */
2177 last_highmem_page = page;
2178 return buffer;
2179 }
2180 /* The "original" page frame has not been allocated and we have to
2181 * use a "safe" page frame to store the loaded page.
2182 */
2183 pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
2184 if (!pbe) {
2185 swsusp_free();
2186 return ERR_PTR(-ENOMEM);
2187 }
2188 pbe->orig_page = page;
2189 if (safe_highmem_pages > 0) {
2190 struct page *tmp;
2191
2192 /* Copy of the page will be stored in high memory */
2193 kaddr = buffer;
2194 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
2195 safe_highmem_pages--;
2196 last_highmem_page = tmp;
2197 pbe->copy_page = tmp;
2198 } else {
2199 /* Copy of the page will be stored in normal memory */
2200 kaddr = safe_pages_list;
2201 safe_pages_list = safe_pages_list->next;
2202 pbe->copy_page = virt_to_page(kaddr);
2203 }
2204 pbe->next = highmem_pblist;
2205 highmem_pblist = pbe;
2206 return kaddr;
2207 }
2208
2209 /**
2210 * copy_last_highmem_page - copy the contents of a highmem image from
2211 * @buffer, where the caller of snapshot_write_next() has placed them,
2212 * to the right location represented by @last_highmem_page.
2213 */
2214
2215 static void copy_last_highmem_page(void)
2216 {
2217 if (last_highmem_page) {
2218 void *dst;
2219
2220 dst = kmap_atomic(last_highmem_page);
2221 copy_page(dst, buffer);
2222 kunmap_atomic(dst);
2223 last_highmem_page = NULL;
2224 }
2225 }
2226
2227 static inline int last_highmem_page_copied(void)
2228 {
2229 return !last_highmem_page;
2230 }
2231
2232 static inline void free_highmem_data(void)
2233 {
2234 if (safe_highmem_bm)
2235 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
2236
2237 if (buffer)
2238 free_image_page(buffer, PG_UNSAFE_CLEAR);
2239 }
2240 #else
2241 static inline int get_safe_write_buffer(void) { return 0; }
2242
2243 static unsigned int
2244 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
2245
2246 static inline int
2247 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
2248 {
2249 return 0;
2250 }
2251
2252 static inline void *
2253 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
2254 {
2255 return ERR_PTR(-EINVAL);
2256 }
2257
2258 static inline void copy_last_highmem_page(void) {}
2259 static inline int last_highmem_page_copied(void) { return 1; }
2260 static inline void free_highmem_data(void) {}
2261 #endif /* CONFIG_HIGHMEM */
2262
2263 /**
2264 * prepare_image - use the memory bitmap @bm to mark the pages that will
2265 * be overwritten in the process of restoring the system memory state
2266 * from the suspend image ("unsafe" pages) and allocate memory for the
2267 * image.
2268 *
2269 * The idea is to allocate a new memory bitmap first and then allocate
2270 * as many pages as needed for the image data, but not to assign these
2271 * pages to specific tasks initially. Instead, we just mark them as
2272 * allocated and create a list of "safe" pages that will be used
2273 * later. On systems with high memory a list of "safe" highmem pages is
2274 * also created.
2275 */
2276
2277 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
2278
2279 static int
2280 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
2281 {
2282 unsigned int nr_pages, nr_highmem;
2283 struct linked_page *sp_list, *lp;
2284 int error;
2285
2286 /* If there is no highmem, the buffer will not be necessary */
2287 free_image_page(buffer, PG_UNSAFE_CLEAR);
2288 buffer = NULL;
2289
2290 nr_highmem = count_highmem_image_pages(bm);
2291 error = mark_unsafe_pages(bm);
2292 if (error)
2293 goto Free;
2294
2295 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2296 if (error)
2297 goto Free;
2298
2299 duplicate_memory_bitmap(new_bm, bm);
2300 memory_bm_free(bm, PG_UNSAFE_KEEP);
2301 if (nr_highmem > 0) {
2302 error = prepare_highmem_image(bm, &nr_highmem);
2303 if (error)
2304 goto Free;
2305 }
2306 /* Reserve some safe pages for potential later use.
2307 *
2308 * NOTE: This way we make sure there will be enough safe pages for the
2309 * chain_alloc() in get_buffer(). It is a bit wasteful, but
2310 * nr_copy_pages cannot be greater than 50% of the memory anyway.
2311 */
2312 sp_list = NULL;
2313 /* nr_copy_pages cannot be less than allocated_unsafe_pages */
2314 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2315 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2316 while (nr_pages > 0) {
2317 lp = get_image_page(GFP_ATOMIC, PG_SAFE);
2318 if (!lp) {
2319 error = -ENOMEM;
2320 goto Free;
2321 }
2322 lp->next = sp_list;
2323 sp_list = lp;
2324 nr_pages--;
2325 }
2326 /* Preallocate memory for the image */
2327 safe_pages_list = NULL;
2328 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2329 while (nr_pages > 0) {
2330 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2331 if (!lp) {
2332 error = -ENOMEM;
2333 goto Free;
2334 }
2335 if (!swsusp_page_is_free(virt_to_page(lp))) {
2336 /* The page is "safe", add it to the list */
2337 lp->next = safe_pages_list;
2338 safe_pages_list = lp;
2339 }
2340 /* Mark the page as allocated */
2341 swsusp_set_page_forbidden(virt_to_page(lp));
2342 swsusp_set_page_free(virt_to_page(lp));
2343 nr_pages--;
2344 }
2345 /* Free the reserved safe pages so that chain_alloc() can use them */
2346 while (sp_list) {
2347 lp = sp_list->next;
2348 free_image_page(sp_list, PG_UNSAFE_CLEAR);
2349 sp_list = lp;
2350 }
2351 return 0;
2352
2353 Free:
2354 swsusp_free();
2355 return error;
2356 }
2357
2358 /**
2359 * get_buffer - compute the address that snapshot_write_next() should
2360 * set for its caller to write to.
2361 */
2362
2363 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2364 {
2365 struct pbe *pbe;
2366 struct page *page;
2367 unsigned long pfn = memory_bm_next_pfn(bm);
2368
2369 if (pfn == BM_END_OF_MAP)
2370 return ERR_PTR(-EFAULT);
2371
2372 page = pfn_to_page(pfn);
2373 if (PageHighMem(page))
2374 return get_highmem_page_buffer(page, ca);
2375
2376 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
2377 /* We have allocated the "original" page frame and we can
2378 * use it directly to store the loaded page.
2379 */
2380 return page_address(page);
2381
2382 /* The "original" page frame has not been allocated and we have to
2383 * use a "safe" page frame to store the loaded page.
2384 */
2385 pbe = chain_alloc(ca, sizeof(struct pbe));
2386 if (!pbe) {
2387 swsusp_free();
2388 return ERR_PTR(-ENOMEM);
2389 }
2390 pbe->orig_address = page_address(page);
2391 pbe->address = safe_pages_list;
2392 safe_pages_list = safe_pages_list->next;
2393 pbe->next = restore_pblist;
2394 restore_pblist = pbe;
2395 return pbe->address;
2396 }
2397
2398 /**
2399 * snapshot_write_next - used for writing the system memory snapshot.
2400 *
2401 * On the first call to it @handle should point to a zeroed
2402 * snapshot_handle structure. The structure gets updated and a pointer
2403 * to it should be passed to this function on each subsequent call.
2404 *
2405 * On success the function returns a positive number. Then, the caller
2406 * is allowed to write up to the returned number of bytes to the memory
2407 * location computed by the data_of() macro.
2408 *
2409 * The function returns 0 to indicate the "end of file" condition,
2410 * and a negative number is returned on error. In such cases the
2411 * structure pointed to by @handle is not updated and should not be used
2412 * any more.
2413 */
2414
2415 int snapshot_write_next(struct snapshot_handle *handle)
2416 {
2417 static struct chain_allocator ca;
2418 int error = 0;
2419
2420 /* Check if we have already loaded the entire image */
2421 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
2422 return 0;
2423
2424 handle->sync_read = 1;
2425
2426 if (!handle->cur) {
2427 if (!buffer)
2428 /* This makes the buffer be freed by swsusp_free() */
2429 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2430
2431 if (!buffer)
2432 return -ENOMEM;
2433
2434 handle->buffer = buffer;
2435 } else if (handle->cur == 1) {
2436 error = load_header(buffer);
2437 if (error)
2438 return error;
2439
2440 error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2441 if (error)
2442 return error;
2443
2444 /* Allocate buffer for page keys. */
2445 error = page_key_alloc(nr_copy_pages);
2446 if (error)
2447 return error;
2448
2449 } else if (handle->cur <= nr_meta_pages + 1) {
2450 error = unpack_orig_pfns(buffer, &copy_bm);
2451 if (error)
2452 return error;
2453
2454 if (handle->cur == nr_meta_pages + 1) {
2455 error = prepare_image(&orig_bm, &copy_bm);
2456 if (error)
2457 return error;
2458
2459 chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2460 memory_bm_position_reset(&orig_bm);
2461 restore_pblist = NULL;
2462 handle->buffer = get_buffer(&orig_bm, &ca);
2463 handle->sync_read = 0;
2464 if (IS_ERR(handle->buffer))
2465 return PTR_ERR(handle->buffer);
2466 }
2467 } else {
2468 copy_last_highmem_page();
2469 /* Restore page key for data page (s390 only). */
2470 page_key_write(handle->buffer);
2471 handle->buffer = get_buffer(&orig_bm, &ca);
2472 if (IS_ERR(handle->buffer))
2473 return PTR_ERR(handle->buffer);
2474 if (handle->buffer != buffer)
2475 handle->sync_read = 0;
2476 }
2477 handle->cur++;
2478 return PAGE_SIZE;
2479 }
2480
2481 /**
2482 * snapshot_write_finalize - must be called after the last call to
2483 * snapshot_write_next() in case the last page in the image happens
2484 * to be a highmem page and its contents should be stored in the
2485 * highmem. Additionally, it releases the memory that will not be
2486 * used any more.
2487 */
2488
2489 void snapshot_write_finalize(struct snapshot_handle *handle)
2490 {
2491 copy_last_highmem_page();
2492 /* Restore page key for data page (s390 only). */
2493 page_key_write(handle->buffer);
2494 page_key_free();
2495 /* Free only if we have loaded the image entirely */
2496 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2497 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
2498 free_highmem_data();
2499 }
2500 }
2501
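/**
 * snapshot_image_loaded - check that the image has been loaded completely:
 * it is non-empty, the last highmem page (if any) has been copied into
 * place and @handle has advanced past all of the meta and data pages.
 */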
2502 int snapshot_image_loaded(struct snapshot_handle *handle)
2503 {
2504 return !(!nr_copy_pages || !last_highmem_page_copied() ||
2505 handle->cur <= nr_meta_pages + nr_copy_pages);
2506 }
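
/*
 * Illustrative sketch (not compiled) of the write-side protocol described
 * above snapshot_write_next(): fill data_of(handle) after every positive
 * return, then finish with snapshot_write_finalize() and
 * snapshot_image_loaded().  example_read_page() is a hypothetical input
 * routine standing in for the real image reader.
 */
#if 0
static int example_load_image(struct snapshot_handle *handle)
{
	int ret;

	memset(handle, 0, sizeof(*handle));	/* first call wants a zeroed handle */
	for (;;) {
		ret = snapshot_write_next(handle);
		if (ret <= 0)
			break;
		/* store the next PAGE_SIZE bytes of the image at data_of() */
		ret = example_read_page(data_of(*handle));
		if (ret)
			break;
	}
	snapshot_write_finalize(handle);
	if (!ret && !snapshot_image_loaded(handle))
		ret = -ENODATA;
	return ret;
}
#endif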
2507
2508 #ifdef CONFIG_HIGHMEM
2509 /* Assumes that @buf is ready and points to a "safe" page */
2510 static inline void
2511 swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
2512 {
2513 void *kaddr1, *kaddr2;
2514
2515 kaddr1 = kmap_atomic(p1);
2516 kaddr2 = kmap_atomic(p2);
2517 copy_page(buf, kaddr1);
2518 copy_page(kaddr1, kaddr2);
2519 copy_page(kaddr2, buf);
2520 kunmap_atomic(kaddr2);
2521 kunmap_atomic(kaddr1);
2522 }
2523
2524 /**
2525 * restore_highmem - for each highmem page that was allocated before
2526 * the suspend and included in the suspend image, and also has been
2527 * allocated by the "resume" kernel, swap its current (ie. "before
2528 * resume") contents with the previous (ie. "before suspend") one.
2529 *
2530 * If the resume eventually fails, we can call this function once
2531 * again and restore the "before resume" highmem state.
2532 */
2533
2534 int restore_highmem(void)
2535 {
2536 struct highmem_pbe *pbe = highmem_pblist;
2537 void *buf;
2538
2539 if (!pbe)
2540 return 0;
2541
2542 buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2543 if (!buf)
2544 return -ENOMEM;
2545
2546 while (pbe) {
2547 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2548 pbe = pbe->next;
2549 }
2550 free_image_page(buf, PG_UNSAFE_CLEAR);
2551 return 0;
2552 }
2553 #endif /* CONFIG_HIGHMEM */