4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/f2fs_fs.h>
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/prefetch.h>
16 #include <linux/kthread.h>
17 #include <linux/swap.h>
18 #include <linux/timer.h>
19 #include <linux/freezer.h>
20 #include <linux/sched/signal.h>
27 #include <trace/events/f2fs.h>
29 #define __reverse_ffz(x) __reverse_ffs(~(x))
31 static struct kmem_cache
*discard_entry_slab
;
32 static struct kmem_cache
*discard_cmd_slab
;
33 static struct kmem_cache
*sit_entry_set_slab
;
34 static struct kmem_cache
*inmem_entry_slab
;
36 static unsigned long __reverse_ulong(unsigned char *str
)
38 unsigned long tmp
= 0;
39 int shift
= 24, idx
= 0;
41 #if BITS_PER_LONG == 64
45 tmp
|= (unsigned long)str
[idx
++] << shift
;
46 shift
-= BITS_PER_BYTE
;
52 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
53 * MSB and LSB are reversed in a byte by f2fs_set_bit.
55 static inline unsigned long __reverse_ffs(unsigned long word
)
59 #if BITS_PER_LONG == 64
60 if ((word
& 0xffffffff00000000UL
) == 0)
65 if ((word
& 0xffff0000) == 0)
70 if ((word
& 0xff00) == 0)
75 if ((word
& 0xf0) == 0)
80 if ((word
& 0xc) == 0)
85 if ((word
& 0x2) == 0)
91 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
92 * f2fs_set_bit makes MSB and LSB reversed in a byte.
93 * @size must be integral times of unsigned long.
96 * f2fs_set_bit(0, bitmap) => 1000 0000
97 * f2fs_set_bit(7, bitmap) => 0000 0001
99 static unsigned long __find_rev_next_bit(const unsigned long *addr
,
100 unsigned long size
, unsigned long offset
)
102 const unsigned long *p
= addr
+ BIT_WORD(offset
);
103 unsigned long result
= size
;
109 size
-= (offset
& ~(BITS_PER_LONG
- 1));
110 offset
%= BITS_PER_LONG
;
116 tmp
= __reverse_ulong((unsigned char *)p
);
118 tmp
&= ~0UL >> offset
;
119 if (size
< BITS_PER_LONG
)
120 tmp
&= (~0UL << (BITS_PER_LONG
- size
));
124 if (size
<= BITS_PER_LONG
)
126 size
-= BITS_PER_LONG
;
132 return result
- size
+ __reverse_ffs(tmp
);
135 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr
,
136 unsigned long size
, unsigned long offset
)
138 const unsigned long *p
= addr
+ BIT_WORD(offset
);
139 unsigned long result
= size
;
145 size
-= (offset
& ~(BITS_PER_LONG
- 1));
146 offset
%= BITS_PER_LONG
;
152 tmp
= __reverse_ulong((unsigned char *)p
);
155 tmp
|= ~0UL << (BITS_PER_LONG
- offset
);
156 if (size
< BITS_PER_LONG
)
161 if (size
<= BITS_PER_LONG
)
163 size
-= BITS_PER_LONG
;
169 return result
- size
+ __reverse_ffz(tmp
);
172 bool need_SSR(struct f2fs_sb_info
*sbi
)
174 int node_secs
= get_blocktype_secs(sbi
, F2FS_DIRTY_NODES
);
175 int dent_secs
= get_blocktype_secs(sbi
, F2FS_DIRTY_DENTS
);
176 int imeta_secs
= get_blocktype_secs(sbi
, F2FS_DIRTY_IMETA
);
178 if (test_opt(sbi
, LFS
))
180 if (sbi
->gc_thread
&& sbi
->gc_thread
->gc_urgent
)
183 return free_sections(sbi
) <= (node_secs
+ 2 * dent_secs
+ imeta_secs
+
184 SM_I(sbi
)->min_ssr_sections
+ reserved_sections(sbi
));
187 void register_inmem_page(struct inode
*inode
, struct page
*page
)
189 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
190 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
191 struct inmem_pages
*new;
193 f2fs_trace_pid(page
);
195 set_page_private(page
, (unsigned long)ATOMIC_WRITTEN_PAGE
);
196 SetPagePrivate(page
);
198 new = f2fs_kmem_cache_alloc(inmem_entry_slab
, GFP_NOFS
);
200 /* add atomic page indices to the list */
202 INIT_LIST_HEAD(&new->list
);
204 /* increase reference count with clean state */
205 mutex_lock(&fi
->inmem_lock
);
207 list_add_tail(&new->list
, &fi
->inmem_pages
);
208 spin_lock(&sbi
->inode_lock
[ATOMIC_FILE
]);
209 if (list_empty(&fi
->inmem_ilist
))
210 list_add_tail(&fi
->inmem_ilist
, &sbi
->inode_list
[ATOMIC_FILE
]);
211 spin_unlock(&sbi
->inode_lock
[ATOMIC_FILE
]);
212 inc_page_count(F2FS_I_SB(inode
), F2FS_INMEM_PAGES
);
213 mutex_unlock(&fi
->inmem_lock
);
215 trace_f2fs_register_inmem_page(page
, INMEM
);
218 static int __revoke_inmem_pages(struct inode
*inode
,
219 struct list_head
*head
, bool drop
, bool recover
)
221 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
222 struct inmem_pages
*cur
, *tmp
;
225 list_for_each_entry_safe(cur
, tmp
, head
, list
) {
226 struct page
*page
= cur
->page
;
229 trace_f2fs_commit_inmem_page(page
, INMEM_DROP
);
233 f2fs_wait_on_page_writeback(page
, DATA
, true);
236 struct dnode_of_data dn
;
239 trace_f2fs_commit_inmem_page(page
, INMEM_REVOKE
);
241 set_new_dnode(&dn
, inode
, NULL
, NULL
, 0);
242 err
= get_dnode_of_data(&dn
, page
->index
, LOOKUP_NODE
);
244 if (err
== -ENOMEM
) {
245 congestion_wait(BLK_RW_ASYNC
, HZ
/50);
252 get_node_info(sbi
, dn
.nid
, &ni
);
253 if (cur
->old_addr
== NEW_ADDR
) {
254 invalidate_blocks(sbi
, dn
.data_blkaddr
);
255 f2fs_update_data_blkaddr(&dn
, NEW_ADDR
);
257 f2fs_replace_block(sbi
, &dn
, dn
.data_blkaddr
,
258 cur
->old_addr
, ni
.version
, true, true);
262 /* we don't need to invalidate this in the sccessful status */
264 ClearPageUptodate(page
);
265 set_page_private(page
, 0);
266 ClearPagePrivate(page
);
267 f2fs_put_page(page
, 1);
269 list_del(&cur
->list
);
270 kmem_cache_free(inmem_entry_slab
, cur
);
271 dec_page_count(F2FS_I_SB(inode
), F2FS_INMEM_PAGES
);
276 void drop_inmem_pages_all(struct f2fs_sb_info
*sbi
)
278 struct list_head
*head
= &sbi
->inode_list
[ATOMIC_FILE
];
280 struct f2fs_inode_info
*fi
;
282 spin_lock(&sbi
->inode_lock
[ATOMIC_FILE
]);
283 if (list_empty(head
)) {
284 spin_unlock(&sbi
->inode_lock
[ATOMIC_FILE
]);
287 fi
= list_first_entry(head
, struct f2fs_inode_info
, inmem_ilist
);
288 inode
= igrab(&fi
->vfs_inode
);
289 spin_unlock(&sbi
->inode_lock
[ATOMIC_FILE
]);
292 drop_inmem_pages(inode
);
295 congestion_wait(BLK_RW_ASYNC
, HZ
/50);
300 void drop_inmem_pages(struct inode
*inode
)
302 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
303 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
305 mutex_lock(&fi
->inmem_lock
);
306 __revoke_inmem_pages(inode
, &fi
->inmem_pages
, true, false);
307 spin_lock(&sbi
->inode_lock
[ATOMIC_FILE
]);
308 if (!list_empty(&fi
->inmem_ilist
))
309 list_del_init(&fi
->inmem_ilist
);
310 spin_unlock(&sbi
->inode_lock
[ATOMIC_FILE
]);
311 mutex_unlock(&fi
->inmem_lock
);
313 clear_inode_flag(inode
, FI_ATOMIC_FILE
);
314 clear_inode_flag(inode
, FI_HOT_DATA
);
315 stat_dec_atomic_write(inode
);
318 void drop_inmem_page(struct inode
*inode
, struct page
*page
)
320 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
321 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
322 struct list_head
*head
= &fi
->inmem_pages
;
323 struct inmem_pages
*cur
= NULL
;
325 f2fs_bug_on(sbi
, !IS_ATOMIC_WRITTEN_PAGE(page
));
327 mutex_lock(&fi
->inmem_lock
);
328 list_for_each_entry(cur
, head
, list
) {
329 if (cur
->page
== page
)
333 f2fs_bug_on(sbi
, !cur
|| cur
->page
!= page
);
334 list_del(&cur
->list
);
335 mutex_unlock(&fi
->inmem_lock
);
337 dec_page_count(sbi
, F2FS_INMEM_PAGES
);
338 kmem_cache_free(inmem_entry_slab
, cur
);
340 ClearPageUptodate(page
);
341 set_page_private(page
, 0);
342 ClearPagePrivate(page
);
343 f2fs_put_page(page
, 0);
345 trace_f2fs_commit_inmem_page(page
, INMEM_INVALIDATE
);
348 static int __commit_inmem_pages(struct inode
*inode
,
349 struct list_head
*revoke_list
)
351 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
352 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
353 struct inmem_pages
*cur
, *tmp
;
354 struct f2fs_io_info fio
= {
359 .op_flags
= REQ_SYNC
| REQ_PRIO
,
360 .io_type
= FS_DATA_IO
,
362 pgoff_t last_idx
= ULONG_MAX
;
365 list_for_each_entry_safe(cur
, tmp
, &fi
->inmem_pages
, list
) {
366 struct page
*page
= cur
->page
;
369 if (page
->mapping
== inode
->i_mapping
) {
370 trace_f2fs_commit_inmem_page(page
, INMEM
);
372 set_page_dirty(page
);
373 f2fs_wait_on_page_writeback(page
, DATA
, true);
374 if (clear_page_dirty_for_io(page
)) {
375 inode_dec_dirty_pages(inode
);
376 remove_dirty_inode(inode
);
380 fio
.old_blkaddr
= NULL_ADDR
;
381 fio
.encrypted_page
= NULL
;
382 fio
.need_lock
= LOCK_DONE
;
383 err
= do_write_data_page(&fio
);
385 if (err
== -ENOMEM
) {
386 congestion_wait(BLK_RW_ASYNC
, HZ
/50);
393 /* record old blkaddr for revoking */
394 cur
->old_addr
= fio
.old_blkaddr
;
395 last_idx
= page
->index
;
398 list_move_tail(&cur
->list
, revoke_list
);
401 if (last_idx
!= ULONG_MAX
)
402 f2fs_submit_merged_write_cond(sbi
, inode
, 0, last_idx
, DATA
);
405 __revoke_inmem_pages(inode
, revoke_list
, false, false);
410 int commit_inmem_pages(struct inode
*inode
)
412 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
413 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
414 struct list_head revoke_list
;
417 INIT_LIST_HEAD(&revoke_list
);
418 f2fs_balance_fs(sbi
, true);
421 set_inode_flag(inode
, FI_ATOMIC_COMMIT
);
423 mutex_lock(&fi
->inmem_lock
);
424 err
= __commit_inmem_pages(inode
, &revoke_list
);
428 * try to revoke all committed pages, but still we could fail
429 * due to no memory or other reason, if that happened, EAGAIN
430 * will be returned, which means in such case, transaction is
431 * already not integrity, caller should use journal to do the
432 * recovery or rewrite & commit last transaction. For other
433 * error number, revoking was done by filesystem itself.
435 ret
= __revoke_inmem_pages(inode
, &revoke_list
, false, true);
439 /* drop all uncommitted pages */
440 __revoke_inmem_pages(inode
, &fi
->inmem_pages
, true, false);
442 spin_lock(&sbi
->inode_lock
[ATOMIC_FILE
]);
443 if (!list_empty(&fi
->inmem_ilist
))
444 list_del_init(&fi
->inmem_ilist
);
445 spin_unlock(&sbi
->inode_lock
[ATOMIC_FILE
]);
446 mutex_unlock(&fi
->inmem_lock
);
448 clear_inode_flag(inode
, FI_ATOMIC_COMMIT
);
455 * This function balances dirty node and dentry pages.
456 * In addition, it controls garbage collection.
458 void f2fs_balance_fs(struct f2fs_sb_info
*sbi
, bool need
)
460 #ifdef CONFIG_F2FS_FAULT_INJECTION
461 if (time_to_inject(sbi
, FAULT_CHECKPOINT
)) {
462 f2fs_show_injection_info(FAULT_CHECKPOINT
);
463 f2fs_stop_checkpoint(sbi
, false);
467 /* balance_fs_bg is able to be pending */
468 if (need
&& excess_cached_nats(sbi
))
469 f2fs_balance_fs_bg(sbi
);
472 * We should do GC or end up with checkpoint, if there are so many dirty
473 * dir/node pages without enough free segments.
475 if (has_not_enough_free_secs(sbi
, 0, 0)) {
476 mutex_lock(&sbi
->gc_mutex
);
477 f2fs_gc(sbi
, false, false, NULL_SEGNO
);
481 void f2fs_balance_fs_bg(struct f2fs_sb_info
*sbi
)
483 if (unlikely(is_sbi_flag_set(sbi
, SBI_POR_DOING
)))
486 /* try to shrink extent cache when there is no enough memory */
487 if (!available_free_memory(sbi
, EXTENT_CACHE
))
488 f2fs_shrink_extent_tree(sbi
, EXTENT_CACHE_SHRINK_NUMBER
);
490 /* check the # of cached NAT entries */
491 if (!available_free_memory(sbi
, NAT_ENTRIES
))
492 try_to_free_nats(sbi
, NAT_ENTRY_PER_BLOCK
);
494 if (!available_free_memory(sbi
, FREE_NIDS
))
495 try_to_free_nids(sbi
, MAX_FREE_NIDS
);
497 build_free_nids(sbi
, false, false);
499 if (!is_idle(sbi
) && !excess_dirty_nats(sbi
))
502 /* checkpoint is the only way to shrink partial cached entries */
503 if (!available_free_memory(sbi
, NAT_ENTRIES
) ||
504 !available_free_memory(sbi
, INO_ENTRIES
) ||
505 excess_prefree_segs(sbi
) ||
506 excess_dirty_nats(sbi
) ||
507 f2fs_time_over(sbi
, CP_TIME
)) {
508 if (test_opt(sbi
, DATA_FLUSH
)) {
509 struct blk_plug plug
;
511 blk_start_plug(&plug
);
512 sync_dirty_inodes(sbi
, FILE_INODE
);
513 blk_finish_plug(&plug
);
515 f2fs_sync_fs(sbi
->sb
, true);
516 stat_inc_bg_cp_count(sbi
->stat_info
);
520 static int __submit_flush_wait(struct f2fs_sb_info
*sbi
,
521 struct block_device
*bdev
)
523 struct bio
*bio
= f2fs_bio_alloc(sbi
, 0, true);
526 bio
->bi_opf
= REQ_OP_WRITE
| REQ_SYNC
| REQ_PREFLUSH
;
527 bio_set_dev(bio
, bdev
);
528 ret
= submit_bio_wait(bio
);
531 trace_f2fs_issue_flush(bdev
, test_opt(sbi
, NOBARRIER
),
532 test_opt(sbi
, FLUSH_MERGE
), ret
);
536 static int submit_flush_wait(struct f2fs_sb_info
*sbi
, nid_t ino
)
542 return __submit_flush_wait(sbi
, sbi
->sb
->s_bdev
);
544 for (i
= 0; i
< sbi
->s_ndevs
; i
++) {
545 if (!is_dirty_device(sbi
, ino
, i
, FLUSH_INO
))
547 ret
= __submit_flush_wait(sbi
, FDEV(i
).bdev
);
554 static int issue_flush_thread(void *data
)
556 struct f2fs_sb_info
*sbi
= data
;
557 struct flush_cmd_control
*fcc
= SM_I(sbi
)->fcc_info
;
558 wait_queue_head_t
*q
= &fcc
->flush_wait_queue
;
560 if (kthread_should_stop())
563 sb_start_intwrite(sbi
->sb
);
565 if (!llist_empty(&fcc
->issue_list
)) {
566 struct flush_cmd
*cmd
, *next
;
569 fcc
->dispatch_list
= llist_del_all(&fcc
->issue_list
);
570 fcc
->dispatch_list
= llist_reverse_order(fcc
->dispatch_list
);
572 cmd
= llist_entry(fcc
->dispatch_list
, struct flush_cmd
, llnode
);
574 ret
= submit_flush_wait(sbi
, cmd
->ino
);
575 atomic_inc(&fcc
->issued_flush
);
577 llist_for_each_entry_safe(cmd
, next
,
578 fcc
->dispatch_list
, llnode
) {
580 complete(&cmd
->wait
);
582 fcc
->dispatch_list
= NULL
;
585 sb_end_intwrite(sbi
->sb
);
587 wait_event_interruptible(*q
,
588 kthread_should_stop() || !llist_empty(&fcc
->issue_list
));
592 int f2fs_issue_flush(struct f2fs_sb_info
*sbi
, nid_t ino
)
594 struct flush_cmd_control
*fcc
= SM_I(sbi
)->fcc_info
;
595 struct flush_cmd cmd
;
598 if (test_opt(sbi
, NOBARRIER
))
601 if (!test_opt(sbi
, FLUSH_MERGE
)) {
602 ret
= submit_flush_wait(sbi
, ino
);
603 atomic_inc(&fcc
->issued_flush
);
607 if (atomic_inc_return(&fcc
->issing_flush
) == 1 || sbi
->s_ndevs
> 1) {
608 ret
= submit_flush_wait(sbi
, ino
);
609 atomic_dec(&fcc
->issing_flush
);
611 atomic_inc(&fcc
->issued_flush
);
616 init_completion(&cmd
.wait
);
618 llist_add(&cmd
.llnode
, &fcc
->issue_list
);
620 /* update issue_list before we wake up issue_flush thread */
623 if (waitqueue_active(&fcc
->flush_wait_queue
))
624 wake_up(&fcc
->flush_wait_queue
);
626 if (fcc
->f2fs_issue_flush
) {
627 wait_for_completion(&cmd
.wait
);
628 atomic_dec(&fcc
->issing_flush
);
630 struct llist_node
*list
;
632 list
= llist_del_all(&fcc
->issue_list
);
634 wait_for_completion(&cmd
.wait
);
635 atomic_dec(&fcc
->issing_flush
);
637 struct flush_cmd
*tmp
, *next
;
639 ret
= submit_flush_wait(sbi
, ino
);
641 llist_for_each_entry_safe(tmp
, next
, list
, llnode
) {
644 atomic_dec(&fcc
->issing_flush
);
648 complete(&tmp
->wait
);
656 int create_flush_cmd_control(struct f2fs_sb_info
*sbi
)
658 dev_t dev
= sbi
->sb
->s_bdev
->bd_dev
;
659 struct flush_cmd_control
*fcc
;
662 if (SM_I(sbi
)->fcc_info
) {
663 fcc
= SM_I(sbi
)->fcc_info
;
664 if (fcc
->f2fs_issue_flush
)
669 fcc
= f2fs_kzalloc(sbi
, sizeof(struct flush_cmd_control
), GFP_KERNEL
);
672 atomic_set(&fcc
->issued_flush
, 0);
673 atomic_set(&fcc
->issing_flush
, 0);
674 init_waitqueue_head(&fcc
->flush_wait_queue
);
675 init_llist_head(&fcc
->issue_list
);
676 SM_I(sbi
)->fcc_info
= fcc
;
677 if (!test_opt(sbi
, FLUSH_MERGE
))
681 fcc
->f2fs_issue_flush
= kthread_run(issue_flush_thread
, sbi
,
682 "f2fs_flush-%u:%u", MAJOR(dev
), MINOR(dev
));
683 if (IS_ERR(fcc
->f2fs_issue_flush
)) {
684 err
= PTR_ERR(fcc
->f2fs_issue_flush
);
686 SM_I(sbi
)->fcc_info
= NULL
;
693 void destroy_flush_cmd_control(struct f2fs_sb_info
*sbi
, bool free
)
695 struct flush_cmd_control
*fcc
= SM_I(sbi
)->fcc_info
;
697 if (fcc
&& fcc
->f2fs_issue_flush
) {
698 struct task_struct
*flush_thread
= fcc
->f2fs_issue_flush
;
700 fcc
->f2fs_issue_flush
= NULL
;
701 kthread_stop(flush_thread
);
705 SM_I(sbi
)->fcc_info
= NULL
;
709 int f2fs_flush_device_cache(struct f2fs_sb_info
*sbi
)
716 for (i
= 1; i
< sbi
->s_ndevs
; i
++) {
717 if (!f2fs_test_bit(i
, (char *)&sbi
->dirty_device
))
719 ret
= __submit_flush_wait(sbi
, FDEV(i
).bdev
);
723 spin_lock(&sbi
->dev_lock
);
724 f2fs_clear_bit(i
, (char *)&sbi
->dirty_device
);
725 spin_unlock(&sbi
->dev_lock
);
731 static void __locate_dirty_segment(struct f2fs_sb_info
*sbi
, unsigned int segno
,
732 enum dirty_type dirty_type
)
734 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
736 /* need not be added */
737 if (IS_CURSEG(sbi
, segno
))
740 if (!test_and_set_bit(segno
, dirty_i
->dirty_segmap
[dirty_type
]))
741 dirty_i
->nr_dirty
[dirty_type
]++;
743 if (dirty_type
== DIRTY
) {
744 struct seg_entry
*sentry
= get_seg_entry(sbi
, segno
);
745 enum dirty_type t
= sentry
->type
;
747 if (unlikely(t
>= DIRTY
)) {
751 if (!test_and_set_bit(segno
, dirty_i
->dirty_segmap
[t
]))
752 dirty_i
->nr_dirty
[t
]++;
756 static void __remove_dirty_segment(struct f2fs_sb_info
*sbi
, unsigned int segno
,
757 enum dirty_type dirty_type
)
759 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
761 if (test_and_clear_bit(segno
, dirty_i
->dirty_segmap
[dirty_type
]))
762 dirty_i
->nr_dirty
[dirty_type
]--;
764 if (dirty_type
== DIRTY
) {
765 struct seg_entry
*sentry
= get_seg_entry(sbi
, segno
);
766 enum dirty_type t
= sentry
->type
;
768 if (test_and_clear_bit(segno
, dirty_i
->dirty_segmap
[t
]))
769 dirty_i
->nr_dirty
[t
]--;
771 if (get_valid_blocks(sbi
, segno
, true) == 0)
772 clear_bit(GET_SEC_FROM_SEG(sbi
, segno
),
773 dirty_i
->victim_secmap
);
778 * Should not occur error such as -ENOMEM.
779 * Adding dirty entry into seglist is not critical operation.
780 * If a given segment is one of current working segments, it won't be added.
782 static void locate_dirty_segment(struct f2fs_sb_info
*sbi
, unsigned int segno
)
784 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
785 unsigned short valid_blocks
;
787 if (segno
== NULL_SEGNO
|| IS_CURSEG(sbi
, segno
))
790 mutex_lock(&dirty_i
->seglist_lock
);
792 valid_blocks
= get_valid_blocks(sbi
, segno
, false);
794 if (valid_blocks
== 0) {
795 __locate_dirty_segment(sbi
, segno
, PRE
);
796 __remove_dirty_segment(sbi
, segno
, DIRTY
);
797 } else if (valid_blocks
< sbi
->blocks_per_seg
) {
798 __locate_dirty_segment(sbi
, segno
, DIRTY
);
800 /* Recovery routine with SSR needs this */
801 __remove_dirty_segment(sbi
, segno
, DIRTY
);
804 mutex_unlock(&dirty_i
->seglist_lock
);
807 static struct discard_cmd
*__create_discard_cmd(struct f2fs_sb_info
*sbi
,
808 struct block_device
*bdev
, block_t lstart
,
809 block_t start
, block_t len
)
811 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
812 struct list_head
*pend_list
;
813 struct discard_cmd
*dc
;
815 f2fs_bug_on(sbi
, !len
);
817 pend_list
= &dcc
->pend_list
[plist_idx(len
)];
819 dc
= f2fs_kmem_cache_alloc(discard_cmd_slab
, GFP_NOFS
);
820 INIT_LIST_HEAD(&dc
->list
);
828 init_completion(&dc
->wait
);
829 list_add_tail(&dc
->list
, pend_list
);
830 atomic_inc(&dcc
->discard_cmd_cnt
);
831 dcc
->undiscard_blks
+= len
;
836 static struct discard_cmd
*__attach_discard_cmd(struct f2fs_sb_info
*sbi
,
837 struct block_device
*bdev
, block_t lstart
,
838 block_t start
, block_t len
,
839 struct rb_node
*parent
, struct rb_node
**p
)
841 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
842 struct discard_cmd
*dc
;
844 dc
= __create_discard_cmd(sbi
, bdev
, lstart
, start
, len
);
846 rb_link_node(&dc
->rb_node
, parent
, p
);
847 rb_insert_color(&dc
->rb_node
, &dcc
->root
);
852 static void __detach_discard_cmd(struct discard_cmd_control
*dcc
,
853 struct discard_cmd
*dc
)
855 if (dc
->state
== D_DONE
)
856 atomic_dec(&dcc
->issing_discard
);
859 rb_erase(&dc
->rb_node
, &dcc
->root
);
860 dcc
->undiscard_blks
-= dc
->len
;
862 kmem_cache_free(discard_cmd_slab
, dc
);
864 atomic_dec(&dcc
->discard_cmd_cnt
);
867 static void __remove_discard_cmd(struct f2fs_sb_info
*sbi
,
868 struct discard_cmd
*dc
)
870 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
872 trace_f2fs_remove_discard(dc
->bdev
, dc
->start
, dc
->len
);
874 f2fs_bug_on(sbi
, dc
->ref
);
876 if (dc
->error
== -EOPNOTSUPP
)
880 f2fs_msg(sbi
->sb
, KERN_INFO
,
881 "Issue discard(%u, %u, %u) failed, ret: %d",
882 dc
->lstart
, dc
->start
, dc
->len
, dc
->error
);
883 __detach_discard_cmd(dcc
, dc
);
886 static void f2fs_submit_discard_endio(struct bio
*bio
)
888 struct discard_cmd
*dc
= (struct discard_cmd
*)bio
->bi_private
;
890 dc
->error
= blk_status_to_errno(bio
->bi_status
);
892 complete_all(&dc
->wait
);
896 static void __check_sit_bitmap(struct f2fs_sb_info
*sbi
,
897 block_t start
, block_t end
)
899 #ifdef CONFIG_F2FS_CHECK_FS
900 struct seg_entry
*sentry
;
903 unsigned long offset
, size
, max_blocks
= sbi
->blocks_per_seg
;
907 segno
= GET_SEGNO(sbi
, blk
);
908 sentry
= get_seg_entry(sbi
, segno
);
909 offset
= GET_BLKOFF_FROM_SEG0(sbi
, blk
);
911 if (end
< START_BLOCK(sbi
, segno
+ 1))
912 size
= GET_BLKOFF_FROM_SEG0(sbi
, end
);
915 map
= (unsigned long *)(sentry
->cur_valid_map
);
916 offset
= __find_rev_next_bit(map
, size
, offset
);
917 f2fs_bug_on(sbi
, offset
!= size
);
918 blk
= START_BLOCK(sbi
, segno
+ 1);
923 static void __init_discard_policy(struct f2fs_sb_info
*sbi
,
924 struct discard_policy
*dpolicy
,
925 int discard_type
, unsigned int granularity
)
928 dpolicy
->type
= discard_type
;
929 dpolicy
->sync
= true;
930 dpolicy
->granularity
= granularity
;
932 dpolicy
->max_requests
= DEF_MAX_DISCARD_REQUEST
;
933 dpolicy
->io_aware_gran
= MAX_PLIST_NUM
;
935 if (discard_type
== DPOLICY_BG
) {
936 dpolicy
->min_interval
= DEF_MIN_DISCARD_ISSUE_TIME
;
937 dpolicy
->max_interval
= DEF_MAX_DISCARD_ISSUE_TIME
;
938 dpolicy
->io_aware
= true;
939 dpolicy
->sync
= false;
940 if (utilization(sbi
) > DEF_DISCARD_URGENT_UTIL
) {
941 dpolicy
->granularity
= 1;
942 dpolicy
->max_interval
= DEF_MIN_DISCARD_ISSUE_TIME
;
944 } else if (discard_type
== DPOLICY_FORCE
) {
945 dpolicy
->min_interval
= DEF_MIN_DISCARD_ISSUE_TIME
;
946 dpolicy
->max_interval
= DEF_MAX_DISCARD_ISSUE_TIME
;
947 dpolicy
->io_aware
= false;
948 } else if (discard_type
== DPOLICY_FSTRIM
) {
949 dpolicy
->io_aware
= false;
950 } else if (discard_type
== DPOLICY_UMOUNT
) {
951 dpolicy
->io_aware
= false;
956 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
957 static void __submit_discard_cmd(struct f2fs_sb_info
*sbi
,
958 struct discard_policy
*dpolicy
,
959 struct discard_cmd
*dc
)
961 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
962 struct list_head
*wait_list
= (dpolicy
->type
== DPOLICY_FSTRIM
) ?
963 &(dcc
->fstrim_list
) : &(dcc
->wait_list
);
964 struct bio
*bio
= NULL
;
965 int flag
= dpolicy
->sync
? REQ_SYNC
: 0;
967 if (dc
->state
!= D_PREP
)
970 trace_f2fs_issue_discard(dc
->bdev
, dc
->start
, dc
->len
);
972 dc
->error
= __blkdev_issue_discard(dc
->bdev
,
973 SECTOR_FROM_BLOCK(dc
->start
),
974 SECTOR_FROM_BLOCK(dc
->len
),
977 /* should keep before submission to avoid D_DONE right away */
978 dc
->state
= D_SUBMIT
;
979 atomic_inc(&dcc
->issued_discard
);
980 atomic_inc(&dcc
->issing_discard
);
982 bio
->bi_private
= dc
;
983 bio
->bi_end_io
= f2fs_submit_discard_endio
;
986 list_move_tail(&dc
->list
, wait_list
);
987 __check_sit_bitmap(sbi
, dc
->start
, dc
->start
+ dc
->len
);
989 f2fs_update_iostat(sbi
, FS_DISCARD
, 1);
992 __remove_discard_cmd(sbi
, dc
);
996 static struct discard_cmd
*__insert_discard_tree(struct f2fs_sb_info
*sbi
,
997 struct block_device
*bdev
, block_t lstart
,
998 block_t start
, block_t len
,
999 struct rb_node
**insert_p
,
1000 struct rb_node
*insert_parent
)
1002 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1004 struct rb_node
*parent
= NULL
;
1005 struct discard_cmd
*dc
= NULL
;
1007 if (insert_p
&& insert_parent
) {
1008 parent
= insert_parent
;
1013 p
= __lookup_rb_tree_for_insert(sbi
, &dcc
->root
, &parent
, lstart
);
1015 dc
= __attach_discard_cmd(sbi
, bdev
, lstart
, start
, len
, parent
, p
);
1022 static void __relocate_discard_cmd(struct discard_cmd_control
*dcc
,
1023 struct discard_cmd
*dc
)
1025 list_move_tail(&dc
->list
, &dcc
->pend_list
[plist_idx(dc
->len
)]);
1028 static void __punch_discard_cmd(struct f2fs_sb_info
*sbi
,
1029 struct discard_cmd
*dc
, block_t blkaddr
)
1031 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1032 struct discard_info di
= dc
->di
;
1033 bool modified
= false;
1035 if (dc
->state
== D_DONE
|| dc
->len
== 1) {
1036 __remove_discard_cmd(sbi
, dc
);
1040 dcc
->undiscard_blks
-= di
.len
;
1042 if (blkaddr
> di
.lstart
) {
1043 dc
->len
= blkaddr
- dc
->lstart
;
1044 dcc
->undiscard_blks
+= dc
->len
;
1045 __relocate_discard_cmd(dcc
, dc
);
1049 if (blkaddr
< di
.lstart
+ di
.len
- 1) {
1051 __insert_discard_tree(sbi
, dc
->bdev
, blkaddr
+ 1,
1052 di
.start
+ blkaddr
+ 1 - di
.lstart
,
1053 di
.lstart
+ di
.len
- 1 - blkaddr
,
1059 dcc
->undiscard_blks
+= dc
->len
;
1060 __relocate_discard_cmd(dcc
, dc
);
1065 static void __update_discard_tree_range(struct f2fs_sb_info
*sbi
,
1066 struct block_device
*bdev
, block_t lstart
,
1067 block_t start
, block_t len
)
1069 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1070 struct discard_cmd
*prev_dc
= NULL
, *next_dc
= NULL
;
1071 struct discard_cmd
*dc
;
1072 struct discard_info di
= {0};
1073 struct rb_node
**insert_p
= NULL
, *insert_parent
= NULL
;
1074 block_t end
= lstart
+ len
;
1076 mutex_lock(&dcc
->cmd_lock
);
1078 dc
= (struct discard_cmd
*)__lookup_rb_tree_ret(&dcc
->root
,
1080 (struct rb_entry
**)&prev_dc
,
1081 (struct rb_entry
**)&next_dc
,
1082 &insert_p
, &insert_parent
, true);
1088 di
.len
= next_dc
? next_dc
->lstart
- lstart
: len
;
1089 di
.len
= min(di
.len
, len
);
1094 struct rb_node
*node
;
1095 bool merged
= false;
1096 struct discard_cmd
*tdc
= NULL
;
1099 di
.lstart
= prev_dc
->lstart
+ prev_dc
->len
;
1100 if (di
.lstart
< lstart
)
1102 if (di
.lstart
>= end
)
1105 if (!next_dc
|| next_dc
->lstart
> end
)
1106 di
.len
= end
- di
.lstart
;
1108 di
.len
= next_dc
->lstart
- di
.lstart
;
1109 di
.start
= start
+ di
.lstart
- lstart
;
1115 if (prev_dc
&& prev_dc
->state
== D_PREP
&&
1116 prev_dc
->bdev
== bdev
&&
1117 __is_discard_back_mergeable(&di
, &prev_dc
->di
)) {
1118 prev_dc
->di
.len
+= di
.len
;
1119 dcc
->undiscard_blks
+= di
.len
;
1120 __relocate_discard_cmd(dcc
, prev_dc
);
1126 if (next_dc
&& next_dc
->state
== D_PREP
&&
1127 next_dc
->bdev
== bdev
&&
1128 __is_discard_front_mergeable(&di
, &next_dc
->di
)) {
1129 next_dc
->di
.lstart
= di
.lstart
;
1130 next_dc
->di
.len
+= di
.len
;
1131 next_dc
->di
.start
= di
.start
;
1132 dcc
->undiscard_blks
+= di
.len
;
1133 __relocate_discard_cmd(dcc
, next_dc
);
1135 __remove_discard_cmd(sbi
, tdc
);
1140 __insert_discard_tree(sbi
, bdev
, di
.lstart
, di
.start
,
1141 di
.len
, NULL
, NULL
);
1148 node
= rb_next(&prev_dc
->rb_node
);
1149 next_dc
= rb_entry_safe(node
, struct discard_cmd
, rb_node
);
1152 mutex_unlock(&dcc
->cmd_lock
);
1155 static int __queue_discard_cmd(struct f2fs_sb_info
*sbi
,
1156 struct block_device
*bdev
, block_t blkstart
, block_t blklen
)
1158 block_t lblkstart
= blkstart
;
1160 trace_f2fs_queue_discard(bdev
, blkstart
, blklen
);
1163 int devi
= f2fs_target_device_index(sbi
, blkstart
);
1165 blkstart
-= FDEV(devi
).start_blk
;
1167 __update_discard_tree_range(sbi
, bdev
, lblkstart
, blkstart
, blklen
);
1171 static int __issue_discard_cmd(struct f2fs_sb_info
*sbi
,
1172 struct discard_policy
*dpolicy
)
1174 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1175 struct list_head
*pend_list
;
1176 struct discard_cmd
*dc
, *tmp
;
1177 struct blk_plug plug
;
1178 int i
, iter
= 0, issued
= 0;
1179 bool io_interrupted
= false;
1181 for (i
= MAX_PLIST_NUM
- 1; i
>= 0; i
--) {
1182 if (i
+ 1 < dpolicy
->granularity
)
1184 pend_list
= &dcc
->pend_list
[i
];
1186 mutex_lock(&dcc
->cmd_lock
);
1187 if (list_empty(pend_list
))
1189 f2fs_bug_on(sbi
, !__check_rb_tree_consistence(sbi
, &dcc
->root
));
1190 blk_start_plug(&plug
);
1191 list_for_each_entry_safe(dc
, tmp
, pend_list
, list
) {
1192 f2fs_bug_on(sbi
, dc
->state
!= D_PREP
);
1194 if (dpolicy
->io_aware
&& i
< dpolicy
->io_aware_gran
&&
1196 io_interrupted
= true;
1200 __submit_discard_cmd(sbi
, dpolicy
, dc
);
1203 if (++iter
>= dpolicy
->max_requests
)
1206 blk_finish_plug(&plug
);
1208 mutex_unlock(&dcc
->cmd_lock
);
1210 if (iter
>= dpolicy
->max_requests
)
1214 if (!issued
&& io_interrupted
)
1220 static bool __drop_discard_cmd(struct f2fs_sb_info
*sbi
)
1222 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1223 struct list_head
*pend_list
;
1224 struct discard_cmd
*dc
, *tmp
;
1226 bool dropped
= false;
1228 mutex_lock(&dcc
->cmd_lock
);
1229 for (i
= MAX_PLIST_NUM
- 1; i
>= 0; i
--) {
1230 pend_list
= &dcc
->pend_list
[i
];
1231 list_for_each_entry_safe(dc
, tmp
, pend_list
, list
) {
1232 f2fs_bug_on(sbi
, dc
->state
!= D_PREP
);
1233 __remove_discard_cmd(sbi
, dc
);
1237 mutex_unlock(&dcc
->cmd_lock
);
1242 void drop_discard_cmd(struct f2fs_sb_info
*sbi
)
1244 __drop_discard_cmd(sbi
);
1247 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info
*sbi
,
1248 struct discard_cmd
*dc
)
1250 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1251 unsigned int len
= 0;
1253 wait_for_completion_io(&dc
->wait
);
1254 mutex_lock(&dcc
->cmd_lock
);
1255 f2fs_bug_on(sbi
, dc
->state
!= D_DONE
);
1260 __remove_discard_cmd(sbi
, dc
);
1262 mutex_unlock(&dcc
->cmd_lock
);
1267 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info
*sbi
,
1268 struct discard_policy
*dpolicy
,
1269 block_t start
, block_t end
)
1271 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1272 struct list_head
*wait_list
= (dpolicy
->type
== DPOLICY_FSTRIM
) ?
1273 &(dcc
->fstrim_list
) : &(dcc
->wait_list
);
1274 struct discard_cmd
*dc
, *tmp
;
1276 unsigned int trimmed
= 0;
1281 mutex_lock(&dcc
->cmd_lock
);
1282 list_for_each_entry_safe(dc
, tmp
, wait_list
, list
) {
1283 if (dc
->lstart
+ dc
->len
<= start
|| end
<= dc
->lstart
)
1285 if (dc
->len
< dpolicy
->granularity
)
1287 if (dc
->state
== D_DONE
&& !dc
->ref
) {
1288 wait_for_completion_io(&dc
->wait
);
1291 __remove_discard_cmd(sbi
, dc
);
1298 mutex_unlock(&dcc
->cmd_lock
);
1301 trimmed
+= __wait_one_discard_bio(sbi
, dc
);
1308 static void __wait_all_discard_cmd(struct f2fs_sb_info
*sbi
,
1309 struct discard_policy
*dpolicy
)
1311 struct discard_policy dp
;
1314 __wait_discard_cmd_range(sbi
, dpolicy
, 0, UINT_MAX
);
1319 __init_discard_policy(sbi
, &dp
, DPOLICY_FSTRIM
, 1);
1320 __wait_discard_cmd_range(sbi
, &dp
, 0, UINT_MAX
);
1321 __init_discard_policy(sbi
, &dp
, DPOLICY_UMOUNT
, 1);
1322 __wait_discard_cmd_range(sbi
, &dp
, 0, UINT_MAX
);
1325 /* This should be covered by global mutex, &sit_i->sentry_lock */
1326 static void f2fs_wait_discard_bio(struct f2fs_sb_info
*sbi
, block_t blkaddr
)
1328 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1329 struct discard_cmd
*dc
;
1330 bool need_wait
= false;
1332 mutex_lock(&dcc
->cmd_lock
);
1333 dc
= (struct discard_cmd
*)__lookup_rb_tree(&dcc
->root
, NULL
, blkaddr
);
1335 if (dc
->state
== D_PREP
) {
1336 __punch_discard_cmd(sbi
, dc
, blkaddr
);
1342 mutex_unlock(&dcc
->cmd_lock
);
1345 __wait_one_discard_bio(sbi
, dc
);
1348 void stop_discard_thread(struct f2fs_sb_info
*sbi
)
1350 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1352 if (dcc
&& dcc
->f2fs_issue_discard
) {
1353 struct task_struct
*discard_thread
= dcc
->f2fs_issue_discard
;
1355 dcc
->f2fs_issue_discard
= NULL
;
1356 kthread_stop(discard_thread
);
1360 /* This comes from f2fs_put_super */
1361 bool f2fs_wait_discard_bios(struct f2fs_sb_info
*sbi
)
1363 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1364 struct discard_policy dpolicy
;
1367 __init_discard_policy(sbi
, &dpolicy
, DPOLICY_UMOUNT
,
1368 dcc
->discard_granularity
);
1369 __issue_discard_cmd(sbi
, &dpolicy
);
1370 dropped
= __drop_discard_cmd(sbi
);
1372 /* just to make sure there is no pending discard commands */
1373 __wait_all_discard_cmd(sbi
, NULL
);
1377 static int issue_discard_thread(void *data
)
1379 struct f2fs_sb_info
*sbi
= data
;
1380 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1381 wait_queue_head_t
*q
= &dcc
->discard_wait_queue
;
1382 struct discard_policy dpolicy
;
1383 unsigned int wait_ms
= DEF_MIN_DISCARD_ISSUE_TIME
;
1389 __init_discard_policy(sbi
, &dpolicy
, DPOLICY_BG
,
1390 dcc
->discard_granularity
);
1392 wait_event_interruptible_timeout(*q
,
1393 kthread_should_stop() || freezing(current
) ||
1395 msecs_to_jiffies(wait_ms
));
1396 if (try_to_freeze())
1398 if (f2fs_readonly(sbi
->sb
))
1400 if (kthread_should_stop())
1403 if (dcc
->discard_wake
)
1404 dcc
->discard_wake
= 0;
1406 if (sbi
->gc_thread
&& sbi
->gc_thread
->gc_urgent
)
1407 __init_discard_policy(sbi
, &dpolicy
, DPOLICY_FORCE
, 1);
1409 sb_start_intwrite(sbi
->sb
);
1411 issued
= __issue_discard_cmd(sbi
, &dpolicy
);
1413 __wait_all_discard_cmd(sbi
, &dpolicy
);
1414 wait_ms
= dpolicy
.min_interval
;
1416 wait_ms
= dpolicy
.max_interval
;
1419 sb_end_intwrite(sbi
->sb
);
1421 } while (!kthread_should_stop());
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Issue a discard against a zoned block device: conventional zones use a
 * regular queued discard, sequential zones get a zone-reset. Returns 0 or
 * a negative errno.
 */
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	sector_t sector, nr_sects;
	block_t lblkstart = blkstart;	/* keep fs-logical start for queueing */
	int devi = 0;

	if (sbi->s_ndevs) {
		devi = f2fs_target_device_index(sbi, blkstart);
		blkstart -= FDEV(devi).start_blk;
	}

	/*
	 * We need to know the type of the zone: for conventional zones,
	 * use regular discard if the drive supports it. For sequential
	 * zones, reset the zone write pointer.
	 */
	switch (get_blkz_type(sbi, bdev, blkstart)) {

	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!blk_queue_discard(bdev_get_queue(bdev)))
			return 0;
		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);

		/* zone reset must cover exactly one aligned zone */
		if (sector & (bdev_zone_sectors(bdev) - 1) ||
				nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_msg(sbi->sb, KERN_INFO,
				"(%d) %s: Unaligned discard attempted (block %x + %x)",
				devi, sbi->s_ndevs ? FDEV(devi).path : "",
				blkstart, blklen);
			return -EIO;
		}
		trace_f2fs_issue_reset_zone(bdev, blkstart);
		return blkdev_reset_zones(bdev, sector,
					  nr_sects, GFP_NOFS);
	default:
		/* Unknown zone type: broken device ? */
		return -EIO;
	}
}
#endif
1472 static int __issue_discard_async(struct f2fs_sb_info
*sbi
,
1473 struct block_device
*bdev
, block_t blkstart
, block_t blklen
)
1475 #ifdef CONFIG_BLK_DEV_ZONED
1476 if (f2fs_sb_has_blkzoned(sbi
->sb
) &&
1477 bdev_zoned_model(bdev
) != BLK_ZONED_NONE
)
1478 return __f2fs_issue_discard_zone(sbi
, bdev
, blkstart
, blklen
);
1480 return __queue_discard_cmd(sbi
, bdev
, blkstart
, blklen
);
1483 static int f2fs_issue_discard(struct f2fs_sb_info
*sbi
,
1484 block_t blkstart
, block_t blklen
)
1486 sector_t start
= blkstart
, len
= 0;
1487 struct block_device
*bdev
;
1488 struct seg_entry
*se
;
1489 unsigned int offset
;
1493 bdev
= f2fs_target_device(sbi
, blkstart
, NULL
);
1495 for (i
= blkstart
; i
< blkstart
+ blklen
; i
++, len
++) {
1497 struct block_device
*bdev2
=
1498 f2fs_target_device(sbi
, i
, NULL
);
1500 if (bdev2
!= bdev
) {
1501 err
= __issue_discard_async(sbi
, bdev
,
1511 se
= get_seg_entry(sbi
, GET_SEGNO(sbi
, i
));
1512 offset
= GET_BLKOFF_FROM_SEG0(sbi
, i
);
1514 if (!f2fs_test_and_set_bit(offset
, se
->discard_map
))
1515 sbi
->discard_blks
--;
1519 err
= __issue_discard_async(sbi
, bdev
, start
, len
);
1523 static bool add_discard_addrs(struct f2fs_sb_info
*sbi
, struct cp_control
*cpc
,
1526 int entries
= SIT_VBLOCK_MAP_SIZE
/ sizeof(unsigned long);
1527 int max_blocks
= sbi
->blocks_per_seg
;
1528 struct seg_entry
*se
= get_seg_entry(sbi
, cpc
->trim_start
);
1529 unsigned long *cur_map
= (unsigned long *)se
->cur_valid_map
;
1530 unsigned long *ckpt_map
= (unsigned long *)se
->ckpt_valid_map
;
1531 unsigned long *discard_map
= (unsigned long *)se
->discard_map
;
1532 unsigned long *dmap
= SIT_I(sbi
)->tmp_map
;
1533 unsigned int start
= 0, end
= -1;
1534 bool force
= (cpc
->reason
& CP_DISCARD
);
1535 struct discard_entry
*de
= NULL
;
1536 struct list_head
*head
= &SM_I(sbi
)->dcc_info
->entry_list
;
1539 if (se
->valid_blocks
== max_blocks
|| !f2fs_discard_en(sbi
))
1543 if (!test_opt(sbi
, DISCARD
) || !se
->valid_blocks
||
1544 SM_I(sbi
)->dcc_info
->nr_discards
>=
1545 SM_I(sbi
)->dcc_info
->max_discards
)
1549 /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
1550 for (i
= 0; i
< entries
; i
++)
1551 dmap
[i
] = force
? ~ckpt_map
[i
] & ~discard_map
[i
] :
1552 (cur_map
[i
] ^ ckpt_map
[i
]) & ckpt_map
[i
];
1554 while (force
|| SM_I(sbi
)->dcc_info
->nr_discards
<=
1555 SM_I(sbi
)->dcc_info
->max_discards
) {
1556 start
= __find_rev_next_bit(dmap
, max_blocks
, end
+ 1);
1557 if (start
>= max_blocks
)
1560 end
= __find_rev_next_zero_bit(dmap
, max_blocks
, start
+ 1);
1561 if (force
&& start
&& end
!= max_blocks
1562 && (end
- start
) < cpc
->trim_minlen
)
1569 de
= f2fs_kmem_cache_alloc(discard_entry_slab
,
1571 de
->start_blkaddr
= START_BLOCK(sbi
, cpc
->trim_start
);
1572 list_add_tail(&de
->list
, head
);
1575 for (i
= start
; i
< end
; i
++)
1576 __set_bit_le(i
, (void *)de
->discard_map
);
1578 SM_I(sbi
)->dcc_info
->nr_discards
+= end
- start
;
1583 void release_discard_addrs(struct f2fs_sb_info
*sbi
)
1585 struct list_head
*head
= &(SM_I(sbi
)->dcc_info
->entry_list
);
1586 struct discard_entry
*entry
, *this;
1589 list_for_each_entry_safe(entry
, this, head
, list
) {
1590 list_del(&entry
->list
);
1591 kmem_cache_free(discard_entry_slab
, entry
);
1596 * Should call clear_prefree_segments after checkpoint is done.
1598 static void set_prefree_as_free_segments(struct f2fs_sb_info
*sbi
)
1600 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
1603 mutex_lock(&dirty_i
->seglist_lock
);
1604 for_each_set_bit(segno
, dirty_i
->dirty_segmap
[PRE
], MAIN_SEGS(sbi
))
1605 __set_test_and_free(sbi
, segno
);
1606 mutex_unlock(&dirty_i
->seglist_lock
);
1609 void clear_prefree_segments(struct f2fs_sb_info
*sbi
, struct cp_control
*cpc
)
1611 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1612 struct list_head
*head
= &dcc
->entry_list
;
1613 struct discard_entry
*entry
, *this;
1614 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
1615 unsigned long *prefree_map
= dirty_i
->dirty_segmap
[PRE
];
1616 unsigned int start
= 0, end
= -1;
1617 unsigned int secno
, start_segno
;
1618 bool force
= (cpc
->reason
& CP_DISCARD
);
1620 mutex_lock(&dirty_i
->seglist_lock
);
1624 start
= find_next_bit(prefree_map
, MAIN_SEGS(sbi
), end
+ 1);
1625 if (start
>= MAIN_SEGS(sbi
))
1627 end
= find_next_zero_bit(prefree_map
, MAIN_SEGS(sbi
),
1630 for (i
= start
; i
< end
; i
++)
1631 clear_bit(i
, prefree_map
);
1633 dirty_i
->nr_dirty
[PRE
] -= end
- start
;
1635 if (!test_opt(sbi
, DISCARD
))
1638 if (force
&& start
>= cpc
->trim_start
&&
1639 (end
- 1) <= cpc
->trim_end
)
1642 if (!test_opt(sbi
, LFS
) || sbi
->segs_per_sec
== 1) {
1643 f2fs_issue_discard(sbi
, START_BLOCK(sbi
, start
),
1644 (end
- start
) << sbi
->log_blocks_per_seg
);
1648 secno
= GET_SEC_FROM_SEG(sbi
, start
);
1649 start_segno
= GET_SEG_FROM_SEC(sbi
, secno
);
1650 if (!IS_CURSEC(sbi
, secno
) &&
1651 !get_valid_blocks(sbi
, start
, true))
1652 f2fs_issue_discard(sbi
, START_BLOCK(sbi
, start_segno
),
1653 sbi
->segs_per_sec
<< sbi
->log_blocks_per_seg
);
1655 start
= start_segno
+ sbi
->segs_per_sec
;
1661 mutex_unlock(&dirty_i
->seglist_lock
);
1663 /* send small discards */
1664 list_for_each_entry_safe(entry
, this, head
, list
) {
1665 unsigned int cur_pos
= 0, next_pos
, len
, total_len
= 0;
1666 bool is_valid
= test_bit_le(0, entry
->discard_map
);
1670 next_pos
= find_next_zero_bit_le(entry
->discard_map
,
1671 sbi
->blocks_per_seg
, cur_pos
);
1672 len
= next_pos
- cur_pos
;
1674 if (f2fs_sb_has_blkzoned(sbi
->sb
) ||
1675 (force
&& len
< cpc
->trim_minlen
))
1678 f2fs_issue_discard(sbi
, entry
->start_blkaddr
+ cur_pos
,
1682 next_pos
= find_next_bit_le(entry
->discard_map
,
1683 sbi
->blocks_per_seg
, cur_pos
);
1687 is_valid
= !is_valid
;
1689 if (cur_pos
< sbi
->blocks_per_seg
)
1692 list_del(&entry
->list
);
1693 dcc
->nr_discards
-= total_len
;
1694 kmem_cache_free(discard_entry_slab
, entry
);
1697 wake_up_discard_thread(sbi
, false);
1700 static int create_discard_cmd_control(struct f2fs_sb_info
*sbi
)
1702 dev_t dev
= sbi
->sb
->s_bdev
->bd_dev
;
1703 struct discard_cmd_control
*dcc
;
1706 if (SM_I(sbi
)->dcc_info
) {
1707 dcc
= SM_I(sbi
)->dcc_info
;
1711 dcc
= f2fs_kzalloc(sbi
, sizeof(struct discard_cmd_control
), GFP_KERNEL
);
1715 dcc
->discard_granularity
= DEFAULT_DISCARD_GRANULARITY
;
1716 INIT_LIST_HEAD(&dcc
->entry_list
);
1717 for (i
= 0; i
< MAX_PLIST_NUM
; i
++)
1718 INIT_LIST_HEAD(&dcc
->pend_list
[i
]);
1719 INIT_LIST_HEAD(&dcc
->wait_list
);
1720 INIT_LIST_HEAD(&dcc
->fstrim_list
);
1721 mutex_init(&dcc
->cmd_lock
);
1722 atomic_set(&dcc
->issued_discard
, 0);
1723 atomic_set(&dcc
->issing_discard
, 0);
1724 atomic_set(&dcc
->discard_cmd_cnt
, 0);
1725 dcc
->nr_discards
= 0;
1726 dcc
->max_discards
= MAIN_SEGS(sbi
) << sbi
->log_blocks_per_seg
;
1727 dcc
->undiscard_blks
= 0;
1728 dcc
->root
= RB_ROOT
;
1730 init_waitqueue_head(&dcc
->discard_wait_queue
);
1731 SM_I(sbi
)->dcc_info
= dcc
;
1733 dcc
->f2fs_issue_discard
= kthread_run(issue_discard_thread
, sbi
,
1734 "f2fs_discard-%u:%u", MAJOR(dev
), MINOR(dev
));
1735 if (IS_ERR(dcc
->f2fs_issue_discard
)) {
1736 err
= PTR_ERR(dcc
->f2fs_issue_discard
);
1738 SM_I(sbi
)->dcc_info
= NULL
;
1745 static void destroy_discard_cmd_control(struct f2fs_sb_info
*sbi
)
1747 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
1752 stop_discard_thread(sbi
);
1755 SM_I(sbi
)->dcc_info
= NULL
;
1758 static bool __mark_sit_entry_dirty(struct f2fs_sb_info
*sbi
, unsigned int segno
)
1760 struct sit_info
*sit_i
= SIT_I(sbi
);
1762 if (!__test_and_set_bit(segno
, sit_i
->dirty_sentries_bitmap
)) {
1763 sit_i
->dirty_sentries
++;
1770 static void __set_sit_entry_type(struct f2fs_sb_info
*sbi
, int type
,
1771 unsigned int segno
, int modified
)
1773 struct seg_entry
*se
= get_seg_entry(sbi
, segno
);
1776 __mark_sit_entry_dirty(sbi
, segno
);
1779 static void update_sit_entry(struct f2fs_sb_info
*sbi
, block_t blkaddr
, int del
)
1781 struct seg_entry
*se
;
1782 unsigned int segno
, offset
;
1783 long int new_vblocks
;
1785 #ifdef CONFIG_F2FS_CHECK_FS
1789 segno
= GET_SEGNO(sbi
, blkaddr
);
1791 se
= get_seg_entry(sbi
, segno
);
1792 new_vblocks
= se
->valid_blocks
+ del
;
1793 offset
= GET_BLKOFF_FROM_SEG0(sbi
, blkaddr
);
1795 f2fs_bug_on(sbi
, (new_vblocks
>> (sizeof(unsigned short) << 3) ||
1796 (new_vblocks
> sbi
->blocks_per_seg
)));
1798 se
->valid_blocks
= new_vblocks
;
1799 se
->mtime
= get_mtime(sbi
);
1800 SIT_I(sbi
)->max_mtime
= se
->mtime
;
1802 /* Update valid block bitmap */
1804 exist
= f2fs_test_and_set_bit(offset
, se
->cur_valid_map
);
1805 #ifdef CONFIG_F2FS_CHECK_FS
1806 mir_exist
= f2fs_test_and_set_bit(offset
,
1807 se
->cur_valid_map_mir
);
1808 if (unlikely(exist
!= mir_exist
)) {
1809 f2fs_msg(sbi
->sb
, KERN_ERR
, "Inconsistent error "
1810 "when setting bitmap, blk:%u, old bit:%d",
1812 f2fs_bug_on(sbi
, 1);
1815 if (unlikely(exist
)) {
1816 f2fs_msg(sbi
->sb
, KERN_ERR
,
1817 "Bitmap was wrongly set, blk:%u", blkaddr
);
1818 f2fs_bug_on(sbi
, 1);
1823 if (f2fs_discard_en(sbi
) &&
1824 !f2fs_test_and_set_bit(offset
, se
->discard_map
))
1825 sbi
->discard_blks
--;
1827 /* don't overwrite by SSR to keep node chain */
1828 if (IS_NODESEG(se
->type
)) {
1829 if (!f2fs_test_and_set_bit(offset
, se
->ckpt_valid_map
))
1830 se
->ckpt_valid_blocks
++;
1833 exist
= f2fs_test_and_clear_bit(offset
, se
->cur_valid_map
);
1834 #ifdef CONFIG_F2FS_CHECK_FS
1835 mir_exist
= f2fs_test_and_clear_bit(offset
,
1836 se
->cur_valid_map_mir
);
1837 if (unlikely(exist
!= mir_exist
)) {
1838 f2fs_msg(sbi
->sb
, KERN_ERR
, "Inconsistent error "
1839 "when clearing bitmap, blk:%u, old bit:%d",
1841 f2fs_bug_on(sbi
, 1);
1844 if (unlikely(!exist
)) {
1845 f2fs_msg(sbi
->sb
, KERN_ERR
,
1846 "Bitmap was wrongly cleared, blk:%u", blkaddr
);
1847 f2fs_bug_on(sbi
, 1);
1852 if (f2fs_discard_en(sbi
) &&
1853 f2fs_test_and_clear_bit(offset
, se
->discard_map
))
1854 sbi
->discard_blks
++;
1856 if (!f2fs_test_bit(offset
, se
->ckpt_valid_map
))
1857 se
->ckpt_valid_blocks
+= del
;
1859 __mark_sit_entry_dirty(sbi
, segno
);
1861 /* update total number of valid blocks to be written in ckpt area */
1862 SIT_I(sbi
)->written_valid_blocks
+= del
;
1864 if (sbi
->segs_per_sec
> 1)
1865 get_sec_entry(sbi
, segno
)->valid_blocks
+= del
;
1868 void invalidate_blocks(struct f2fs_sb_info
*sbi
, block_t addr
)
1870 unsigned int segno
= GET_SEGNO(sbi
, addr
);
1871 struct sit_info
*sit_i
= SIT_I(sbi
);
1873 f2fs_bug_on(sbi
, addr
== NULL_ADDR
);
1874 if (addr
== NEW_ADDR
)
1877 /* add it into sit main buffer */
1878 down_write(&sit_i
->sentry_lock
);
1880 update_sit_entry(sbi
, addr
, -1);
1882 /* add it into dirty seglist */
1883 locate_dirty_segment(sbi
, segno
);
1885 up_write(&sit_i
->sentry_lock
);
1888 bool is_checkpointed_data(struct f2fs_sb_info
*sbi
, block_t blkaddr
)
1890 struct sit_info
*sit_i
= SIT_I(sbi
);
1891 unsigned int segno
, offset
;
1892 struct seg_entry
*se
;
1895 if (!is_valid_data_blkaddr(sbi
, blkaddr
))
1898 down_read(&sit_i
->sentry_lock
);
1900 segno
= GET_SEGNO(sbi
, blkaddr
);
1901 se
= get_seg_entry(sbi
, segno
);
1902 offset
= GET_BLKOFF_FROM_SEG0(sbi
, blkaddr
);
1904 if (f2fs_test_bit(offset
, se
->ckpt_valid_map
))
1907 up_read(&sit_i
->sentry_lock
);
1913 * This function should be resided under the curseg_mutex lock
1915 static void __add_sum_entry(struct f2fs_sb_info
*sbi
, int type
,
1916 struct f2fs_summary
*sum
)
1918 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
1919 void *addr
= curseg
->sum_blk
;
1920 addr
+= curseg
->next_blkoff
* sizeof(struct f2fs_summary
);
1921 memcpy(addr
, sum
, sizeof(struct f2fs_summary
));
1925 * Calculate the number of current summary pages for writing
1927 int npages_for_summary_flush(struct f2fs_sb_info
*sbi
, bool for_ra
)
1929 int valid_sum_count
= 0;
1932 for (i
= CURSEG_HOT_DATA
; i
<= CURSEG_COLD_DATA
; i
++) {
1933 if (sbi
->ckpt
->alloc_type
[i
] == SSR
)
1934 valid_sum_count
+= sbi
->blocks_per_seg
;
1937 valid_sum_count
+= le16_to_cpu(
1938 F2FS_CKPT(sbi
)->cur_data_blkoff
[i
]);
1940 valid_sum_count
+= curseg_blkoff(sbi
, i
);
1944 sum_in_page
= (PAGE_SIZE
- 2 * SUM_JOURNAL_SIZE
-
1945 SUM_FOOTER_SIZE
) / SUMMARY_SIZE
;
1946 if (valid_sum_count
<= sum_in_page
)
1948 else if ((valid_sum_count
- sum_in_page
) <=
1949 (PAGE_SIZE
- SUM_FOOTER_SIZE
) / SUMMARY_SIZE
)
/*
 * Caller should put this summary page
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}
1962 void update_meta_page(struct f2fs_sb_info
*sbi
, void *src
, block_t blk_addr
)
1964 struct page
*page
= grab_meta_page(sbi
, blk_addr
);
1966 memcpy(page_address(page
), src
, PAGE_SIZE
);
1967 set_page_dirty(page
);
1968 f2fs_put_page(page
, 1);
1971 static void write_sum_page(struct f2fs_sb_info
*sbi
,
1972 struct f2fs_summary_block
*sum_blk
, block_t blk_addr
)
1974 update_meta_page(sbi
, (void *)sum_blk
, blk_addr
);
1977 static void write_current_sum_page(struct f2fs_sb_info
*sbi
,
1978 int type
, block_t blk_addr
)
1980 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
1981 struct page
*page
= grab_meta_page(sbi
, blk_addr
);
1982 struct f2fs_summary_block
*src
= curseg
->sum_blk
;
1983 struct f2fs_summary_block
*dst
;
1985 dst
= (struct f2fs_summary_block
*)page_address(page
);
1987 mutex_lock(&curseg
->curseg_mutex
);
1989 down_read(&curseg
->journal_rwsem
);
1990 memcpy(&dst
->journal
, curseg
->journal
, SUM_JOURNAL_SIZE
);
1991 up_read(&curseg
->journal_rwsem
);
1993 memcpy(dst
->entries
, src
->entries
, SUM_ENTRY_SIZE
);
1994 memcpy(&dst
->footer
, &src
->footer
, SUM_FOOTER_SIZE
);
1996 mutex_unlock(&curseg
->curseg_mutex
);
1998 set_page_dirty(page
);
1999 f2fs_put_page(page
, 1);
2002 static int is_next_segment_free(struct f2fs_sb_info
*sbi
, int type
)
2004 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
2005 unsigned int segno
= curseg
->segno
+ 1;
2006 struct free_segmap_info
*free_i
= FREE_I(sbi
);
2008 if (segno
< MAIN_SEGS(sbi
) && segno
% sbi
->segs_per_sec
)
2009 return !test_bit(segno
, free_i
->free_segmap
);
2014 * Find a new segment from the free segments bitmap to right order
2015 * This function should be returned with success, otherwise BUG
2017 static void get_new_segment(struct f2fs_sb_info
*sbi
,
2018 unsigned int *newseg
, bool new_sec
, int dir
)
2020 struct free_segmap_info
*free_i
= FREE_I(sbi
);
2021 unsigned int segno
, secno
, zoneno
;
2022 unsigned int total_zones
= MAIN_SECS(sbi
) / sbi
->secs_per_zone
;
2023 unsigned int hint
= GET_SEC_FROM_SEG(sbi
, *newseg
);
2024 unsigned int old_zoneno
= GET_ZONE_FROM_SEG(sbi
, *newseg
);
2025 unsigned int left_start
= hint
;
2030 spin_lock(&free_i
->segmap_lock
);
2032 if (!new_sec
&& ((*newseg
+ 1) % sbi
->segs_per_sec
)) {
2033 segno
= find_next_zero_bit(free_i
->free_segmap
,
2034 GET_SEG_FROM_SEC(sbi
, hint
+ 1), *newseg
+ 1);
2035 if (segno
< GET_SEG_FROM_SEC(sbi
, hint
+ 1))
2039 secno
= find_next_zero_bit(free_i
->free_secmap
, MAIN_SECS(sbi
), hint
);
2040 if (secno
>= MAIN_SECS(sbi
)) {
2041 if (dir
== ALLOC_RIGHT
) {
2042 secno
= find_next_zero_bit(free_i
->free_secmap
,
2044 f2fs_bug_on(sbi
, secno
>= MAIN_SECS(sbi
));
2047 left_start
= hint
- 1;
2053 while (test_bit(left_start
, free_i
->free_secmap
)) {
2054 if (left_start
> 0) {
2058 left_start
= find_next_zero_bit(free_i
->free_secmap
,
2060 f2fs_bug_on(sbi
, left_start
>= MAIN_SECS(sbi
));
2065 segno
= GET_SEG_FROM_SEC(sbi
, secno
);
2066 zoneno
= GET_ZONE_FROM_SEC(sbi
, secno
);
2068 /* give up on finding another zone */
2071 if (sbi
->secs_per_zone
== 1)
2073 if (zoneno
== old_zoneno
)
2075 if (dir
== ALLOC_LEFT
) {
2076 if (!go_left
&& zoneno
+ 1 >= total_zones
)
2078 if (go_left
&& zoneno
== 0)
2081 for (i
= 0; i
< NR_CURSEG_TYPE
; i
++)
2082 if (CURSEG_I(sbi
, i
)->zone
== zoneno
)
2085 if (i
< NR_CURSEG_TYPE
) {
2086 /* zone is in user, try another */
2088 hint
= zoneno
* sbi
->secs_per_zone
- 1;
2089 else if (zoneno
+ 1 >= total_zones
)
2092 hint
= (zoneno
+ 1) * sbi
->secs_per_zone
;
2094 goto find_other_zone
;
2097 /* set it as dirty segment in free segmap */
2098 f2fs_bug_on(sbi
, test_bit(segno
, free_i
->free_segmap
));
2099 __set_inuse(sbi
, segno
);
2101 spin_unlock(&free_i
->segmap_lock
);
2104 static void reset_curseg(struct f2fs_sb_info
*sbi
, int type
, int modified
)
2106 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
2107 struct summary_footer
*sum_footer
;
2109 curseg
->segno
= curseg
->next_segno
;
2110 curseg
->zone
= GET_ZONE_FROM_SEG(sbi
, curseg
->segno
);
2111 curseg
->next_blkoff
= 0;
2112 curseg
->next_segno
= NULL_SEGNO
;
2114 sum_footer
= &(curseg
->sum_blk
->footer
);
2115 memset(sum_footer
, 0, sizeof(struct summary_footer
));
2116 if (IS_DATASEG(type
))
2117 SET_SUM_TYPE(sum_footer
, SUM_TYPE_DATA
);
2118 if (IS_NODESEG(type
))
2119 SET_SUM_TYPE(sum_footer
, SUM_TYPE_NODE
);
2120 __set_sit_entry_type(sbi
, type
, curseg
->segno
, modified
);
2123 static unsigned int __get_next_segno(struct f2fs_sb_info
*sbi
, int type
)
2125 /* if segs_per_sec is large than 1, we need to keep original policy. */
2126 if (sbi
->segs_per_sec
!= 1)
2127 return CURSEG_I(sbi
, type
)->segno
;
2129 if (test_opt(sbi
, NOHEAP
) &&
2130 (type
== CURSEG_HOT_DATA
|| IS_NODESEG(type
)))
2133 if (SIT_I(sbi
)->last_victim
[ALLOC_NEXT
])
2134 return SIT_I(sbi
)->last_victim
[ALLOC_NEXT
];
2136 /* find segments from 0 to reuse freed segments */
2137 if (F2FS_OPTION(sbi
).alloc_mode
== ALLOC_MODE_REUSE
)
2140 return CURSEG_I(sbi
, type
)->segno
;
2144 * Allocate a current working segment.
2145 * This function always allocates a free segment in LFS manner.
2147 static void new_curseg(struct f2fs_sb_info
*sbi
, int type
, bool new_sec
)
2149 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
2150 unsigned int segno
= curseg
->segno
;
2151 int dir
= ALLOC_LEFT
;
2153 write_sum_page(sbi
, curseg
->sum_blk
,
2154 GET_SUM_BLOCK(sbi
, segno
));
2155 if (type
== CURSEG_WARM_DATA
|| type
== CURSEG_COLD_DATA
)
2158 if (test_opt(sbi
, NOHEAP
))
2161 segno
= __get_next_segno(sbi
, type
);
2162 get_new_segment(sbi
, &segno
, new_sec
, dir
);
2163 curseg
->next_segno
= segno
;
2164 reset_curseg(sbi
, type
, 1);
2165 curseg
->alloc_type
= LFS
;
2168 static void __next_free_blkoff(struct f2fs_sb_info
*sbi
,
2169 struct curseg_info
*seg
, block_t start
)
2171 struct seg_entry
*se
= get_seg_entry(sbi
, seg
->segno
);
2172 int entries
= SIT_VBLOCK_MAP_SIZE
/ sizeof(unsigned long);
2173 unsigned long *target_map
= SIT_I(sbi
)->tmp_map
;
2174 unsigned long *ckpt_map
= (unsigned long *)se
->ckpt_valid_map
;
2175 unsigned long *cur_map
= (unsigned long *)se
->cur_valid_map
;
2178 for (i
= 0; i
< entries
; i
++)
2179 target_map
[i
] = ckpt_map
[i
] | cur_map
[i
];
2181 pos
= __find_rev_next_zero_bit(target_map
, sbi
->blocks_per_seg
, start
);
2183 seg
->next_blkoff
= pos
;
2187 * If a segment is written by LFS manner, next block offset is just obtained
2188 * by increasing the current block offset. However, if a segment is written by
2189 * SSR manner, next block offset obtained by calling __next_free_blkoff
2191 static void __refresh_next_blkoff(struct f2fs_sb_info
*sbi
,
2192 struct curseg_info
*seg
)
2194 if (seg
->alloc_type
== SSR
)
2195 __next_free_blkoff(sbi
, seg
, seg
->next_blkoff
+ 1);
2201 * This function always allocates a used segment(from dirty seglist) by SSR
2202 * manner, so it should recover the existing segment information of valid blocks
2204 static void change_curseg(struct f2fs_sb_info
*sbi
, int type
)
2206 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
2207 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
2208 unsigned int new_segno
= curseg
->next_segno
;
2209 struct f2fs_summary_block
*sum_node
;
2210 struct page
*sum_page
;
2212 write_sum_page(sbi
, curseg
->sum_blk
,
2213 GET_SUM_BLOCK(sbi
, curseg
->segno
));
2214 __set_test_and_inuse(sbi
, new_segno
);
2216 mutex_lock(&dirty_i
->seglist_lock
);
2217 __remove_dirty_segment(sbi
, new_segno
, PRE
);
2218 __remove_dirty_segment(sbi
, new_segno
, DIRTY
);
2219 mutex_unlock(&dirty_i
->seglist_lock
);
2221 reset_curseg(sbi
, type
, 1);
2222 curseg
->alloc_type
= SSR
;
2223 __next_free_blkoff(sbi
, curseg
, 0);
2225 sum_page
= get_sum_page(sbi
, new_segno
);
2226 sum_node
= (struct f2fs_summary_block
*)page_address(sum_page
);
2227 memcpy(curseg
->sum_blk
, sum_node
, SUM_ENTRY_SIZE
);
2228 f2fs_put_page(sum_page
, 1);
2231 static int get_ssr_segment(struct f2fs_sb_info
*sbi
, int type
)
2233 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
2234 const struct victim_selection
*v_ops
= DIRTY_I(sbi
)->v_ops
;
2235 unsigned segno
= NULL_SEGNO
;
2237 bool reversed
= false;
2239 /* need_SSR() already forces to do this */
2240 if (v_ops
->get_victim(sbi
, &segno
, BG_GC
, type
, SSR
)) {
2241 curseg
->next_segno
= segno
;
2245 /* For node segments, let's do SSR more intensively */
2246 if (IS_NODESEG(type
)) {
2247 if (type
>= CURSEG_WARM_NODE
) {
2249 i
= CURSEG_COLD_NODE
;
2251 i
= CURSEG_HOT_NODE
;
2253 cnt
= NR_CURSEG_NODE_TYPE
;
2255 if (type
>= CURSEG_WARM_DATA
) {
2257 i
= CURSEG_COLD_DATA
;
2259 i
= CURSEG_HOT_DATA
;
2261 cnt
= NR_CURSEG_DATA_TYPE
;
2264 for (; cnt
-- > 0; reversed
? i
-- : i
++) {
2267 if (v_ops
->get_victim(sbi
, &segno
, BG_GC
, i
, SSR
)) {
2268 curseg
->next_segno
= segno
;
2276 * flush out current segment and replace it with new segment
2277 * This function should be returned with success, otherwise BUG
2279 static void allocate_segment_by_default(struct f2fs_sb_info
*sbi
,
2280 int type
, bool force
)
2282 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
2285 new_curseg(sbi
, type
, true);
2286 else if (!is_set_ckpt_flags(sbi
, CP_CRC_RECOVERY_FLAG
) &&
2287 type
== CURSEG_WARM_NODE
)
2288 new_curseg(sbi
, type
, false);
2289 else if (curseg
->alloc_type
== LFS
&& is_next_segment_free(sbi
, type
))
2290 new_curseg(sbi
, type
, false);
2291 else if (need_SSR(sbi
) && get_ssr_segment(sbi
, type
))
2292 change_curseg(sbi
, type
);
2294 new_curseg(sbi
, type
, false);
2296 stat_inc_seg_type(sbi
, curseg
);
2299 void allocate_new_segments(struct f2fs_sb_info
*sbi
)
2301 struct curseg_info
*curseg
;
2302 unsigned int old_segno
;
2305 down_write(&SIT_I(sbi
)->sentry_lock
);
2307 for (i
= CURSEG_HOT_DATA
; i
<= CURSEG_COLD_DATA
; i
++) {
2308 curseg
= CURSEG_I(sbi
, i
);
2309 old_segno
= curseg
->segno
;
2310 SIT_I(sbi
)->s_ops
->allocate_segment(sbi
, i
, true);
2311 locate_dirty_segment(sbi
, old_segno
);
2314 up_write(&SIT_I(sbi
)->sentry_lock
);
2317 static const struct segment_allocation default_salloc_ops
= {
2318 .allocate_segment
= allocate_segment_by_default
,
2321 bool exist_trim_candidates(struct f2fs_sb_info
*sbi
, struct cp_control
*cpc
)
2323 __u64 trim_start
= cpc
->trim_start
;
2324 bool has_candidate
= false;
2326 down_write(&SIT_I(sbi
)->sentry_lock
);
2327 for (; cpc
->trim_start
<= cpc
->trim_end
; cpc
->trim_start
++) {
2328 if (add_discard_addrs(sbi
, cpc
, true)) {
2329 has_candidate
= true;
2333 up_write(&SIT_I(sbi
)->sentry_lock
);
2335 cpc
->trim_start
= trim_start
;
2336 return has_candidate
;
2339 static void __issue_discard_cmd_range(struct f2fs_sb_info
*sbi
,
2340 struct discard_policy
*dpolicy
,
2341 unsigned int start
, unsigned int end
)
2343 struct discard_cmd_control
*dcc
= SM_I(sbi
)->dcc_info
;
2344 struct discard_cmd
*prev_dc
= NULL
, *next_dc
= NULL
;
2345 struct rb_node
**insert_p
= NULL
, *insert_parent
= NULL
;
2346 struct discard_cmd
*dc
;
2347 struct blk_plug plug
;
2353 mutex_lock(&dcc
->cmd_lock
);
2354 f2fs_bug_on(sbi
, !__check_rb_tree_consistence(sbi
, &dcc
->root
));
2356 dc
= (struct discard_cmd
*)__lookup_rb_tree_ret(&dcc
->root
,
2358 (struct rb_entry
**)&prev_dc
,
2359 (struct rb_entry
**)&next_dc
,
2360 &insert_p
, &insert_parent
, true);
2364 blk_start_plug(&plug
);
2366 while (dc
&& dc
->lstart
<= end
) {
2367 struct rb_node
*node
;
2369 if (dc
->len
< dpolicy
->granularity
)
2372 if (dc
->state
!= D_PREP
) {
2373 list_move_tail(&dc
->list
, &dcc
->fstrim_list
);
2377 __submit_discard_cmd(sbi
, dpolicy
, dc
);
2379 if (++issued
>= dpolicy
->max_requests
) {
2380 start
= dc
->lstart
+ dc
->len
;
2382 blk_finish_plug(&plug
);
2383 mutex_unlock(&dcc
->cmd_lock
);
2384 __wait_all_discard_cmd(sbi
, NULL
);
2385 congestion_wait(BLK_RW_ASYNC
, HZ
/50);
2389 node
= rb_next(&dc
->rb_node
);
2390 dc
= rb_entry_safe(node
, struct discard_cmd
, rb_node
);
2392 if (fatal_signal_pending(current
))
2396 blk_finish_plug(&plug
);
2397 mutex_unlock(&dcc
->cmd_lock
);
2400 int f2fs_trim_fs(struct f2fs_sb_info
*sbi
, struct fstrim_range
*range
)
2402 __u64 start
= F2FS_BYTES_TO_BLK(range
->start
);
2403 __u64 end
= start
+ F2FS_BYTES_TO_BLK(range
->len
) - 1;
2404 unsigned int start_segno
, end_segno
;
2405 block_t start_block
, end_block
;
2406 struct cp_control cpc
;
2407 struct discard_policy dpolicy
;
2408 unsigned long long trimmed
= 0;
2411 if (start
>= MAX_BLKADDR(sbi
) || range
->len
< sbi
->blocksize
)
2414 if (end
<= MAIN_BLKADDR(sbi
))
2417 if (is_sbi_flag_set(sbi
, SBI_NEED_FSCK
)) {
2418 f2fs_msg(sbi
->sb
, KERN_WARNING
,
2419 "Found FS corruption, run fsck to fix.");
2423 /* start/end segment number in main_area */
2424 start_segno
= (start
<= MAIN_BLKADDR(sbi
)) ? 0 : GET_SEGNO(sbi
, start
);
2425 end_segno
= (end
>= MAX_BLKADDR(sbi
)) ? MAIN_SEGS(sbi
) - 1 :
2426 GET_SEGNO(sbi
, end
);
2428 cpc
.reason
= CP_DISCARD
;
2429 cpc
.trim_minlen
= max_t(__u64
, 1, F2FS_BYTES_TO_BLK(range
->minlen
));
2430 cpc
.trim_start
= start_segno
;
2431 cpc
.trim_end
= end_segno
;
2433 if (sbi
->discard_blks
== 0)
2436 mutex_lock(&sbi
->gc_mutex
);
2437 err
= write_checkpoint(sbi
, &cpc
);
2438 mutex_unlock(&sbi
->gc_mutex
);
2442 start_block
= START_BLOCK(sbi
, start_segno
);
2443 end_block
= START_BLOCK(sbi
, end_segno
+ 1);
2445 __init_discard_policy(sbi
, &dpolicy
, DPOLICY_FSTRIM
, cpc
.trim_minlen
);
2446 __issue_discard_cmd_range(sbi
, &dpolicy
, start_block
, end_block
);
2449 * We filed discard candidates, but actually we don't need to wait for
2450 * all of them, since they'll be issued in idle time along with runtime
2451 * discard option. User configuration looks like using runtime discard
2452 * or periodic fstrim instead of it.
2454 if (!test_opt(sbi
, DISCARD
)) {
2455 trimmed
= __wait_discard_cmd_range(sbi
, &dpolicy
,
2456 start_block
, end_block
);
2457 range
->len
= F2FS_BLK_TO_BYTES(trimmed
);
2463 static bool __has_curseg_space(struct f2fs_sb_info
*sbi
, int type
)
2465 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
2466 if (curseg
->next_blkoff
< sbi
->blocks_per_seg
)
2471 int rw_hint_to_seg_type(enum rw_hint hint
)
2474 case WRITE_LIFE_SHORT
:
2475 return CURSEG_HOT_DATA
;
2476 case WRITE_LIFE_EXTREME
:
2477 return CURSEG_COLD_DATA
;
2479 return CURSEG_WARM_DATA
;
2483 /* This returns write hints for each segment type. This hints will be
2484 * passed down to block layer. There are mapping tables which depend on
2485 * the mount option 'whint_mode'.
2487 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
2489 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
2493 * META WRITE_LIFE_NOT_SET
2497 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
2498 * extension list " "
2501 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
2502 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
2503 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
2504 * WRITE_LIFE_NONE " "
2505 * WRITE_LIFE_MEDIUM " "
2506 * WRITE_LIFE_LONG " "
2509 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
2510 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
2511 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
2512 * WRITE_LIFE_NONE " WRITE_LIFE_NONE
2513 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
2514 * WRITE_LIFE_LONG " WRITE_LIFE_LONG
2516 * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
2520 * META WRITE_LIFE_MEDIUM;
2521 * HOT_NODE WRITE_LIFE_NOT_SET
2523 * COLD_NODE WRITE_LIFE_NONE
2524 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
2525 * extension list " "
2528 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
2529 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
2530 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
2531 * WRITE_LIFE_NONE " "
2532 * WRITE_LIFE_MEDIUM " "
2533 * WRITE_LIFE_LONG " "
2536 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
2537 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
2538 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
2539 * WRITE_LIFE_NONE " WRITE_LIFE_NONE
2540 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
2541 * WRITE_LIFE_LONG " WRITE_LIFE_LONG
2544 enum rw_hint
io_type_to_rw_hint(struct f2fs_sb_info
*sbi
,
2545 enum page_type type
, enum temp_type temp
)
2547 if (F2FS_OPTION(sbi
).whint_mode
== WHINT_MODE_USER
) {
2550 return WRITE_LIFE_NOT_SET
;
2551 else if (temp
== HOT
)
2552 return WRITE_LIFE_SHORT
;
2553 else if (temp
== COLD
)
2554 return WRITE_LIFE_EXTREME
;
2556 return WRITE_LIFE_NOT_SET
;
2558 } else if (F2FS_OPTION(sbi
).whint_mode
== WHINT_MODE_FS
) {
2561 return WRITE_LIFE_LONG
;
2562 else if (temp
== HOT
)
2563 return WRITE_LIFE_SHORT
;
2564 else if (temp
== COLD
)
2565 return WRITE_LIFE_EXTREME
;
2566 } else if (type
== NODE
) {
2567 if (temp
== WARM
|| temp
== HOT
)
2568 return WRITE_LIFE_NOT_SET
;
2569 else if (temp
== COLD
)
2570 return WRITE_LIFE_NONE
;
2571 } else if (type
== META
) {
2572 return WRITE_LIFE_MEDIUM
;
2575 return WRITE_LIFE_NOT_SET
;
2578 static int __get_segment_type_2(struct f2fs_io_info
*fio
)
2580 if (fio
->type
== DATA
)
2581 return CURSEG_HOT_DATA
;
2583 return CURSEG_HOT_NODE
;
2586 static int __get_segment_type_4(struct f2fs_io_info
*fio
)
2588 if (fio
->type
== DATA
) {
2589 struct inode
*inode
= fio
->page
->mapping
->host
;
2591 if (S_ISDIR(inode
->i_mode
))
2592 return CURSEG_HOT_DATA
;
2594 return CURSEG_COLD_DATA
;
2596 if (IS_DNODE(fio
->page
) && is_cold_node(fio
->page
))
2597 return CURSEG_WARM_NODE
;
2599 return CURSEG_COLD_NODE
;
2603 static int __get_segment_type_6(struct f2fs_io_info
*fio
)
2605 if (fio
->type
== DATA
) {
2606 struct inode
*inode
= fio
->page
->mapping
->host
;
2608 if (is_cold_data(fio
->page
) || file_is_cold(inode
))
2609 return CURSEG_COLD_DATA
;
2610 if (file_is_hot(inode
) ||
2611 is_inode_flag_set(inode
, FI_HOT_DATA
))
2612 return CURSEG_HOT_DATA
;
2613 return rw_hint_to_seg_type(inode
->i_write_hint
);
2615 if (IS_DNODE(fio
->page
))
2616 return is_cold_node(fio
->page
) ? CURSEG_WARM_NODE
:
2618 return CURSEG_COLD_NODE
;
2622 static int __get_segment_type(struct f2fs_io_info
*fio
)
2626 switch (F2FS_OPTION(fio
->sbi
).active_logs
) {
2628 type
= __get_segment_type_2(fio
);
2631 type
= __get_segment_type_4(fio
);
2634 type
= __get_segment_type_6(fio
);
2637 f2fs_bug_on(fio
->sbi
, true);
2642 else if (IS_WARM(type
))
2649 void allocate_data_block(struct f2fs_sb_info
*sbi
, struct page
*page
,
2650 block_t old_blkaddr
, block_t
*new_blkaddr
,
2651 struct f2fs_summary
*sum
, int type
,
2652 struct f2fs_io_info
*fio
, bool add_list
)
2654 struct sit_info
*sit_i
= SIT_I(sbi
);
2655 struct curseg_info
*curseg
= CURSEG_I(sbi
, type
);
2657 down_read(&SM_I(sbi
)->curseg_lock
);
2659 mutex_lock(&curseg
->curseg_mutex
);
2660 down_write(&sit_i
->sentry_lock
);
2662 *new_blkaddr
= NEXT_FREE_BLKADDR(sbi
, curseg
);
2664 f2fs_wait_discard_bio(sbi
, *new_blkaddr
);
2667 * __add_sum_entry should be resided under the curseg_mutex
2668 * because, this function updates a summary entry in the
2669 * current summary block.
2671 __add_sum_entry(sbi
, type
, sum
);
2673 __refresh_next_blkoff(sbi
, curseg
);
2675 stat_inc_block_count(sbi
, curseg
);
2678 * SIT information should be updated before segment allocation,
2679 * since SSR needs latest valid block information.
2681 update_sit_entry(sbi
, *new_blkaddr
, 1);
2682 if (GET_SEGNO(sbi
, old_blkaddr
) != NULL_SEGNO
)
2683 update_sit_entry(sbi
, old_blkaddr
, -1);
2685 if (!__has_curseg_space(sbi
, type
))
2686 sit_i
->s_ops
->allocate_segment(sbi
, type
, false);
2689 * segment dirty status should be updated after segment allocation,
2690 * so we just need to update status only one time after previous
2691 * segment being closed.
2693 locate_dirty_segment(sbi
, GET_SEGNO(sbi
, old_blkaddr
));
2694 locate_dirty_segment(sbi
, GET_SEGNO(sbi
, *new_blkaddr
));
2696 up_write(&sit_i
->sentry_lock
);
2698 if (page
&& IS_NODESEG(type
)) {
2699 fill_node_footer_blkaddr(page
, NEXT_FREE_BLKADDR(sbi
, curseg
));
2701 f2fs_inode_chksum_set(sbi
, page
);
2705 struct f2fs_bio_info
*io
;
2707 INIT_LIST_HEAD(&fio
->list
);
2708 fio
->in_list
= true;
2709 io
= sbi
->write_io
[fio
->type
] + fio
->temp
;
2710 spin_lock(&io
->io_lock
);
2711 list_add_tail(&fio
->list
, &io
->io_list
);
2712 spin_unlock(&io
->io_lock
);
2715 mutex_unlock(&curseg
->curseg_mutex
);
2717 up_read(&SM_I(sbi
)->curseg_lock
);
2720 static void update_device_state(struct f2fs_io_info
*fio
)
2722 struct f2fs_sb_info
*sbi
= fio
->sbi
;
2723 unsigned int devidx
;
2728 devidx
= f2fs_target_device_index(sbi
, fio
->new_blkaddr
);
2730 /* update device state for fsync */
2731 set_dirty_device(sbi
, fio
->ino
, devidx
, FLUSH_INO
);
2733 /* update device state for checkpoint */
2734 if (!f2fs_test_bit(devidx
, (char *)&sbi
->dirty_device
)) {
2735 spin_lock(&sbi
->dev_lock
);
2736 f2fs_set_bit(devidx
, (char *)&sbi
->dirty_device
);
2737 spin_unlock(&sbi
->dev_lock
);
2741 static void do_write_page(struct f2fs_summary
*sum
, struct f2fs_io_info
*fio
)
2743 int type
= __get_segment_type(fio
);
2747 allocate_data_block(fio
->sbi
, fio
->page
, fio
->old_blkaddr
,
2748 &fio
->new_blkaddr
, sum
, type
, fio
, true);
2750 /* writeout dirty page into bdev */
2751 err
= f2fs_submit_page_write(fio
);
2752 if (err
== -EAGAIN
) {
2753 fio
->old_blkaddr
= fio
->new_blkaddr
;
2756 update_device_state(fio
);
2760 void write_meta_page(struct f2fs_sb_info
*sbi
, struct page
*page
,
2761 enum iostat_type io_type
)
2763 struct f2fs_io_info fio
= {
2768 .op_flags
= REQ_SYNC
| REQ_META
| REQ_PRIO
,
2769 .old_blkaddr
= page
->index
,
2770 .new_blkaddr
= page
->index
,
2772 .encrypted_page
= NULL
,
2776 if (unlikely(page
->index
>= MAIN_BLKADDR(sbi
)))
2777 fio
.op_flags
&= ~REQ_META
;
2779 set_page_writeback(page
);
2780 ClearPageError(page
);
2781 f2fs_submit_page_write(&fio
);
2783 f2fs_update_iostat(sbi
, io_type
, F2FS_BLKSIZE
);
2786 void write_node_page(unsigned int nid
, struct f2fs_io_info
*fio
)
2788 struct f2fs_summary sum
;
2790 set_summary(&sum
, nid
, 0, 0);
2791 do_write_page(&sum
, fio
);
2793 f2fs_update_iostat(fio
->sbi
, fio
->io_type
, F2FS_BLKSIZE
);
2796 void write_data_page(struct dnode_of_data
*dn
, struct f2fs_io_info
*fio
)
2798 struct f2fs_sb_info
*sbi
= fio
->sbi
;
2799 struct f2fs_summary sum
;
2800 struct node_info ni
;
2802 f2fs_bug_on(sbi
, dn
->data_blkaddr
== NULL_ADDR
);
2803 get_node_info(sbi
, dn
->nid
, &ni
);
2804 set_summary(&sum
, dn
->nid
, dn
->ofs_in_node
, ni
.version
);
2805 do_write_page(&sum
, fio
);
2806 f2fs_update_data_blkaddr(dn
, fio
->new_blkaddr
);
2808 f2fs_update_iostat(sbi
, fio
->io_type
, F2FS_BLKSIZE
);
2811 int rewrite_data_page(struct f2fs_io_info
*fio
)
2814 struct f2fs_sb_info
*sbi
= fio
->sbi
;
2816 fio
->new_blkaddr
= fio
->old_blkaddr
;
2817 /* i/o temperature is needed for passing down write hints */
2818 __get_segment_type(fio
);
2820 f2fs_bug_on(sbi
, !IS_DATASEG(get_seg_entry(sbi
,
2821 GET_SEGNO(sbi
, fio
->new_blkaddr
))->type
));
2823 stat_inc_inplace_blocks(fio
->sbi
);
2825 err
= f2fs_submit_page_bio(fio
);
2827 update_device_state(fio
);
2829 f2fs_update_iostat(fio
->sbi
, fio
->io_type
, F2FS_BLKSIZE
);
2834 static inline int __f2fs_get_curseg(struct f2fs_sb_info
*sbi
,
2839 for (i
= CURSEG_HOT_DATA
; i
< NO_CHECK_TYPE
; i
++) {
2840 if (CURSEG_I(sbi
, i
)->segno
== segno
)
2846 void __f2fs_replace_block(struct f2fs_sb_info
*sbi
, struct f2fs_summary
*sum
,
2847 block_t old_blkaddr
, block_t new_blkaddr
,
2848 bool recover_curseg
, bool recover_newaddr
)
2850 struct sit_info
*sit_i
= SIT_I(sbi
);
2851 struct curseg_info
*curseg
;
2852 unsigned int segno
, old_cursegno
;
2853 struct seg_entry
*se
;
2855 unsigned short old_blkoff
;
2857 segno
= GET_SEGNO(sbi
, new_blkaddr
);
2858 se
= get_seg_entry(sbi
, segno
);
2861 down_write(&SM_I(sbi
)->curseg_lock
);
2863 if (!recover_curseg
) {
2864 /* for recovery flow */
2865 if (se
->valid_blocks
== 0 && !IS_CURSEG(sbi
, segno
)) {
2866 if (old_blkaddr
== NULL_ADDR
)
2867 type
= CURSEG_COLD_DATA
;
2869 type
= CURSEG_WARM_DATA
;
2872 if (IS_CURSEG(sbi
, segno
)) {
2873 /* se->type is volatile as SSR allocation */
2874 type
= __f2fs_get_curseg(sbi
, segno
);
2875 f2fs_bug_on(sbi
, type
== NO_CHECK_TYPE
);
2877 type
= CURSEG_WARM_DATA
;
2881 f2fs_bug_on(sbi
, !IS_DATASEG(type
));
2882 curseg
= CURSEG_I(sbi
, type
);
2884 mutex_lock(&curseg
->curseg_mutex
);
2885 down_write(&sit_i
->sentry_lock
);
2887 old_cursegno
= curseg
->segno
;
2888 old_blkoff
= curseg
->next_blkoff
;
2890 /* change the current segment */
2891 if (segno
!= curseg
->segno
) {
2892 curseg
->next_segno
= segno
;
2893 change_curseg(sbi
, type
);
2896 curseg
->next_blkoff
= GET_BLKOFF_FROM_SEG0(sbi
, new_blkaddr
);
2897 __add_sum_entry(sbi
, type
, sum
);
2899 if (!recover_curseg
|| recover_newaddr
)
2900 update_sit_entry(sbi
, new_blkaddr
, 1);
2901 if (GET_SEGNO(sbi
, old_blkaddr
) != NULL_SEGNO
)
2902 update_sit_entry(sbi
, old_blkaddr
, -1);
2904 locate_dirty_segment(sbi
, GET_SEGNO(sbi
, old_blkaddr
));
2905 locate_dirty_segment(sbi
, GET_SEGNO(sbi
, new_blkaddr
));
2907 locate_dirty_segment(sbi
, old_cursegno
);
2909 if (recover_curseg
) {
2910 if (old_cursegno
!= curseg
->segno
) {
2911 curseg
->next_segno
= old_cursegno
;
2912 change_curseg(sbi
, type
);
2914 curseg
->next_blkoff
= old_blkoff
;
2917 up_write(&sit_i
->sentry_lock
);
2918 mutex_unlock(&curseg
->curseg_mutex
);
2919 up_write(&SM_I(sbi
)->curseg_lock
);
2922 void f2fs_replace_block(struct f2fs_sb_info
*sbi
, struct dnode_of_data
*dn
,
2923 block_t old_addr
, block_t new_addr
,
2924 unsigned char version
, bool recover_curseg
,
2925 bool recover_newaddr
)
2927 struct f2fs_summary sum
;
2929 set_summary(&sum
, dn
->nid
, dn
->ofs_in_node
, version
);
2931 __f2fs_replace_block(sbi
, &sum
, old_addr
, new_addr
,
2932 recover_curseg
, recover_newaddr
);
2934 f2fs_update_data_blkaddr(dn
, new_addr
);
2937 void f2fs_wait_on_page_writeback(struct page
*page
,
2938 enum page_type type
, bool ordered
)
2940 if (PageWriteback(page
)) {
2941 struct f2fs_sb_info
*sbi
= F2FS_P_SB(page
);
2943 f2fs_submit_merged_write_cond(sbi
, page
->mapping
->host
,
2944 0, page
->index
, type
);
2946 wait_on_page_writeback(page
);
2948 wait_for_stable_page(page
);
2952 void f2fs_wait_on_block_writeback(struct inode
*inode
, block_t blkaddr
)
2954 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
2957 if (!f2fs_post_read_required(inode
))
2960 if (!is_valid_data_blkaddr(sbi
, blkaddr
))
2963 cpage
= find_lock_page(META_MAPPING(sbi
), blkaddr
);
2965 f2fs_wait_on_page_writeback(cpage
, DATA
, true);
2966 f2fs_put_page(cpage
, 1);
2970 static void read_compacted_summaries(struct f2fs_sb_info
*sbi
)
2972 struct f2fs_checkpoint
*ckpt
= F2FS_CKPT(sbi
);
2973 struct curseg_info
*seg_i
;
2974 unsigned char *kaddr
;
2979 start
= start_sum_block(sbi
);
2981 page
= get_meta_page(sbi
, start
++);
2982 kaddr
= (unsigned char *)page_address(page
);
2984 /* Step 1: restore nat cache */
2985 seg_i
= CURSEG_I(sbi
, CURSEG_HOT_DATA
);
2986 memcpy(seg_i
->journal
, kaddr
, SUM_JOURNAL_SIZE
);
2988 /* Step 2: restore sit cache */
2989 seg_i
= CURSEG_I(sbi
, CURSEG_COLD_DATA
);
2990 memcpy(seg_i
->journal
, kaddr
+ SUM_JOURNAL_SIZE
, SUM_JOURNAL_SIZE
);
2991 offset
= 2 * SUM_JOURNAL_SIZE
;
2993 /* Step 3: restore summary entries */
2994 for (i
= CURSEG_HOT_DATA
; i
<= CURSEG_COLD_DATA
; i
++) {
2995 unsigned short blk_off
;
2998 seg_i
= CURSEG_I(sbi
, i
);
2999 segno
= le32_to_cpu(ckpt
->cur_data_segno
[i
]);
3000 blk_off
= le16_to_cpu(ckpt
->cur_data_blkoff
[i
]);
3001 seg_i
->next_segno
= segno
;
3002 reset_curseg(sbi
, i
, 0);
3003 seg_i
->alloc_type
= ckpt
->alloc_type
[i
];
3004 seg_i
->next_blkoff
= blk_off
;
3006 if (seg_i
->alloc_type
== SSR
)
3007 blk_off
= sbi
->blocks_per_seg
;
3009 for (j
= 0; j
< blk_off
; j
++) {
3010 struct f2fs_summary
*s
;
3011 s
= (struct f2fs_summary
*)(kaddr
+ offset
);
3012 seg_i
->sum_blk
->entries
[j
] = *s
;
3013 offset
+= SUMMARY_SIZE
;
3014 if (offset
+ SUMMARY_SIZE
<= PAGE_SIZE
-
3018 f2fs_put_page(page
, 1);
3021 page
= get_meta_page(sbi
, start
++);
3022 kaddr
= (unsigned char *)page_address(page
);
3026 f2fs_put_page(page
, 1);
3029 static int read_normal_summaries(struct f2fs_sb_info
*sbi
, int type
)
3031 struct f2fs_checkpoint
*ckpt
= F2FS_CKPT(sbi
);
3032 struct f2fs_summary_block
*sum
;
3033 struct curseg_info
*curseg
;
3035 unsigned short blk_off
;
3036 unsigned int segno
= 0;
3037 block_t blk_addr
= 0;
3039 /* get segment number and block addr */
3040 if (IS_DATASEG(type
)) {
3041 segno
= le32_to_cpu(ckpt
->cur_data_segno
[type
]);
3042 blk_off
= le16_to_cpu(ckpt
->cur_data_blkoff
[type
-
3044 if (__exist_node_summaries(sbi
))
3045 blk_addr
= sum_blk_addr(sbi
, NR_CURSEG_TYPE
, type
);
3047 blk_addr
= sum_blk_addr(sbi
, NR_CURSEG_DATA_TYPE
, type
);
3049 segno
= le32_to_cpu(ckpt
->cur_node_segno
[type
-
3051 blk_off
= le16_to_cpu(ckpt
->cur_node_blkoff
[type
-
3053 if (__exist_node_summaries(sbi
))
3054 blk_addr
= sum_blk_addr(sbi
, NR_CURSEG_NODE_TYPE
,
3055 type
- CURSEG_HOT_NODE
);
3057 blk_addr
= GET_SUM_BLOCK(sbi
, segno
);
3060 new = get_meta_page(sbi
, blk_addr
);
3061 sum
= (struct f2fs_summary_block
*)page_address(new);
3063 if (IS_NODESEG(type
)) {
3064 if (__exist_node_summaries(sbi
)) {
3065 struct f2fs_summary
*ns
= &sum
->entries
[0];
3067 for (i
= 0; i
< sbi
->blocks_per_seg
; i
++, ns
++) {
3069 ns
->ofs_in_node
= 0;
3072 restore_node_summary(sbi
, segno
, sum
);
3076 /* set uncompleted segment to curseg */
3077 curseg
= CURSEG_I(sbi
, type
);
3078 mutex_lock(&curseg
->curseg_mutex
);
3080 /* update journal info */
3081 down_write(&curseg
->journal_rwsem
);
3082 memcpy(curseg
->journal
, &sum
->journal
, SUM_JOURNAL_SIZE
);
3083 up_write(&curseg
->journal_rwsem
);
3085 memcpy(curseg
->sum_blk
->entries
, sum
->entries
, SUM_ENTRY_SIZE
);
3086 memcpy(&curseg
->sum_blk
->footer
, &sum
->footer
, SUM_FOOTER_SIZE
);
3087 curseg
->next_segno
= segno
;
3088 reset_curseg(sbi
, type
, 0);
3089 curseg
->alloc_type
= ckpt
->alloc_type
[type
];
3090 curseg
->next_blkoff
= blk_off
;
3091 mutex_unlock(&curseg
->curseg_mutex
);
3092 f2fs_put_page(new, 1);
3096 static int restore_curseg_summaries(struct f2fs_sb_info
*sbi
)
3098 struct f2fs_journal
*sit_j
= CURSEG_I(sbi
, CURSEG_COLD_DATA
)->journal
;
3099 struct f2fs_journal
*nat_j
= CURSEG_I(sbi
, CURSEG_HOT_DATA
)->journal
;
3100 int type
= CURSEG_HOT_DATA
;
3103 if (is_set_ckpt_flags(sbi
, CP_COMPACT_SUM_FLAG
)) {
3104 int npages
= npages_for_summary_flush(sbi
, true);
3107 ra_meta_pages(sbi
, start_sum_block(sbi
), npages
,
3110 /* restore for compacted data summary */
3111 read_compacted_summaries(sbi
);
3112 type
= CURSEG_HOT_NODE
;
3115 if (__exist_node_summaries(sbi
))
3116 ra_meta_pages(sbi
, sum_blk_addr(sbi
, NR_CURSEG_TYPE
, type
),
3117 NR_CURSEG_TYPE
- type
, META_CP
, true);
3119 for (; type
<= CURSEG_COLD_NODE
; type
++) {
3120 err
= read_normal_summaries(sbi
, type
);
3125 /* sanity check for summary blocks */
3126 if (nats_in_cursum(nat_j
) > NAT_JOURNAL_ENTRIES
||
3127 sits_in_cursum(sit_j
) > SIT_JOURNAL_ENTRIES
)
3133 static void write_compacted_summaries(struct f2fs_sb_info
*sbi
, block_t blkaddr
)
3136 unsigned char *kaddr
;
3137 struct f2fs_summary
*summary
;
3138 struct curseg_info
*seg_i
;
3139 int written_size
= 0;
3142 page
= grab_meta_page(sbi
, blkaddr
++);
3143 kaddr
= (unsigned char *)page_address(page
);
3145 /* Step 1: write nat cache */
3146 seg_i
= CURSEG_I(sbi
, CURSEG_HOT_DATA
);
3147 memcpy(kaddr
, seg_i
->journal
, SUM_JOURNAL_SIZE
);
3148 written_size
+= SUM_JOURNAL_SIZE
;
3150 /* Step 2: write sit cache */
3151 seg_i
= CURSEG_I(sbi
, CURSEG_COLD_DATA
);
3152 memcpy(kaddr
+ written_size
, seg_i
->journal
, SUM_JOURNAL_SIZE
);
3153 written_size
+= SUM_JOURNAL_SIZE
;
3155 /* Step 3: write summary entries */
3156 for (i
= CURSEG_HOT_DATA
; i
<= CURSEG_COLD_DATA
; i
++) {
3157 unsigned short blkoff
;
3158 seg_i
= CURSEG_I(sbi
, i
);
3159 if (sbi
->ckpt
->alloc_type
[i
] == SSR
)
3160 blkoff
= sbi
->blocks_per_seg
;
3162 blkoff
= curseg_blkoff(sbi
, i
);
3164 for (j
= 0; j
< blkoff
; j
++) {
3166 page
= grab_meta_page(sbi
, blkaddr
++);
3167 kaddr
= (unsigned char *)page_address(page
);
3170 summary
= (struct f2fs_summary
*)(kaddr
+ written_size
);
3171 *summary
= seg_i
->sum_blk
->entries
[j
];
3172 written_size
+= SUMMARY_SIZE
;
3174 if (written_size
+ SUMMARY_SIZE
<= PAGE_SIZE
-
3178 set_page_dirty(page
);
3179 f2fs_put_page(page
, 1);
3184 set_page_dirty(page
);
3185 f2fs_put_page(page
, 1);
3189 static void write_normal_summaries(struct f2fs_sb_info
*sbi
,
3190 block_t blkaddr
, int type
)
3193 if (IS_DATASEG(type
))
3194 end
= type
+ NR_CURSEG_DATA_TYPE
;
3196 end
= type
+ NR_CURSEG_NODE_TYPE
;
3198 for (i
= type
; i
< end
; i
++)
3199 write_current_sum_page(sbi
, i
, blkaddr
+ (i
- type
));
3202 void write_data_summaries(struct f2fs_sb_info
*sbi
, block_t start_blk
)
3204 if (is_set_ckpt_flags(sbi
, CP_COMPACT_SUM_FLAG
))
3205 write_compacted_summaries(sbi
, start_blk
);
3207 write_normal_summaries(sbi
, start_blk
, CURSEG_HOT_DATA
);
3210 void write_node_summaries(struct f2fs_sb_info
*sbi
, block_t start_blk
)
3212 write_normal_summaries(sbi
, start_blk
, CURSEG_HOT_NODE
);
3215 int lookup_journal_in_cursum(struct f2fs_journal
*journal
, int type
,
3216 unsigned int val
, int alloc
)
3220 if (type
== NAT_JOURNAL
) {
3221 for (i
= 0; i
< nats_in_cursum(journal
); i
++) {
3222 if (le32_to_cpu(nid_in_journal(journal
, i
)) == val
)
3225 if (alloc
&& __has_cursum_space(journal
, 1, NAT_JOURNAL
))
3226 return update_nats_in_cursum(journal
, 1);
3227 } else if (type
== SIT_JOURNAL
) {
3228 for (i
= 0; i
< sits_in_cursum(journal
); i
++)
3229 if (le32_to_cpu(segno_in_journal(journal
, i
)) == val
)
3231 if (alloc
&& __has_cursum_space(journal
, 1, SIT_JOURNAL
))
3232 return update_sits_in_cursum(journal
, 1);
/* Read the SIT block currently holding @segno's entry. */
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
					unsigned int segno)
{
	return get_meta_page(sbi, current_sit_addr(sbi, segno));
}
3243 static struct page
*get_next_sit_page(struct f2fs_sb_info
*sbi
,
3246 struct sit_info
*sit_i
= SIT_I(sbi
);
3248 pgoff_t src_off
, dst_off
;
3250 src_off
= current_sit_addr(sbi
, start
);
3251 dst_off
= next_sit_addr(sbi
, src_off
);
3253 page
= grab_meta_page(sbi
, dst_off
);
3254 seg_info_to_sit_page(sbi
, page
, start
);
3256 set_page_dirty(page
);
3257 set_to_next_sit(sit_i
, start
);
3262 static struct sit_entry_set
*grab_sit_entry_set(void)
3264 struct sit_entry_set
*ses
=
3265 f2fs_kmem_cache_alloc(sit_entry_set_slab
, GFP_NOFS
);
3268 INIT_LIST_HEAD(&ses
->set_list
);
3272 static void release_sit_entry_set(struct sit_entry_set
*ses
)
3274 list_del(&ses
->set_list
);
3275 kmem_cache_free(sit_entry_set_slab
, ses
);
3278 static void adjust_sit_entry_set(struct sit_entry_set
*ses
,
3279 struct list_head
*head
)
3281 struct sit_entry_set
*next
= ses
;
3283 if (list_is_last(&ses
->set_list
, head
))
3286 list_for_each_entry_continue(next
, head
, set_list
)
3287 if (ses
->entry_cnt
<= next
->entry_cnt
)
3290 list_move_tail(&ses
->set_list
, &next
->set_list
);
3293 static void add_sit_entry(unsigned int segno
, struct list_head
*head
)
3295 struct sit_entry_set
*ses
;
3296 unsigned int start_segno
= START_SEGNO(segno
);
3298 list_for_each_entry(ses
, head
, set_list
) {
3299 if (ses
->start_segno
== start_segno
) {
3301 adjust_sit_entry_set(ses
, head
);
3306 ses
= grab_sit_entry_set();
3308 ses
->start_segno
= start_segno
;
3310 list_add(&ses
->set_list
, head
);
3313 static void add_sits_in_set(struct f2fs_sb_info
*sbi
)
3315 struct f2fs_sm_info
*sm_info
= SM_I(sbi
);
3316 struct list_head
*set_list
= &sm_info
->sit_entry_set
;
3317 unsigned long *bitmap
= SIT_I(sbi
)->dirty_sentries_bitmap
;
3320 for_each_set_bit(segno
, bitmap
, MAIN_SEGS(sbi
))
3321 add_sit_entry(segno
, set_list
);
3324 static void remove_sits_in_journal(struct f2fs_sb_info
*sbi
)
3326 struct curseg_info
*curseg
= CURSEG_I(sbi
, CURSEG_COLD_DATA
);
3327 struct f2fs_journal
*journal
= curseg
->journal
;
3330 down_write(&curseg
->journal_rwsem
);
3331 for (i
= 0; i
< sits_in_cursum(journal
); i
++) {
3335 segno
= le32_to_cpu(segno_in_journal(journal
, i
));
3336 dirtied
= __mark_sit_entry_dirty(sbi
, segno
);
3339 add_sit_entry(segno
, &SM_I(sbi
)->sit_entry_set
);
3341 update_sits_in_cursum(journal
, -i
);
3342 up_write(&curseg
->journal_rwsem
);
3346 * CP calls this function, which flushes SIT entries including sit_journal,
3347 * and moves prefree segs to free segs.
3349 void flush_sit_entries(struct f2fs_sb_info
*sbi
, struct cp_control
*cpc
)
3351 struct sit_info
*sit_i
= SIT_I(sbi
);
3352 unsigned long *bitmap
= sit_i
->dirty_sentries_bitmap
;
3353 struct curseg_info
*curseg
= CURSEG_I(sbi
, CURSEG_COLD_DATA
);
3354 struct f2fs_journal
*journal
= curseg
->journal
;
3355 struct sit_entry_set
*ses
, *tmp
;
3356 struct list_head
*head
= &SM_I(sbi
)->sit_entry_set
;
3357 bool to_journal
= true;
3358 struct seg_entry
*se
;
3360 down_write(&sit_i
->sentry_lock
);
3362 if (!sit_i
->dirty_sentries
)
3366 * add and account sit entries of dirty bitmap in sit entry
3369 add_sits_in_set(sbi
);
3372 * if there are no enough space in journal to store dirty sit
3373 * entries, remove all entries from journal and add and account
3374 * them in sit entry set.
3376 if (!__has_cursum_space(journal
, sit_i
->dirty_sentries
, SIT_JOURNAL
))
3377 remove_sits_in_journal(sbi
);
3380 * there are two steps to flush sit entries:
3381 * #1, flush sit entries to journal in current cold data summary block.
3382 * #2, flush sit entries to sit page.
3384 list_for_each_entry_safe(ses
, tmp
, head
, set_list
) {
3385 struct page
*page
= NULL
;
3386 struct f2fs_sit_block
*raw_sit
= NULL
;
3387 unsigned int start_segno
= ses
->start_segno
;
3388 unsigned int end
= min(start_segno
+ SIT_ENTRY_PER_BLOCK
,
3389 (unsigned long)MAIN_SEGS(sbi
));
3390 unsigned int segno
= start_segno
;
3393 !__has_cursum_space(journal
, ses
->entry_cnt
, SIT_JOURNAL
))
3397 down_write(&curseg
->journal_rwsem
);
3399 page
= get_next_sit_page(sbi
, start_segno
);
3400 raw_sit
= page_address(page
);
3403 /* flush dirty sit entries in region of current sit set */
3404 for_each_set_bit_from(segno
, bitmap
, end
) {
3405 int offset
, sit_offset
;
3407 se
= get_seg_entry(sbi
, segno
);
3409 /* add discard candidates */
3410 if (!(cpc
->reason
& CP_DISCARD
)) {
3411 cpc
->trim_start
= segno
;
3412 add_discard_addrs(sbi
, cpc
, false);
3416 offset
= lookup_journal_in_cursum(journal
,
3417 SIT_JOURNAL
, segno
, 1);
3418 f2fs_bug_on(sbi
, offset
< 0);
3419 segno_in_journal(journal
, offset
) =
3421 seg_info_to_raw_sit(se
,
3422 &sit_in_journal(journal
, offset
));
3424 sit_offset
= SIT_ENTRY_OFFSET(sit_i
, segno
);
3425 seg_info_to_raw_sit(se
,
3426 &raw_sit
->entries
[sit_offset
]);
3429 __clear_bit(segno
, bitmap
);
3430 sit_i
->dirty_sentries
--;
3435 up_write(&curseg
->journal_rwsem
);
3437 f2fs_put_page(page
, 1);
3439 f2fs_bug_on(sbi
, ses
->entry_cnt
);
3440 release_sit_entry_set(ses
);
3443 f2fs_bug_on(sbi
, !list_empty(head
));
3444 f2fs_bug_on(sbi
, sit_i
->dirty_sentries
);
3446 if (cpc
->reason
& CP_DISCARD
) {
3447 __u64 trim_start
= cpc
->trim_start
;
3449 for (; cpc
->trim_start
<= cpc
->trim_end
; cpc
->trim_start
++)
3450 add_discard_addrs(sbi
, cpc
, false);
3452 cpc
->trim_start
= trim_start
;
3454 up_write(&sit_i
->sentry_lock
);
3456 set_prefree_as_free_segments(sbi
);
3459 static int build_sit_info(struct f2fs_sb_info
*sbi
)
3461 struct f2fs_super_block
*raw_super
= F2FS_RAW_SUPER(sbi
);
3462 struct sit_info
*sit_i
;
3463 unsigned int sit_segs
, start
;
3465 unsigned int bitmap_size
;
3467 /* allocate memory for SIT information */
3468 sit_i
= f2fs_kzalloc(sbi
, sizeof(struct sit_info
), GFP_KERNEL
);
3472 SM_I(sbi
)->sit_info
= sit_i
;
3474 sit_i
->sentries
= f2fs_kvzalloc(sbi
, MAIN_SEGS(sbi
) *
3475 sizeof(struct seg_entry
), GFP_KERNEL
);
3476 if (!sit_i
->sentries
)
3479 bitmap_size
= f2fs_bitmap_size(MAIN_SEGS(sbi
));
3480 sit_i
->dirty_sentries_bitmap
= f2fs_kvzalloc(sbi
, bitmap_size
,
3482 if (!sit_i
->dirty_sentries_bitmap
)
3485 for (start
= 0; start
< MAIN_SEGS(sbi
); start
++) {
3486 sit_i
->sentries
[start
].cur_valid_map
3487 = f2fs_kzalloc(sbi
, SIT_VBLOCK_MAP_SIZE
, GFP_KERNEL
);
3488 sit_i
->sentries
[start
].ckpt_valid_map
3489 = f2fs_kzalloc(sbi
, SIT_VBLOCK_MAP_SIZE
, GFP_KERNEL
);
3490 if (!sit_i
->sentries
[start
].cur_valid_map
||
3491 !sit_i
->sentries
[start
].ckpt_valid_map
)
3494 #ifdef CONFIG_F2FS_CHECK_FS
3495 sit_i
->sentries
[start
].cur_valid_map_mir
3496 = f2fs_kzalloc(sbi
, SIT_VBLOCK_MAP_SIZE
, GFP_KERNEL
);
3497 if (!sit_i
->sentries
[start
].cur_valid_map_mir
)
3501 if (f2fs_discard_en(sbi
)) {
3502 sit_i
->sentries
[start
].discard_map
3503 = f2fs_kzalloc(sbi
, SIT_VBLOCK_MAP_SIZE
,
3505 if (!sit_i
->sentries
[start
].discard_map
)
3510 sit_i
->tmp_map
= f2fs_kzalloc(sbi
, SIT_VBLOCK_MAP_SIZE
, GFP_KERNEL
);
3511 if (!sit_i
->tmp_map
)
3514 if (sbi
->segs_per_sec
> 1) {
3515 sit_i
->sec_entries
= f2fs_kvzalloc(sbi
, MAIN_SECS(sbi
) *
3516 sizeof(struct sec_entry
), GFP_KERNEL
);
3517 if (!sit_i
->sec_entries
)
3521 /* get information related with SIT */
3522 sit_segs
= le32_to_cpu(raw_super
->segment_count_sit
) >> 1;
3524 /* setup SIT bitmap from ckeckpoint pack */
3525 bitmap_size
= __bitmap_size(sbi
, SIT_BITMAP
);
3526 src_bitmap
= __bitmap_ptr(sbi
, SIT_BITMAP
);
3528 sit_i
->sit_bitmap
= kmemdup(src_bitmap
, bitmap_size
, GFP_KERNEL
);
3529 if (!sit_i
->sit_bitmap
)
3532 #ifdef CONFIG_F2FS_CHECK_FS
3533 sit_i
->sit_bitmap_mir
= kmemdup(src_bitmap
, bitmap_size
, GFP_KERNEL
);
3534 if (!sit_i
->sit_bitmap_mir
)
3538 /* init SIT information */
3539 sit_i
->s_ops
= &default_salloc_ops
;
3541 sit_i
->sit_base_addr
= le32_to_cpu(raw_super
->sit_blkaddr
);
3542 sit_i
->sit_blocks
= sit_segs
<< sbi
->log_blocks_per_seg
;
3543 sit_i
->written_valid_blocks
= 0;
3544 sit_i
->bitmap_size
= bitmap_size
;
3545 sit_i
->dirty_sentries
= 0;
3546 sit_i
->sents_per_block
= SIT_ENTRY_PER_BLOCK
;
3547 sit_i
->elapsed_time
= le64_to_cpu(sbi
->ckpt
->elapsed_time
);
3548 sit_i
->mounted_time
= ktime_get_real_seconds();
3549 init_rwsem(&sit_i
->sentry_lock
);
3553 static int build_free_segmap(struct f2fs_sb_info
*sbi
)
3555 struct free_segmap_info
*free_i
;
3556 unsigned int bitmap_size
, sec_bitmap_size
;
3558 /* allocate memory for free segmap information */
3559 free_i
= f2fs_kzalloc(sbi
, sizeof(struct free_segmap_info
), GFP_KERNEL
);
3563 SM_I(sbi
)->free_info
= free_i
;
3565 bitmap_size
= f2fs_bitmap_size(MAIN_SEGS(sbi
));
3566 free_i
->free_segmap
= f2fs_kvmalloc(sbi
, bitmap_size
, GFP_KERNEL
);
3567 if (!free_i
->free_segmap
)
3570 sec_bitmap_size
= f2fs_bitmap_size(MAIN_SECS(sbi
));
3571 free_i
->free_secmap
= f2fs_kvmalloc(sbi
, sec_bitmap_size
, GFP_KERNEL
);
3572 if (!free_i
->free_secmap
)
3575 /* set all segments as dirty temporarily */
3576 memset(free_i
->free_segmap
, 0xff, bitmap_size
);
3577 memset(free_i
->free_secmap
, 0xff, sec_bitmap_size
);
3579 /* init free segmap information */
3580 free_i
->start_segno
= GET_SEGNO_FROM_SEG0(sbi
, MAIN_BLKADDR(sbi
));
3581 free_i
->free_segments
= 0;
3582 free_i
->free_sections
= 0;
3583 spin_lock_init(&free_i
->segmap_lock
);
3587 static int build_curseg(struct f2fs_sb_info
*sbi
)
3589 struct curseg_info
*array
;
3592 array
= f2fs_kzalloc(sbi
, sizeof(*array
) * NR_CURSEG_TYPE
, GFP_KERNEL
);
3596 SM_I(sbi
)->curseg_array
= array
;
3598 for (i
= 0; i
< NR_CURSEG_TYPE
; i
++) {
3599 mutex_init(&array
[i
].curseg_mutex
);
3600 array
[i
].sum_blk
= f2fs_kzalloc(sbi
, PAGE_SIZE
, GFP_KERNEL
);
3601 if (!array
[i
].sum_blk
)
3603 init_rwsem(&array
[i
].journal_rwsem
);
3604 array
[i
].journal
= f2fs_kzalloc(sbi
,
3605 sizeof(struct f2fs_journal
), GFP_KERNEL
);
3606 if (!array
[i
].journal
)
3608 array
[i
].segno
= NULL_SEGNO
;
3609 array
[i
].next_blkoff
= 0;
3611 return restore_curseg_summaries(sbi
);
3614 static int build_sit_entries(struct f2fs_sb_info
*sbi
)
3616 struct sit_info
*sit_i
= SIT_I(sbi
);
3617 struct curseg_info
*curseg
= CURSEG_I(sbi
, CURSEG_COLD_DATA
);
3618 struct f2fs_journal
*journal
= curseg
->journal
;
3619 struct seg_entry
*se
;
3620 struct f2fs_sit_entry sit
;
3621 int sit_blk_cnt
= SIT_BLK_CNT(sbi
);
3622 unsigned int i
, start
, end
;
3623 unsigned int readed
, start_blk
= 0;
3625 block_t total_node_blocks
= 0;
3628 readed
= ra_meta_pages(sbi
, start_blk
, BIO_MAX_PAGES
,
3631 start
= start_blk
* sit_i
->sents_per_block
;
3632 end
= (start_blk
+ readed
) * sit_i
->sents_per_block
;
3634 for (; start
< end
&& start
< MAIN_SEGS(sbi
); start
++) {
3635 struct f2fs_sit_block
*sit_blk
;
3638 se
= &sit_i
->sentries
[start
];
3639 page
= get_current_sit_page(sbi
, start
);
3640 sit_blk
= (struct f2fs_sit_block
*)page_address(page
);
3641 sit
= sit_blk
->entries
[SIT_ENTRY_OFFSET(sit_i
, start
)];
3642 f2fs_put_page(page
, 1);
3644 err
= check_block_count(sbi
, start
, &sit
);
3647 seg_info_from_raw_sit(se
, &sit
);
3648 if (IS_NODESEG(se
->type
))
3649 total_node_blocks
+= se
->valid_blocks
;
3651 /* build discard map only one time */
3652 if (f2fs_discard_en(sbi
)) {
3653 if (is_set_ckpt_flags(sbi
, CP_TRIMMED_FLAG
)) {
3654 memset(se
->discard_map
, 0xff,
3655 SIT_VBLOCK_MAP_SIZE
);
3657 memcpy(se
->discard_map
,
3659 SIT_VBLOCK_MAP_SIZE
);
3660 sbi
->discard_blks
+=
3661 sbi
->blocks_per_seg
-
3666 if (sbi
->segs_per_sec
> 1)
3667 get_sec_entry(sbi
, start
)->valid_blocks
+=
3670 start_blk
+= readed
;
3671 } while (start_blk
< sit_blk_cnt
);
3673 down_read(&curseg
->journal_rwsem
);
3674 for (i
= 0; i
< sits_in_cursum(journal
); i
++) {
3675 unsigned int old_valid_blocks
;
3677 start
= le32_to_cpu(segno_in_journal(journal
, i
));
3678 if (start
>= MAIN_SEGS(sbi
)) {
3679 f2fs_msg(sbi
->sb
, KERN_ERR
,
3680 "Wrong journal entry on segno %u",
3682 set_sbi_flag(sbi
, SBI_NEED_FSCK
);
3687 se
= &sit_i
->sentries
[start
];
3688 sit
= sit_in_journal(journal
, i
);
3690 old_valid_blocks
= se
->valid_blocks
;
3691 if (IS_NODESEG(se
->type
))
3692 total_node_blocks
-= old_valid_blocks
;
3694 err
= check_block_count(sbi
, start
, &sit
);
3697 seg_info_from_raw_sit(se
, &sit
);
3698 if (IS_NODESEG(se
->type
))
3699 total_node_blocks
+= se
->valid_blocks
;
3701 if (f2fs_discard_en(sbi
)) {
3702 if (is_set_ckpt_flags(sbi
, CP_TRIMMED_FLAG
)) {
3703 memset(se
->discard_map
, 0xff,
3704 SIT_VBLOCK_MAP_SIZE
);
3706 memcpy(se
->discard_map
, se
->cur_valid_map
,
3707 SIT_VBLOCK_MAP_SIZE
);
3708 sbi
->discard_blks
+= old_valid_blocks
-
3713 if (sbi
->segs_per_sec
> 1)
3714 get_sec_entry(sbi
, start
)->valid_blocks
+=
3715 se
->valid_blocks
- old_valid_blocks
;
3717 up_read(&curseg
->journal_rwsem
);
3719 if (!err
&& total_node_blocks
!= valid_node_count(sbi
)) {
3720 f2fs_msg(sbi
->sb
, KERN_ERR
,
3721 "SIT is corrupted node# %u vs %u",
3722 total_node_blocks
, valid_node_count(sbi
));
3723 set_sbi_flag(sbi
, SBI_NEED_FSCK
);
3730 static void init_free_segmap(struct f2fs_sb_info
*sbi
)
3735 for (start
= 0; start
< MAIN_SEGS(sbi
); start
++) {
3736 struct seg_entry
*sentry
= get_seg_entry(sbi
, start
);
3737 if (!sentry
->valid_blocks
)
3738 __set_free(sbi
, start
);
3740 SIT_I(sbi
)->written_valid_blocks
+=
3741 sentry
->valid_blocks
;
3744 /* set use the current segments */
3745 for (type
= CURSEG_HOT_DATA
; type
<= CURSEG_COLD_NODE
; type
++) {
3746 struct curseg_info
*curseg_t
= CURSEG_I(sbi
, type
);
3747 __set_test_and_inuse(sbi
, curseg_t
->segno
);
3751 static void init_dirty_segmap(struct f2fs_sb_info
*sbi
)
3753 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
3754 struct free_segmap_info
*free_i
= FREE_I(sbi
);
3755 unsigned int segno
= 0, offset
= 0;
3756 unsigned short valid_blocks
;
3759 /* find dirty segment based on free segmap */
3760 segno
= find_next_inuse(free_i
, MAIN_SEGS(sbi
), offset
);
3761 if (segno
>= MAIN_SEGS(sbi
))
3764 valid_blocks
= get_valid_blocks(sbi
, segno
, false);
3765 if (valid_blocks
== sbi
->blocks_per_seg
|| !valid_blocks
)
3767 if (valid_blocks
> sbi
->blocks_per_seg
) {
3768 f2fs_bug_on(sbi
, 1);
3771 mutex_lock(&dirty_i
->seglist_lock
);
3772 __locate_dirty_segment(sbi
, segno
, DIRTY
);
3773 mutex_unlock(&dirty_i
->seglist_lock
);
3777 static int init_victim_secmap(struct f2fs_sb_info
*sbi
)
3779 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
3780 unsigned int bitmap_size
= f2fs_bitmap_size(MAIN_SECS(sbi
));
3782 dirty_i
->victim_secmap
= f2fs_kvzalloc(sbi
, bitmap_size
, GFP_KERNEL
);
3783 if (!dirty_i
->victim_secmap
)
3788 static int build_dirty_segmap(struct f2fs_sb_info
*sbi
)
3790 struct dirty_seglist_info
*dirty_i
;
3791 unsigned int bitmap_size
, i
;
3793 /* allocate memory for dirty segments list information */
3794 dirty_i
= f2fs_kzalloc(sbi
, sizeof(struct dirty_seglist_info
),
3799 SM_I(sbi
)->dirty_info
= dirty_i
;
3800 mutex_init(&dirty_i
->seglist_lock
);
3802 bitmap_size
= f2fs_bitmap_size(MAIN_SEGS(sbi
));
3804 for (i
= 0; i
< NR_DIRTY_TYPE
; i
++) {
3805 dirty_i
->dirty_segmap
[i
] = f2fs_kvzalloc(sbi
, bitmap_size
,
3807 if (!dirty_i
->dirty_segmap
[i
])
3811 init_dirty_segmap(sbi
);
3812 return init_victim_secmap(sbi
);
3816 * Update min, max modified time for cost-benefit GC algorithm
3818 static void init_min_max_mtime(struct f2fs_sb_info
*sbi
)
3820 struct sit_info
*sit_i
= SIT_I(sbi
);
3823 down_write(&sit_i
->sentry_lock
);
3825 sit_i
->min_mtime
= LLONG_MAX
;
3827 for (segno
= 0; segno
< MAIN_SEGS(sbi
); segno
+= sbi
->segs_per_sec
) {
3829 unsigned long long mtime
= 0;
3831 for (i
= 0; i
< sbi
->segs_per_sec
; i
++)
3832 mtime
+= get_seg_entry(sbi
, segno
+ i
)->mtime
;
3834 mtime
= div_u64(mtime
, sbi
->segs_per_sec
);
3836 if (sit_i
->min_mtime
> mtime
)
3837 sit_i
->min_mtime
= mtime
;
3839 sit_i
->max_mtime
= get_mtime(sbi
);
3840 up_write(&sit_i
->sentry_lock
);
3843 int build_segment_manager(struct f2fs_sb_info
*sbi
)
3845 struct f2fs_super_block
*raw_super
= F2FS_RAW_SUPER(sbi
);
3846 struct f2fs_checkpoint
*ckpt
= F2FS_CKPT(sbi
);
3847 struct f2fs_sm_info
*sm_info
;
3850 sm_info
= f2fs_kzalloc(sbi
, sizeof(struct f2fs_sm_info
), GFP_KERNEL
);
3855 sbi
->sm_info
= sm_info
;
3856 sm_info
->seg0_blkaddr
= le32_to_cpu(raw_super
->segment0_blkaddr
);
3857 sm_info
->main_blkaddr
= le32_to_cpu(raw_super
->main_blkaddr
);
3858 sm_info
->segment_count
= le32_to_cpu(raw_super
->segment_count
);
3859 sm_info
->reserved_segments
= le32_to_cpu(ckpt
->rsvd_segment_count
);
3860 sm_info
->ovp_segments
= le32_to_cpu(ckpt
->overprov_segment_count
);
3861 sm_info
->main_segments
= le32_to_cpu(raw_super
->segment_count_main
);
3862 sm_info
->ssa_blkaddr
= le32_to_cpu(raw_super
->ssa_blkaddr
);
3863 sm_info
->rec_prefree_segments
= sm_info
->main_segments
*
3864 DEF_RECLAIM_PREFREE_SEGMENTS
/ 100;
3865 if (sm_info
->rec_prefree_segments
> DEF_MAX_RECLAIM_PREFREE_SEGMENTS
)
3866 sm_info
->rec_prefree_segments
= DEF_MAX_RECLAIM_PREFREE_SEGMENTS
;
3868 if (!test_opt(sbi
, LFS
))
3869 sm_info
->ipu_policy
= 1 << F2FS_IPU_FSYNC
;
3870 sm_info
->min_ipu_util
= DEF_MIN_IPU_UTIL
;
3871 sm_info
->min_fsync_blocks
= DEF_MIN_FSYNC_BLOCKS
;
3872 sm_info
->min_hot_blocks
= DEF_MIN_HOT_BLOCKS
;
3873 sm_info
->min_ssr_sections
= reserved_sections(sbi
);
3875 INIT_LIST_HEAD(&sm_info
->sit_entry_set
);
3877 init_rwsem(&sm_info
->curseg_lock
);
3879 if (!f2fs_readonly(sbi
->sb
)) {
3880 err
= create_flush_cmd_control(sbi
);
3885 err
= create_discard_cmd_control(sbi
);
3889 err
= build_sit_info(sbi
);
3892 err
= build_free_segmap(sbi
);
3895 err
= build_curseg(sbi
);
3899 /* reinit free segmap based on SIT */
3900 err
= build_sit_entries(sbi
);
3904 init_free_segmap(sbi
);
3905 err
= build_dirty_segmap(sbi
);
3909 init_min_max_mtime(sbi
);
3913 static void discard_dirty_segmap(struct f2fs_sb_info
*sbi
,
3914 enum dirty_type dirty_type
)
3916 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
3918 mutex_lock(&dirty_i
->seglist_lock
);
3919 kvfree(dirty_i
->dirty_segmap
[dirty_type
]);
3920 dirty_i
->nr_dirty
[dirty_type
] = 0;
3921 mutex_unlock(&dirty_i
->seglist_lock
);
3924 static void destroy_victim_secmap(struct f2fs_sb_info
*sbi
)
3926 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
3927 kvfree(dirty_i
->victim_secmap
);
3930 static void destroy_dirty_segmap(struct f2fs_sb_info
*sbi
)
3932 struct dirty_seglist_info
*dirty_i
= DIRTY_I(sbi
);
3938 /* discard pre-free/dirty segments list */
3939 for (i
= 0; i
< NR_DIRTY_TYPE
; i
++)
3940 discard_dirty_segmap(sbi
, i
);
3942 destroy_victim_secmap(sbi
);
3943 SM_I(sbi
)->dirty_info
= NULL
;
3947 static void destroy_curseg(struct f2fs_sb_info
*sbi
)
3949 struct curseg_info
*array
= SM_I(sbi
)->curseg_array
;
3954 SM_I(sbi
)->curseg_array
= NULL
;
3955 for (i
= 0; i
< NR_CURSEG_TYPE
; i
++) {
3956 kfree(array
[i
].sum_blk
);
3957 kfree(array
[i
].journal
);
3962 static void destroy_free_segmap(struct f2fs_sb_info
*sbi
)
3964 struct free_segmap_info
*free_i
= SM_I(sbi
)->free_info
;
3967 SM_I(sbi
)->free_info
= NULL
;
3968 kvfree(free_i
->free_segmap
);
3969 kvfree(free_i
->free_secmap
);
3973 static void destroy_sit_info(struct f2fs_sb_info
*sbi
)
3975 struct sit_info
*sit_i
= SIT_I(sbi
);
3981 if (sit_i
->sentries
) {
3982 for (start
= 0; start
< MAIN_SEGS(sbi
); start
++) {
3983 kfree(sit_i
->sentries
[start
].cur_valid_map
);
3984 #ifdef CONFIG_F2FS_CHECK_FS
3985 kfree(sit_i
->sentries
[start
].cur_valid_map_mir
);
3987 kfree(sit_i
->sentries
[start
].ckpt_valid_map
);
3988 kfree(sit_i
->sentries
[start
].discard_map
);
3991 kfree(sit_i
->tmp_map
);
3993 kvfree(sit_i
->sentries
);
3994 kvfree(sit_i
->sec_entries
);
3995 kvfree(sit_i
->dirty_sentries_bitmap
);
3997 SM_I(sbi
)->sit_info
= NULL
;
3998 kfree(sit_i
->sit_bitmap
);
3999 #ifdef CONFIG_F2FS_CHECK_FS
4000 kfree(sit_i
->sit_bitmap_mir
);
4005 void destroy_segment_manager(struct f2fs_sb_info
*sbi
)
4007 struct f2fs_sm_info
*sm_info
= SM_I(sbi
);
4011 destroy_flush_cmd_control(sbi
, true);
4012 destroy_discard_cmd_control(sbi
);
4013 destroy_dirty_segmap(sbi
);
4014 destroy_curseg(sbi
);
4015 destroy_free_segmap(sbi
);
4016 destroy_sit_info(sbi
);
4017 sbi
->sm_info
= NULL
;
4021 int __init
create_segment_manager_caches(void)
4023 discard_entry_slab
= f2fs_kmem_cache_create("discard_entry",
4024 sizeof(struct discard_entry
));
4025 if (!discard_entry_slab
)
4028 discard_cmd_slab
= f2fs_kmem_cache_create("discard_cmd",
4029 sizeof(struct discard_cmd
));
4030 if (!discard_cmd_slab
)
4031 goto destroy_discard_entry
;
4033 sit_entry_set_slab
= f2fs_kmem_cache_create("sit_entry_set",
4034 sizeof(struct sit_entry_set
));
4035 if (!sit_entry_set_slab
)
4036 goto destroy_discard_cmd
;
4038 inmem_entry_slab
= f2fs_kmem_cache_create("inmem_page_entry",
4039 sizeof(struct inmem_pages
));
4040 if (!inmem_entry_slab
)
4041 goto destroy_sit_entry_set
;
4044 destroy_sit_entry_set
:
4045 kmem_cache_destroy(sit_entry_set_slab
);
4046 destroy_discard_cmd
:
4047 kmem_cache_destroy(discard_cmd_slab
);
4048 destroy_discard_entry
:
4049 kmem_cache_destroy(discard_entry_slab
);
4054 void destroy_segment_manager_caches(void)
4056 kmem_cache_destroy(sit_entry_set_slab
);
4057 kmem_cache_destroy(discard_cmd_slab
);
4058 kmem_cache_destroy(discard_entry_slab
);
4059 kmem_cache_destroy(inmem_entry_slab
);