4 * Copyright (C) 2002, Linus Torvalds.
6 * Contains functions related to preparing and submitting BIOs which contain
7 * multiple pagecache pages.
9 * 15May2002 Andrew Morton
11 * 27Jun2002 axboe@suse.de
12 * use bio_add_page() to build bio's just the right size
16 * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version 2
21 * of the License, or (at your option) any later version.
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, see <http://www.gnu.org/licenses/>.
32 /************************************************************************/
34 /* PROJECT : exFAT & FAT12/16/32 File System */
36 /* PURPOSE : sdFAT glue layer for supporting VFS */
38 /*----------------------------------------------------------------------*/
42 /************************************************************************/
44 #include <linux/version.h>
45 #include <linux/module.h>
46 #include <linux/time.h>
47 #include <linux/buffer_head.h>
48 #include <linux/exportfs.h>
49 #include <linux/mount.h>
50 #include <linux/vfs.h>
51 #include <linux/parser.h>
52 #include <linux/uio.h>
53 #include <linux/writeback.h>
54 #include <linux/log2.h>
55 #include <linux/hash.h>
56 #include <linux/backing-dev.h>
57 #include <linux/sched.h>
58 #include <linux/fs_struct.h>
59 #include <linux/namei.h>
60 #include <linux/bio.h>
61 #include <linux/blkdev.h>
62 #include <linux/swap.h> /* for mark_page_accessed() */
63 #include <asm/current.h>
64 #include <asm/unaligned.h>
65 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
66 #include <linux/aio.h>
71 #ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
73 /*************************************************************************
74 * INNER FUNCTIONS FOR FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
75 *************************************************************************/
/* Forward declaration: shared completion work used by the per-kernel-version
 * mpage_write_end_io() wrappers below.
 */
static void __mpage_write_end_io(struct bio *bio, int err);
78 /*************************************************************************
79 * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
80 *************************************************************************/
81 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
82 static inline void __sdfat_submit_bio_write2(int flags
, struct bio
*bio
)
84 bio_set_op_attrs(bio
, REQ_OP_WRITE
, flags
);
87 #else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) */
88 static inline void __sdfat_submit_bio_write2(int flags
, struct bio
*bio
)
90 submit_bio(WRITE
| flags
, bio
);
94 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
95 static void mpage_write_end_io(struct bio
*bio
)
97 __mpage_write_end_io(bio
, bio
->bi_error
);
99 #else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0) */
100 static void mpage_write_end_io(struct bio
*bio
, int err
)
102 if (test_bit(BIO_UPTODATE
, &bio
->bi_flags
))
104 __mpage_write_end_io(bio
, err
);
108 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
109 static inline int bio_get_nr_vecs(struct block_device
*bdev
)
111 return BIO_MAX_PAGES
;
113 #else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) */
117 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
118 static inline sector_t
__sdfat_bio_sector(struct bio
*bio
)
120 return bio
->bi_iter
.bi_sector
;
123 static inline void __sdfat_set_bio_sector(struct bio
*bio
, sector_t sector
)
125 bio
->bi_iter
.bi_sector
= sector
;
128 static inline unsigned int __sdfat_bio_size(struct bio
*bio
)
130 return bio
->bi_iter
.bi_size
;
133 static inline void __sdfat_set_bio_size(struct bio
*bio
, unsigned int size
)
135 bio
->bi_iter
.bi_size
= size
;
137 #else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */
138 static inline sector_t
__sdfat_bio_sector(struct bio
*bio
)
140 return bio
->bi_sector
;
143 static inline void __sdfat_set_bio_sector(struct bio
*bio
, sector_t sector
)
145 bio
->bi_sector
= sector
;
148 static inline unsigned int __sdfat_bio_size(struct bio
*bio
)
153 static inline void __sdfat_set_bio_size(struct bio
*bio
, unsigned int size
)
159 /* __check_dfr_on() and __dfr_writepage_end_io() functions
160 * are copied from sdfat.c
161 * Each function should be same perfectly
163 static inline int __check_dfr_on(struct inode
*inode
, loff_t start
, loff_t end
, const char *fname
)
165 #ifdef CONFIG_SDFAT_DFR
166 struct defrag_info
*ino_dfr
= &(SDFAT_I(inode
)->dfr_info
);
168 if ((atomic_read(&ino_dfr
->stat
) == DFR_INO_STAT_REQ
) &&
169 fsapi_dfr_check_dfr_on(inode
, start
, end
, 0, fname
))
/* Notify the defrag engine when a page belonging to an inode with a pending
 * defrag request finishes writeback.  No-op without CONFIG_SDFAT_DFR.
 */
static inline int __dfr_writepage_end_io(struct page *page)
{
#ifdef CONFIG_SDFAT_DFR
	struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info);

	if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ)
		fsapi_dfr_writepage_endio(page);
#endif
	return 0;
}
187 static inline unsigned int __calc_size_to_align(struct super_block
*sb
)
189 struct block_device
*bdev
= sb
->s_bdev
;
190 struct gendisk
*disk
;
191 struct request_queue
*queue
;
192 struct queue_limits
*limit
;
193 unsigned int max_sectors
;
194 unsigned int aligned
= 0;
196 disk
= bdev
->bd_disk
;
204 limit
= &queue
->limits
;
205 max_sectors
= limit
->max_sectors
;
206 aligned
= 1 << ilog2(max_sectors
);
208 if (aligned
&& (max_sectors
& (aligned
- 1)))
216 sector_t last_block_in_bio
;
217 get_block_t
*get_block
;
218 unsigned int use_writepage
;
219 unsigned int size_to_align
;
223 * I/O completion handler for multipage BIOs.
225 * The mpage code never puts partial pages into a BIO (except for end-of-file).
226 * If a page does not map to a contiguous run of blocks then it simply falls
227 * back to block_read_full_page().
229 * Why is this? If a page's completion depends on a number of different BIOs
230 * which can complete in any order (or at the same time) then determining the
231 * status of that page is hard. See end_buffer_async_read() for the details.
232 * There is no point in duplicating all that complexity.
234 static void __mpage_write_end_io(struct bio
*bio
, int err
)
236 struct bio_vec
*bvec
= bio
->bi_io_vec
+ bio
->bi_vcnt
- 1;
238 ASSERT(bio_data_dir(bio
) == WRITE
); /* only write */
241 struct page
*page
= bvec
->bv_page
;
243 if (--bvec
>= bio
->bi_io_vec
)
244 prefetchw(&bvec
->bv_page
->flags
);
248 mapping_set_error(page
->mapping
, err
);
251 __dfr_writepage_end_io(page
);
253 end_page_writeback(page
);
254 } while (bvec
>= bio
->bi_io_vec
);
258 static struct bio
*mpage_bio_submit_write(int flags
, struct bio
*bio
)
260 bio
->bi_end_io
= mpage_write_end_io
;
261 __sdfat_submit_bio_write2(flags
, bio
);
266 mpage_alloc(struct block_device
*bdev
,
267 sector_t first_sector
, int nr_vecs
,
272 bio
= bio_alloc(gfp_flags
, nr_vecs
);
274 if (bio
== NULL
&& (current
->flags
& PF_MEMALLOC
)) {
275 while (!bio
&& (nr_vecs
/= 2))
276 bio
= bio_alloc(gfp_flags
, nr_vecs
);
281 __sdfat_set_bio_sector(bio
, first_sector
);
286 static int sdfat_mpage_writepage(struct page
*page
,
287 struct writeback_control
*wbc
, void *data
)
289 struct mpage_data
*mpd
= data
;
290 struct bio
*bio
= mpd
->bio
;
291 struct address_space
*mapping
= page
->mapping
;
292 struct inode
*inode
= page
->mapping
->host
;
293 const unsigned int blkbits
= inode
->i_blkbits
;
294 const unsigned int blocks_per_page
= PAGE_SIZE
>> blkbits
;
296 sector_t block_in_file
;
297 sector_t blocks
[MAX_BUF_PER_PAGE
];
298 unsigned int page_block
;
299 unsigned int first_unmapped
= blocks_per_page
;
300 struct block_device
*bdev
= NULL
;
302 sector_t boundary_block
= 0;
303 struct block_device
*boundary_bdev
= NULL
;
305 struct buffer_head map_bh
;
306 loff_t i_size
= i_size_read(inode
);
307 unsigned long end_index
= i_size
>> PAGE_SHIFT
;
310 if (page_has_buffers(page
)) {
311 struct buffer_head
*head
= page_buffers(page
);
312 struct buffer_head
*bh
= head
;
314 /* If they're all mapped and dirty, do it */
317 BUG_ON(buffer_locked(bh
));
318 if (!buffer_mapped(bh
)) {
320 * unmapped dirty buffers are created by
321 * __set_page_dirty_buffers -> mmapped data
323 if (buffer_dirty(bh
))
325 if (first_unmapped
== blocks_per_page
)
326 first_unmapped
= page_block
;
330 if (first_unmapped
!= blocks_per_page
)
331 goto confused
; /* hole -> non-hole */
333 if (!buffer_dirty(bh
) || !buffer_uptodate(bh
))
336 /* bh should be mapped if delay is set */
337 if (buffer_delay(bh
)) {
338 sector_t blk_in_file
=
339 (sector_t
)(page
->index
<< (PAGE_SHIFT
- blkbits
)) + page_block
;
341 BUG_ON(bh
->b_size
!= (1 << blkbits
));
342 if (page
->index
> end_index
) {
344 "over end with delayed buffer"
345 "(page_idx:%u, end_idx:%u)\n",
352 ret
= mpd
->get_block(inode
, blk_in_file
, bh
, 1);
355 "failed to getblk(ret:%d)\n",
356 __func__
, inode
, ret
);
360 BUG_ON(buffer_delay(bh
));
362 if (buffer_new(bh
)) {
363 clear_buffer_new(bh
);
364 unmap_underlying_metadata(bh
->b_bdev
, bh
->b_blocknr
);
369 if (bh
->b_blocknr
!= blocks
[page_block
-1] + 1) {
370 MMSG("%s(inode:%p) pblk(%d) "
371 "no_seq(prev:%lld, new:%lld)\n",
372 __func__
, inode
, page_block
,
373 (u64
)blocks
[page_block
-1],
378 blocks
[page_block
++] = bh
->b_blocknr
;
379 boundary
= buffer_boundary(bh
);
381 boundary_block
= bh
->b_blocknr
;
382 boundary_bdev
= bh
->b_bdev
;
385 } while ((bh
= bh
->b_this_page
) != head
);
391 * Page has buffers, but they are all unmapped. The page was
392 * created by pagein or read over a hole which was handled by
393 * block_read_full_page(). If this address_space is also
394 * using mpage_readpages then this can rarely happen.
400 * The page has no buffers: map it to disk
402 BUG_ON(!PageUptodate(page
));
403 block_in_file
= (sector_t
)page
->index
<< (PAGE_SHIFT
- blkbits
);
404 last_block
= (i_size
- 1) >> blkbits
;
405 map_bh
.b_page
= page
;
406 for (page_block
= 0; page_block
< blocks_per_page
; ) {
409 map_bh
.b_size
= 1 << blkbits
;
410 if (mpd
->get_block(inode
, block_in_file
, &map_bh
, 1))
413 if (buffer_new(&map_bh
))
414 unmap_underlying_metadata(map_bh
.b_bdev
,
416 if (buffer_boundary(&map_bh
)) {
417 boundary_block
= map_bh
.b_blocknr
;
418 boundary_bdev
= map_bh
.b_bdev
;
422 if (map_bh
.b_blocknr
!= blocks
[page_block
-1] + 1)
425 blocks
[page_block
++] = map_bh
.b_blocknr
;
426 boundary
= buffer_boundary(&map_bh
);
427 bdev
= map_bh
.b_bdev
;
428 if (block_in_file
== last_block
)
432 BUG_ON(page_block
== 0);
434 first_unmapped
= page_block
;
437 if (page
->index
>= end_index
) {
439 * The page straddles i_size. It must be zeroed out on each
440 * and every writepage invocation because it may be mmapped.
441 * "A file is mapped in multiples of the page size. For a file
442 * that is not a multiple of the page size, the remaining memory
443 * is zeroed when mapped, and writes to that region are not
444 * written out to the file."
446 unsigned int offset
= i_size
& (PAGE_SIZE
- 1);
448 if (page
->index
> end_index
|| !offset
) {
449 MMSG("%s(inode:%p) over end "
450 "(page_idx:%u, end_idx:%u off:%u)\n",
451 __func__
, inode
, (u32
)page
->index
,
452 (u32
)end_index
, (u32
)offset
);
455 zero_user_segment(page
, offset
, PAGE_SIZE
);
459 * This page will go to BIO. Do we need to send this BIO off first?
461 * REMARK : added ELSE_IF for ALIGNMENT_MPAGE_WRITE of SDFAT
464 if (mpd
->last_block_in_bio
!= blocks
[0] - 1) {
465 bio
= mpage_bio_submit_write(0, bio
);
466 } else if (mpd
->size_to_align
) {
467 unsigned int mask
= mpd
->size_to_align
- 1;
468 sector_t max_end_block
=
469 (__sdfat_bio_sector(bio
) & ~(mask
)) + mask
;
471 if ((__sdfat_bio_size(bio
) != (1 << (mask
+ 1))) &&
472 (mpd
->last_block_in_bio
== max_end_block
)) {
473 MMSG("%s(inode:%p) alignment mpage_bio_submit"
474 "(start:%u, len:%u aligned:%u)\n",
476 (unsigned int)__sdfat_bio_sector(bio
),
477 (unsigned int)(mpd
->last_block_in_bio
-
478 __sdfat_bio_sector(bio
) + 1),
479 (unsigned int)mpd
->size_to_align
);
480 bio
= mpage_bio_submit_write(REQ_NOMERGE
, bio
);
487 bio
= mpage_alloc(bdev
, blocks
[0] << (blkbits
- 9),
488 bio_get_nr_vecs(bdev
), GFP_NOFS
|__GFP_HIGH
);
494 * Must try to add the page before marking the buffer clean or
495 * the confused fail path above (OOM) will be very confused when
496 * it finds all bh marked clean (i.e. it will not write anything)
498 length
= first_unmapped
<< blkbits
;
499 if (bio_add_page(bio
, page
, length
, 0) < length
) {
500 bio
= mpage_bio_submit_write(0, bio
);
505 * OK, we have our BIO, so we can now mark the buffers clean. Make
506 * sure to only clean buffers which we know we'll be writing.
508 if (page_has_buffers(page
)) {
509 struct buffer_head
*head
= page_buffers(page
);
510 struct buffer_head
*bh
= head
;
511 unsigned int buffer_counter
= 0;
514 if (buffer_counter
++ == first_unmapped
)
516 clear_buffer_dirty(bh
);
517 bh
= bh
->b_this_page
;
518 } while (bh
!= head
);
521 * we cannot drop the bh if the page is not uptodate
522 * or a concurrent readpage would fail to serialize with the bh
523 * and it would read from disk before we reach the platter.
525 if (buffer_heads_over_limit
&& PageUptodate(page
))
526 try_to_free_buffers(page
);
529 BUG_ON(PageWriteback(page
));
530 set_page_writeback(page
);
533 * FIXME FOR DEFRAGMENTATION : CODE REVIEW IS REQUIRED
535 * Turn off MAPPED flag in victim's bh if defrag on.
536 * Another write_begin can starts after get_block for defrag victims
538 * In this case, write_begin calls get_block and get original block
539 * number and previous defrag will be canceled.
541 if (unlikely(__check_dfr_on(inode
, (loff_t
)(page
->index
<< PAGE_SHIFT
),
542 (loff_t
)((page
->index
+ 1) << PAGE_SHIFT
), __func__
))) {
543 struct buffer_head
*head
= page_buffers(page
);
544 struct buffer_head
*bh
= head
;
547 clear_buffer_mapped(bh
);
548 bh
= bh
->b_this_page
;
549 } while (bh
!= head
);
553 if (boundary
|| (first_unmapped
!= blocks_per_page
)) {
554 bio
= mpage_bio_submit_write(0, bio
);
555 if (boundary_block
) {
556 write_boundary_block(boundary_bdev
,
557 boundary_block
, 1 << blkbits
);
560 mpd
->last_block_in_bio
= blocks
[blocks_per_page
- 1];
567 bio
= mpage_bio_submit_write(0, bio
);
569 if (mpd
->use_writepage
) {
570 ret
= mapping
->a_ops
->writepage(page
, wbc
);
576 * The caller has a ref on the inode, so *mapping is stable
578 mapping_set_error(mapping
, ret
);
584 int sdfat_mpage_writepages(struct address_space
*mapping
,
585 struct writeback_control
*wbc
, get_block_t
*get_block
)
587 struct blk_plug plug
;
589 struct mpage_data mpd
= {
591 .last_block_in_bio
= 0,
592 .get_block
= get_block
,
594 .size_to_align
= __calc_size_to_align(mapping
->host
->i_sb
),
598 blk_start_plug(&plug
);
599 ret
= write_cache_pages(mapping
, wbc
, sdfat_mpage_writepage
, &mpd
);
601 mpage_bio_submit_write(0, mpd
.bio
);
602 blk_finish_plug(&plug
);
606 #endif /* CONFIG_SDFAT_ALIGNED_MPAGE_WRITE */