4 * Copyright (C) 2002, Linus Torvalds.
6 * Contains functions related to preparing and submitting BIOs which contain
7 * multiple pagecache pages.
9 * 15May2002 Andrew Morton
11 * 27Jun2002 axboe@suse.de
12 * use bio_add_page() to build bio's just the right size
16 * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version 2
21 * of the License, or (at your option) any later version.
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, see <http://www.gnu.org/licenses/>.
32 /************************************************************************/
34 /* PROJECT : exFAT & FAT12/16/32 File System */
36 /* PURPOSE : sdFAT glue layer for supporting VFS */
38 /*----------------------------------------------------------------------*/
42 /************************************************************************/
44 #include <linux/version.h>
45 #include <linux/module.h>
46 #include <linux/time.h>
47 #include <linux/buffer_head.h>
48 #include <linux/exportfs.h>
49 #include <linux/mount.h>
50 #include <linux/vfs.h>
51 #include <linux/parser.h>
52 #include <linux/uio.h>
53 #include <linux/writeback.h>
54 #include <linux/log2.h>
55 #include <linux/hash.h>
56 #include <linux/backing-dev.h>
57 #include <linux/sched.h>
58 #include <linux/fs_struct.h>
59 #include <linux/namei.h>
60 #include <linux/bio.h>
61 #include <linux/blkdev.h>
62 #include <linux/swap.h> /* for mark_page_accessed() */
63 #include <asm/current.h>
64 #include <asm/unaligned.h>
65 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
66 #include <linux/aio.h>
71 #ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
73 /*************************************************************************
74 * INNER FUNCTIONS FOR FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
75 *************************************************************************/
/* Forward declaration: shared completion work used by the per-kernel-version
 * mpage_write_end_io() wrappers below.
 */
static void __mpage_write_end_io(struct bio *bio, int err);
78 /*************************************************************************
79 * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
80 *************************************************************************/
81 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
82 static inline void __sdfat_submit_bio_write2(int flags
, struct bio
*bio
)
84 bio_set_op_attrs(bio
, REQ_OP_WRITE
, flags
);
87 #else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) */
88 static inline void __sdfat_submit_bio_write2(int flags
, struct bio
*bio
)
90 submit_bio(WRITE
| flags
, bio
);
94 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
95 static void mpage_write_end_io(struct bio
*bio
)
97 __mpage_write_end_io(bio
, bio
->bi_error
);
99 #else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0) */
100 static void mpage_write_end_io(struct bio
*bio
, int err
)
102 if (test_bit(BIO_UPTODATE
, &bio
->bi_flags
))
104 __mpage_write_end_io(bio
, err
);
108 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
109 static inline int bio_get_nr_vecs(struct block_device
*bdev
)
111 return BIO_MAX_PAGES
;
113 #else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) */
117 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
118 static inline sector_t
__sdfat_bio_sector(struct bio
*bio
)
120 return bio
->bi_iter
.bi_sector
;
123 static inline void __sdfat_set_bio_sector(struct bio
*bio
, sector_t sector
)
125 bio
->bi_iter
.bi_sector
= sector
;
128 static inline unsigned int __sdfat_bio_size(struct bio
*bio
)
130 return bio
->bi_iter
.bi_size
;
133 static inline void __sdfat_set_bio_size(struct bio
*bio
, unsigned int size
)
135 bio
->bi_iter
.bi_size
= size
;
137 #else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */
138 static inline sector_t
__sdfat_bio_sector(struct bio
*bio
)
140 return bio
->bi_sector
;
143 static inline void __sdfat_set_bio_sector(struct bio
*bio
, sector_t sector
)
145 bio
->bi_sector
= sector
;
148 static inline unsigned int __sdfat_bio_size(struct bio
*bio
)
153 static inline void __sdfat_set_bio_size(struct bio
*bio
, unsigned int size
)
159 /* __check_dfr_on() and __dfr_writepage_end_io() functions
160 * are copied from sdfat.c
161 * Each function should be same perfectly
163 static inline int __check_dfr_on(struct inode
*inode
, loff_t start
, loff_t end
, const char *fname
)
165 #ifdef CONFIG_SDFAT_DFR
166 struct defrag_info
*ino_dfr
= &(SDFAT_I(inode
)->dfr_info
);
168 if ((atomic_read(&ino_dfr
->stat
) == DFR_INO_STAT_REQ
) &&
169 fsapi_dfr_check_dfr_on(inode
, start
, end
, 0, fname
))
/* Notify the defrag engine when a page belonging to an inode with a pending
 * defrag request finishes writeback.  No-op without CONFIG_SDFAT_DFR.
 */
static inline int __dfr_writepage_end_io(struct page *page)
{
#ifdef CONFIG_SDFAT_DFR
	struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info);

	if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ)
		fsapi_dfr_writepage_endio(page);
#endif
	return 0;
}
187 static inline unsigned int __calc_size_to_align(struct super_block
*sb
)
189 struct block_device
*bdev
= sb
->s_bdev
;
190 struct gendisk
*disk
;
191 struct request_queue
*queue
;
192 struct queue_limits
*limit
;
193 unsigned int max_sectors
;
194 unsigned int aligned
= 0;
196 disk
= bdev
->bd_disk
;
204 limit
= &queue
->limits
;
205 max_sectors
= limit
->max_sectors
;
206 aligned
= 1 << ilog2(max_sectors
);
208 if (aligned
&& (max_sectors
& (aligned
- 1)))
216 sector_t last_block_in_bio
;
217 get_block_t
*get_block
;
218 unsigned int use_writepage
;
219 unsigned int size_to_align
;
223 * I/O completion handler for multipage BIOs.
225 * The mpage code never puts partial pages into a BIO (except for end-of-file).
226 * If a page does not map to a contiguous run of blocks then it simply falls
227 * back to block_read_full_page().
229 * Why is this? If a page's completion depends on a number of different BIOs
230 * which can complete in any order (or at the same time) then determining the
231 * status of that page is hard. See end_buffer_async_read() for the details.
232 * There is no point in duplicating all that complexity.
234 static void __mpage_write_end_io(struct bio
*bio
, int err
)
236 struct bio_vec
*bvec
= bio
->bi_io_vec
+ bio
->bi_vcnt
- 1;
238 ASSERT(bio_data_dir(bio
) == WRITE
); /* only write */
241 struct page
*page
= bvec
->bv_page
;
243 if (--bvec
>= bio
->bi_io_vec
)
244 prefetchw(&bvec
->bv_page
->flags
);
248 mapping_set_error(page
->mapping
, err
);
251 __dfr_writepage_end_io(page
);
253 end_page_writeback(page
);
254 } while (bvec
>= bio
->bi_io_vec
);
258 static struct bio
*mpage_bio_submit_write(int flags
, struct bio
*bio
)
260 bio
->bi_end_io
= mpage_write_end_io
;
261 __sdfat_submit_bio_write2(flags
, bio
);
266 mpage_alloc(struct block_device
*bdev
,
267 sector_t first_sector
, int nr_vecs
,
272 bio
= bio_alloc(gfp_flags
, nr_vecs
);
274 if (bio
== NULL
&& (current
->flags
& PF_MEMALLOC
)) {
275 while (!bio
&& (nr_vecs
/= 2))
276 bio
= bio_alloc(gfp_flags
, nr_vecs
);
281 __sdfat_set_bio_sector(bio
, first_sector
);
286 static int sdfat_mpage_writepage(struct page
*page
,
287 struct writeback_control
*wbc
, void *data
)
289 struct mpage_data
*mpd
= data
;
290 struct bio
*bio
= mpd
->bio
;
291 struct address_space
*mapping
= page
->mapping
;
292 struct inode
*inode
= page
->mapping
->host
;
293 const unsigned int blkbits
= inode
->i_blkbits
;
294 const unsigned int blocks_per_page
= PAGE_SIZE
>> blkbits
;
296 sector_t block_in_file
;
297 sector_t blocks
[MAX_BUF_PER_PAGE
];
298 unsigned int page_block
;
299 unsigned int first_unmapped
= blocks_per_page
;
300 struct block_device
*bdev
= NULL
;
302 sector_t boundary_block
= 0;
303 struct block_device
*boundary_bdev
= NULL
;
305 struct buffer_head map_bh
;
306 loff_t i_size
= i_size_read(inode
);
307 unsigned long end_index
= i_size
>> PAGE_SHIFT
;
310 if (page_has_buffers(page
)) {
311 struct buffer_head
*head
= page_buffers(page
);
312 struct buffer_head
*bh
= head
;
314 /* If they're all mapped and dirty, do it */
317 BUG_ON(buffer_locked(bh
));
318 if (!buffer_mapped(bh
)) {
320 * unmapped dirty buffers are created by
321 * __set_page_dirty_buffers -> mmapped data
323 if (buffer_dirty(bh
))
325 if (first_unmapped
== blocks_per_page
)
326 first_unmapped
= page_block
;
330 if (first_unmapped
!= blocks_per_page
)
331 goto confused
; /* hole -> non-hole */
333 if (!buffer_dirty(bh
) || !buffer_uptodate(bh
))
336 /* bh should be mapped if delay is set */
337 if (buffer_delay(bh
)) {
338 sector_t blk_in_file
=
339 (sector_t
)(page
->index
<< (PAGE_SHIFT
- blkbits
)) + page_block
;
341 BUG_ON(bh
->b_size
!= (1 << blkbits
));
342 if (page
->index
> end_index
) {
344 "over end with delayed buffer"
345 "(page_idx:%u, end_idx:%u)\n",
352 ret
= mpd
->get_block(inode
, blk_in_file
, bh
, 1);
355 "failed to getblk(ret:%d)\n",
356 __func__
, inode
, ret
);
360 BUG_ON(buffer_delay(bh
));
362 if (buffer_new(bh
)) {
363 clear_buffer_new(bh
);
364 unmap_underlying_metadata(bh
->b_bdev
, bh
->b_blocknr
);
369 if (bh
->b_blocknr
!= blocks
[page_block
-1] + 1) {
370 MMSG("%s(inode:%p) pblk(%d) "
371 "no_seq(prev:%lld, new:%lld)\n",
372 __func__
, inode
, page_block
,
373 (u64
)blocks
[page_block
-1],
378 blocks
[page_block
++] = bh
->b_blocknr
;
379 boundary
= buffer_boundary(bh
);
381 boundary_block
= bh
->b_blocknr
;
382 boundary_bdev
= bh
->b_bdev
;
385 } while ((bh
= bh
->b_this_page
) != head
);
391 * Page has buffers, but they are all unmapped. The page was
392 * created by pagein or read over a hole which was handled by
393 * block_read_full_page(). If this address_space is also
394 * using mpage_readpages then this can rarely happen.
400 * The page has no buffers: map it to disk
402 BUG_ON(!PageUptodate(page
));
403 block_in_file
= (sector_t
)page
->index
<< (PAGE_SHIFT
- blkbits
);
404 last_block
= (i_size
- 1) >> blkbits
;
405 map_bh
.b_page
= page
;
406 for (page_block
= 0; page_block
< blocks_per_page
; ) {
409 map_bh
.b_size
= 1 << blkbits
;
410 if (mpd
->get_block(inode
, block_in_file
, &map_bh
, 1))
413 if (buffer_new(&map_bh
))
414 unmap_underlying_metadata(map_bh
.b_bdev
,
416 if (buffer_boundary(&map_bh
)) {
417 boundary_block
= map_bh
.b_blocknr
;
418 boundary_bdev
= map_bh
.b_bdev
;
422 if (map_bh
.b_blocknr
!= blocks
[page_block
-1] + 1)
425 blocks
[page_block
++] = map_bh
.b_blocknr
;
426 boundary
= buffer_boundary(&map_bh
);
427 bdev
= map_bh
.b_bdev
;
428 if (block_in_file
== last_block
)
432 BUG_ON(page_block
== 0);
434 first_unmapped
= page_block
;
437 if (page
->index
>= end_index
) {
439 * The page straddles i_size. It must be zeroed out on each
440 * and every writepage invocation because it may be mmapped.
441 * "A file is mapped in multiples of the page size. For a file
442 * that is not a multiple of the page size, the remaining memory
443 * is zeroed when mapped, and writes to that region are not
444 * written out to the file."
446 unsigned int offset
= i_size
& (PAGE_SIZE
- 1);
448 if (page
->index
> end_index
|| !offset
) {
449 MMSG("%s(inode:%p) over end "
450 "(page_idx:%u, end_idx:%u off:%u)\n",
451 __func__
, inode
, (u32
)page
->index
,
452 (u32
)end_index
, (u32
)offset
);
455 zero_user_segment(page
, offset
, PAGE_SIZE
);
459 * This page will go to BIO. Do we need to send this BIO off first?
461 * REMARK : added ELSE_IF for ALIGNMENT_MPAGE_WRITE of SDFAT
464 if (mpd
->last_block_in_bio
!= blocks
[0] - 1) {
465 bio
= mpage_bio_submit_write(0, bio
);
466 } else if (mpd
->size_to_align
) {
467 unsigned int mask
= mpd
->size_to_align
- 1;
468 sector_t max_end_block
=
469 (__sdfat_bio_sector(bio
) & ~(mask
)) + mask
;
471 if ((__sdfat_bio_size(bio
) != (1 << (mask
+ 1))) &&
472 (mpd
->last_block_in_bio
== max_end_block
)) {
473 MMSG("%s(inode:%p) alignment mpage_bio_submit"
474 "(start:%u, len:%u aligned:%u)\n",
476 (unsigned int)__sdfat_bio_sector(bio
),
477 (unsigned int)(mpd
->last_block_in_bio
-
478 __sdfat_bio_sector(bio
) + 1),
479 (unsigned int)mpd
->size_to_align
);
480 bio
= mpage_bio_submit_write(REQ_NOMERGE
, bio
);
487 bio
= mpage_alloc(bdev
, blocks
[0] << (blkbits
- 9),
488 bio_get_nr_vecs(bdev
), GFP_NOFS
|__GFP_HIGH
);
494 * Must try to add the page before marking the buffer clean or
495 * the confused fail path above (OOM) will be very confused when
496 * it finds all bh marked clean (i.e. it will not write anything)
498 length
= first_unmapped
<< blkbits
;
499 if (bio_add_page(bio
, page
, length
, 0) < length
) {
500 bio
= mpage_bio_submit_write(0, bio
);
505 * OK, we have our BIO, so we can now mark the buffers clean. Make
506 * sure to only clean buffers which we know we'll be writing.
508 if (page_has_buffers(page
)) {
509 struct buffer_head
*head
= page_buffers(page
);
510 struct buffer_head
*bh
= head
;
511 unsigned int buffer_counter
= 0;
514 if (buffer_counter
++ == first_unmapped
)
516 clear_buffer_dirty(bh
);
517 bh
= bh
->b_this_page
;
518 } while (bh
!= head
);
521 * we cannot drop the bh if the page is not uptodate
522 * or a concurrent readpage would fail to serialize with the bh
523 * and it would read from disk before we reach the platter.
525 if (buffer_heads_over_limit
&& PageUptodate(page
))
526 try_to_free_buffers(page
);
529 BUG_ON(PageWriteback(page
));
530 set_page_writeback(page
);
533 * FIXME FOR DEFRAGMENTATION : CODE REVIEW IS REQUIRED
535 * Turn off MAPPED flag in victim's bh if defrag on.
536 * Another write_begin can starts after get_block for defrag victims
538 * In this case, write_begin calls get_block and get original block
539 * number and previous defrag will be canceled.
541 if (unlikely(__check_dfr_on(inode
, (loff_t
)(page
->index
<< PAGE_SHIFT
),
542 (loff_t
)((page
->index
+ 1) << PAGE_SHIFT
), __func__
))) {
543 struct buffer_head
*head
= page_buffers(page
);
544 struct buffer_head
*bh
= head
;
547 clear_buffer_mapped(bh
);
548 bh
= bh
->b_this_page
;
549 } while (bh
!= head
);
553 if (boundary
|| (first_unmapped
!= blocks_per_page
)) {
554 bio
= mpage_bio_submit_write(0, bio
);
555 if (boundary_block
) {
556 write_boundary_block(boundary_bdev
,
557 boundary_block
, 1 << blkbits
);
560 mpd
->last_block_in_bio
= blocks
[blocks_per_page
- 1];
567 bio
= mpage_bio_submit_write(0, bio
);
569 if (mpd
->use_writepage
) {
570 ret
= mapping
->a_ops
->writepage(page
, wbc
);
576 * The caller has a ref on the inode, so *mapping is stable
578 mapping_set_error(mapping
, ret
);
584 int sdfat_mpage_writepages(struct address_space
*mapping
,
585 struct writeback_control
*wbc
, get_block_t
*get_block
)
587 struct blk_plug plug
;
589 struct mpage_data mpd
= {
591 .last_block_in_bio
= 0,
592 .get_block
= get_block
,
594 .size_to_align
= __calc_size_to_align(mapping
->host
->i_sb
),
598 blk_start_plug(&plug
);
599 ret
= write_cache_pages(mapping
, wbc
, sdfat_mpage_writepage
, &mpd
);
601 mpage_bio_submit_write(0, mpd
.bio
);
602 blk_finish_plug(&plug
);
606 #endif /* CONFIG_SDFAT_ALIGNED_MPAGE_WRITE */