/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>

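/*
 * Read completion callback: mark each page in the bio up-to-date on success,
 * or clear it and flag an error otherwise, then unlock the page.
 */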
static void f2fs_read_end_io(struct bio *bio, int err)
{
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;

		if (!err) {
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
	}
	bio_put(bio);
}

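/*
 * Write completion callback: flag failed pages and stop checkpointing on
 * error, end page writeback, and wake up the checkpoint waiter once all
 * writeback pages have drained.
 */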
static void f2fs_write_end_io(struct bio *bio, int err)
{
	struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb);
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;

		if (unlikely(err)) {
			SetPageError(page);
			set_bit(AS_EIO, &page->mapping->flags);
			f2fs_stop_checkpoint(sbi);
		}
		end_page_writeback(page);
		dec_page_count(sbi, F2FS_WRITEBACK);
	}

	if (bio->bi_private)
		complete(bio->bi_private);

	if (!get_pages(sbi, F2FS_WRITEBACK) &&
			!list_empty(&sbi->cp_wait.task_list))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				int npages, bool is_read)
{
	struct bio *bio;

	/* No failure on bio allocation */
	bio = bio_alloc(GFP_NOIO, npages);

	bio->bi_bdev = sbi->sb->s_bdev;
	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;

	return bio;
}

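/*
 * Submit whatever bio has been merged into @io so far and clear it.
 * META_FLUSH bios are waited on via an on-stack completion.
 */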
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;
	int rw;

	if (!io->bio)
		return;

	rw = fio->rw;

	if (is_read_io(rw)) {
		trace_f2fs_submit_read_bio(io->sbi->sb, rw,
						fio->type, io->bio);
		submit_bio(rw, io->bio);
	} else {
		trace_f2fs_submit_write_bio(io->sbi->sb, rw,
						fio->type, io->bio);
		/*
		 * META_FLUSH is only issued from the checkpoint procedure, and
		 * we should wait for this metadata bio for FS consistency.
		 */
		if (fio->type == META_FLUSH) {
			DECLARE_COMPLETION_ONSTACK(wait);
			io->bio->bi_private = &wait;
			submit_bio(rw, io->bio);
			wait_for_completion(&wait);
		} else {
			submit_bio(rw, io->bio);
		}
	}

	io->bio = NULL;
}

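/*
 * Flush the merged bio of the given page type; the checkpoint procedure
 * upgrades META requests to META_FLUSH so they are flushed and waited on.
 */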
void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
				enum page_type type, int rw)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io;

	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];

	mutex_lock(&io->io_mutex);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
	}
	__submit_merged_bio(io);
	mutex_unlock(&io->io_mutex);
}

/*
 * Fill the locked page with data located in the block address.
 * Return unlocked page.
 */
int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
					block_t blk_addr, int rw)
{
	struct bio *bio;

	trace_f2fs_submit_page_bio(page, blk_addr, rw);

	/* Allocate a new bio */
	bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));

	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
		bio_put(bio);
		f2fs_put_page(page, 1);
		return -EFAULT;
	}

	submit_bio(rw, bio);
	return 0;
}

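/*
 * Add the page to the per-type merged bio; the pending bio is submitted
 * early when the block address is not contiguous, the rw flags differ,
 * or the bio is full.
 */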
void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
			block_t blk_addr, struct f2fs_io_info *fio)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io;
	bool is_read = is_read_io(fio->rw);

	io = is_read ? &sbi->read_io : &sbi->write_io[btype];

	verify_block_addr(sbi, blk_addr);

	mutex_lock(&io->io_mutex);

	if (!is_read)
		inc_page_count(sbi, F2FS_WRITEBACK);

	if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
						io->fio.rw != fio->rw))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));

		io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
							PAGE_CACHE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	io->last_block_in_bio = blk_addr;

	mutex_unlock(&io->io_mutex);
	trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
{
	struct f2fs_node *rn;
	__le32 *addr_array;
	struct page *node_page = dn->node_page;
	unsigned int ofs_in_node = dn->ofs_in_node;

	f2fs_wait_on_page_writeback(node_page, NODE);

	rn = F2FS_NODE(node_page);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[ofs_in_node] = cpu_to_le32(new_addr);
	set_page_dirty(node_page);
}

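/*
 * Reserve a new data block for the dnode: charge one valid block to the
 * inode and record NEW_ADDR in the parent node page.
 */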
int reserve_new_block(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);

	__set_data_blkaddr(dn, NEW_ADDR);
	dn->data_blkaddr = NEW_ADDR;
	mark_inode_dirty(dn->inode);
	sync_inode_page(dn);
	return 0;
}

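/*
 * Look up the dnode for @index and reserve a new block there if the slot is
 * still unallocated (NULL_ADDR).
 */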
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	/* if inode_page exists, index should be zero */
	f2fs_bug_on(!need_put && index);

	err = get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

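/*
 * Try to serve the lookup from the in-memory extent cache; on a hit, map
 * bh_result directly and return 1 without touching the node page.
 */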
static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
					struct buffer_head *bh_result)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	pgoff_t start_fofs, end_fofs;
	block_t start_blkaddr;

	if (is_inode_flag_set(fi, FI_NO_EXTENT))
		return 0;

	read_lock(&fi->ext.ext_lock);
	if (fi->ext.len == 0) {
		read_unlock(&fi->ext.ext_lock);
		return 0;
	}

	stat_inc_total_hit(inode->i_sb);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk_addr;

	if (pgofs >= start_fofs && pgofs <= end_fofs) {
		unsigned int blkbits = inode->i_sb->s_blocksize_bits;
		size_t count;

		clear_buffer_new(bh_result);
		map_bh(bh_result, inode->i_sb,
				start_blkaddr + pgofs - start_fofs);
		count = end_fofs - pgofs + 1;
		if (count < (UINT_MAX >> blkbits))
			bh_result->b_size = (count << blkbits);
		else
			bh_result->b_size = UINT_MAX;

		stat_inc_read_hit(inode->i_sb);
		read_unlock(&fi->ext.ext_lock);
		return 1;
	}
	read_unlock(&fi->ext.ext_lock);
	return 0;
}

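/*
 * Record the new block address in the node page and grow, shrink, or drop
 * the single-extent cache so it stays consistent with the on-disk mapping.
 */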
void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
{
	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
	pgoff_t fofs, start_fofs, end_fofs;
	block_t start_blkaddr, end_blkaddr;
	int need_update = true;

	f2fs_bug_on(blk_addr == NEW_ADDR);
	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
							dn->ofs_in_node;

	/* Update the page address in the parent node */
	__set_data_blkaddr(dn, blk_addr);

	if (is_inode_flag_set(fi, FI_NO_EXTENT))
		return;

	write_lock(&fi->ext.ext_lock);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk_addr;
	end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;

	/* Drop and initialize the matched extent */
	if (fi->ext.len == 1 && fofs == start_fofs)
		fi->ext.len = 0;

	/* Initial extent */
	if (fi->ext.len == 0) {
		if (blk_addr != NULL_ADDR) {
			fi->ext.fofs = fofs;
			fi->ext.blk_addr = blk_addr;
			fi->ext.len = 1;
		}
		goto end_update;
	}

	/* Front merge */
	if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
		fi->ext.fofs--;
		fi->ext.blk_addr--;
		fi->ext.len++;
		goto end_update;
	}

	/* Back merge */
	if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
		fi->ext.len++;
		goto end_update;
	}

	/* Split the existing extent */
	if (fi->ext.len > 1 &&
		fofs >= start_fofs && fofs <= end_fofs) {
		if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
			fi->ext.len = fofs - start_fofs;
		} else {
			fi->ext.fofs = fofs + 1;
			fi->ext.blk_addr = start_blkaddr +
					fofs - start_fofs + 1;
			fi->ext.len -= fofs - start_fofs + 1;
		}
	} else {
		need_update = false;
	}

	/* Finally, if the extent is very fragmented, let's drop the cache. */
	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
		fi->ext.len = 0;
		set_inode_flag(fi, FI_NO_EXTENT);
		need_update = true;
	}
end_update:
	write_unlock(&fi->ext.ext_lock);
	if (need_update)
		sync_inode_page(dn);
	return;
}

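/*
 * Look up the data page at @index and return it with a reference but not
 * locked, reading it from disk when needed (synchronously if @sync).
 */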
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		return ERR_PTR(err);
	f2fs_put_dnode(&dn);

	if (dn.data_blkaddr == NULL_ADDR)
		return ERR_PTR(-ENOENT);

	/* By fallocate(), there is no cached page, but its blkaddr is NEW_ADDR */
	if (unlikely(dn.data_blkaddr == NEW_ADDR))
		return ERR_PTR(-EINVAL);

	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
					sync ? READ_SYNC : READA);
	if (err)
		return ERR_PTR(err);

	if (sync) {
		wait_on_page_locked(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 0);
			return ERR_PTR(-EIO);
		}
	}
	return page;
}

/*
 * If it tries to access a hole, return an error.
 * This is because the callers (functions in dir.c and GC) should be able to
 * know whether this page exists or not.
 */
struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

repeat:
	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
	if (!page)
		return ERR_PTR(-ENOMEM);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-ENOENT);
	}

	if (PageUptodate(page))
		return page;

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such a case, its blkaddr can remain NEW_ADDR.
	 * See f2fs_add_link -> get_new_data_page -> init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
		return page;
	}

	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
	if (err)
		return ERR_PTR(err);

	lock_page(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that ipage is set only by make_empty_dir.
 */
struct page *get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err)
		return ERR_PTR(err);
repeat:
	page = grab_cache_page(mapping, index);
	if (!page) {
		err = -ENOMEM;
		goto put_err;
	}

	if (PageUptodate(page))
		return page;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
	} else {
		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
								READ_SYNC);
		if (err)
			goto put_err;

		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 1);
			err = -EIO;
			goto put_err;
		}
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	if (new_i_size &&
		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
		/* Only the directory inode sets new_i_size */
		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
	}
	return page;

put_err:
	f2fs_put_dnode(&dn);
	return ERR_PTR(err);
}

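/*
 * Allocate an on-disk block for direct I/O: charge one valid block, reserve
 * NEW_ADDR in the node page, then pick a new address from the warm data log.
 */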
static int __allocate_data_block(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct f2fs_summary sum;
	block_t new_blkaddr;
	struct node_info ni;
	int type;

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

	__set_data_blkaddr(dn, NEW_ADDR);
	dn->data_blkaddr = NEW_ADDR;

	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

	type = CURSEG_WARM_DATA;

	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);

	/* direct IO doesn't use extent cache to maximize the performance */
	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
	update_extent_cache(new_blkaddr, dn);
	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);

	dn->data_blkaddr = new_blkaddr;
	return 0;
}

/*
 * get_data_block() now supports readahead/bmap/rw direct_IO with a mapped bh.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
	unsigned maxblocks = bh_result->b_size >> blkbits;
	struct dnode_of_data dn;
	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
	pgoff_t pgofs, end_offset;
	int err = 0, ofs = 1;
	bool allocated = false;

	/* Get the page offset from the block offset (iblock) */
	pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));

	if (check_extent_cache(inode, pgofs, bh_result))
		goto out;

	if (create)
		f2fs_lock_op(sbi);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (err == -ENOENT)
			err = 0;
		goto unlock_out;
	}
	if (dn.data_blkaddr == NEW_ADDR)
		goto put_out;

	if (dn.data_blkaddr != NULL_ADDR) {
		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
	} else if (create) {
		err = __allocate_data_block(&dn);
		if (err)
			goto put_out;
		allocated = true;
		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
	} else {
		goto put_out;
	}

	end_offset = IS_INODE(dn.node_page) ?
			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
	bh_result->b_size = (((size_t)1) << blkbits);
	dn.ofs_in_node++;
	pgofs++;

get_next:
	if (dn.ofs_in_node >= end_offset) {
		if (allocated)
			sync_inode_page(&dn);
		allocated = false;
		f2fs_put_dnode(&dn);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pgofs, mode);
		if (err) {
			if (err == -ENOENT)
				err = 0;
			goto unlock_out;
		}
		if (dn.data_blkaddr == NEW_ADDR)
			goto put_out;

		end_offset = IS_INODE(dn.node_page) ?
			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
	}

	if (maxblocks > (bh_result->b_size >> blkbits)) {
		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
		if (blkaddr == NULL_ADDR && create) {
			err = __allocate_data_block(&dn);
			if (err)
				goto sync_out;
			allocated = true;
			blkaddr = dn.data_blkaddr;
		}
		/* Give more consecutive addresses for the readahead */
		if (blkaddr == (bh_result->b_blocknr + ofs)) {
			ofs++;
			dn.ofs_in_node++;
			pgofs++;
			bh_result->b_size += (((size_t)1) << blkbits);
			goto get_next;
		}
	}
sync_out:
	if (allocated)
		sync_inode_page(&dn);
put_out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (create)
		f2fs_unlock_op(sbi);
out:
	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
	return err;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int ret;

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	else
		ret = mpage_readpage(page, get_data_block);

	return ret;
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = file->f_mapping->host;

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

	return mpage_readpages(mapping, pages, nr_pages, get_data_block);
}

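/*
 * Write one dirty data page at its current dnode slot: rewrite it in place
 * when SSR makes that preferable, otherwise write it to a newly allocated
 * address and update the extent cache.
 */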
int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
{
	struct inode *inode = page->mapping->host;
	block_t old_blkaddr, new_blkaddr;
	struct dnode_of_data dn;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		return err;

	old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (old_blkaddr == NULL_ADDR)
		goto out_writepage;

	set_page_writeback(page);

	/*
	 * If current allocation needs SSR, it is better to do in-place
	 * writes for updated data.
	 */
	if (unlikely(old_blkaddr != NEW_ADDR &&
			!is_cold_data(page) &&
			need_inplace_update(inode))) {
		rewrite_data_page(page, old_blkaddr, fio);
	} else {
		write_data_page(page, &dn, &new_blkaddr, fio);
		update_extent_cache(new_blkaddr, &dn);
	}
out_writepage:
	f2fs_put_dnode(&dn);
	return err;
}

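/*
 * ->writepage handler: pages beyond i_size are skipped or zero-padded, dentry
 * pages are written under checkpoint control, and regular data is written
 * (inline or via do_write_data_page) under f2fs_lock_op().
 */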
static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_CACHE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.type = DATA,
		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
	};

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_CACHE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset) {
		if (S_ISDIR(inode->i_mode)) {
			dec_page_count(sbi, F2FS_DIRTY_DENTS);
			inode_dec_dirty_dents(inode);
		}
		goto out;
	}

	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
	if (unlikely(sbi->por_doing)) {
		err = AOP_WRITEPAGE_ACTIVATE;
		goto redirty_out;
	}

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		dec_page_count(sbi, F2FS_DIRTY_DENTS);
		inode_dec_dirty_dents(inode);
		err = do_write_data_page(page, &fio);
	} else {
		f2fs_lock_op(sbi);

		if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
			err = f2fs_write_inline_data(inode, page, offset);
			f2fs_unlock_op(sbi);
			goto out;
		} else {
			err = do_write_data_page(page, &fio);
		}

		f2fs_unlock_op(sbi);
		need_balance_fs = true;
	}
	if (err == -ENOENT)
		goto out;
	else if (err)
		goto redirty_out;

	if (wbc->for_reclaim) {
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
		need_balance_fs = false;
	}

	clear_cold_data(page);
out:
	unlock_page(page);
	if (need_balance_fs)
		f2fs_balance_fs(sbi);
	return 0;

redirty_out:
	wbc->pages_skipped++;
	set_page_dirty(page);
	return err;
}

#define MAX_DESIRED_PAGES_WP	4096

static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	bool locked = false;
	int ret;
	long excess_nrtw = 0, desired_nrtw;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
		desired_nrtw = MAX_DESIRED_PAGES_WP;
		excess_nrtw = desired_nrtw - wbc->nr_to_write;
		wbc->nr_to_write = desired_nrtw;
	}

	if (!S_ISDIR(inode->i_mode)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}
	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
	if (locked)
		mutex_unlock(&sbi->writepages);

	f2fs_submit_merged_bio(sbi, DATA, WRITE);

	remove_dirty_dir_inode(inode);

	wbc->nr_to_write -= excess_nrtw;
	return ret;
}

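/*
 * ->write_begin handler: convert inline data when the write outgrows it,
 * reserve a block for the page when needed, and read or zero the page so it
 * is up to date before the copy-in.
 */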
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct page *page;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
	struct dnode_of_data dn;
	int err = 0;

	f2fs_balance_fs(sbi);
repeat:
	err = f2fs_convert_inline_data(inode, pos + len);
	if (err)
		return err;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;

	if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
		goto inline_data;

	f2fs_lock_op(sbi);
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	f2fs_unlock_op(sbi);

	if (err) {
		f2fs_put_page(page, 1);
		return err;
	}
inline_data:
	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
		return 0;

	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
		unsigned end = start + len;

		/* Reading beyond i_size is simple: memset to zero */
		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
		goto out;
	}

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
	} else {
		if (f2fs_has_inline_data(inode))
			err = f2fs_read_inline_data(inode, page);
		else
			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
							READ_SYNC);
		if (err)
			return err;
		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 1);
			return -EIO;
		}
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}
out:
	SetPageUptodate(page);
	clear_cold_data(page);
	return 0;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	SetPageUptodate(page);
	set_page_dirty(page);

	if (pos + copied > i_size_read(inode)) {
		i_size_write(inode, pos + copied);
		mark_inode_dirty(inode);
		update_inode_page(inode);
	}

	f2fs_put_page(page, 1);
	return copied;
}

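/*
 * Direct writes must be aligned to the block size in both offset and segment
 * length; a nonzero return here makes f2fs_direct_IO() fall back to buffered
 * I/O.
 */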
static int check_direct_IO(struct inode *inode, int rw,
		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
	int i;

	if (rw == READ)
		return 0;

	if (offset & blocksize_mask)
		return -EINVAL;

	for (i = 0; i < nr_segs; i++)
		if (iov[i].iov_len & blocksize_mask)
			return -EINVAL;
	return 0;
}

static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	/* Let buffer I/O handle the inline data case. */
	if (f2fs_has_inline_data(inode))
		return 0;

	if (check_direct_IO(inode, rw, iov, offset, nr_segs))
		return 0;

	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
							get_data_block);
}

static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
				      unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
		dec_page_count(sbi, F2FS_DIRTY_DENTS);
		inode_dec_dirty_dents(inode);
	}
	ClearPagePrivate(page);
}

static int f2fs_release_data_page(struct page *page, gfp_t wait)
{
	ClearPagePrivate(page);
	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(page, DATA);

	SetPageUptodate(page);
	mark_inode_dirty(inode);

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		set_dirty_dir_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, get_data_block);
}

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_data_page,
	.releasepage	= f2fs_release_data_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
};