UBIFS: store free and dirty space in the bud replay entry
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / ubifs / replay.c
CommitLineData
1e51764a
AB
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file contains journal replay code. It runs when the file-system is being
25 * mounted and requires no locking.
26 *
27 * The larger is the journal, the longer it takes to scan it, so the longer it
28 * takes to mount UBIFS. This is why the journal has limited size which may be
29 * changed depending on the system requirements. But a larger journal gives
30 * faster I/O speed because it writes the index less frequently. So this is a
31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
32 * larger is the journal, the more memory its index may consume.
33 */
34
35#include "ubifs.h"
36
37/*
38 * Replay flags.
39 *
40 * REPLAY_DELETION: node was deleted
41 * REPLAY_REF: node is a reference node
42 */
43enum {
44 REPLAY_DELETION = 1,
45 REPLAY_REF = 2,
46};
47
48/**
49 * struct replay_entry - replay tree entry.
50 * @lnum: logical eraseblock number of the node
51 * @offs: node offset
52 * @len: node length
53 * @sqnum: node sequence number
54 * @flags: replay flags
55 * @rb: links the replay tree
56 * @key: node key
57 * @nm: directory entry name
58 * @old_size: truncation old size
59 * @new_size: truncation new size
60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
52c6e6f9 62 * @jhead: journal head number of the bud
1e51764a
AB
63 *
64 * UBIFS journal replay must compare node sequence numbers, which means it must
65 * build a tree of node information to insert into the TNC.
66 */
67struct replay_entry {
68 int lnum;
69 int offs;
70 int len;
71 unsigned long long sqnum;
72 int flags;
73 struct rb_node rb;
74 union ubifs_key key;
75 union {
76 struct qstr nm;
77 struct {
78 loff_t old_size;
79 loff_t new_size;
80 };
81 struct {
82 int free;
83 int dirty;
52c6e6f9 84 int jhead;
1e51764a
AB
85 };
86 };
87};
88
89/**
90 * struct bud_entry - entry in the list of buds to replay.
91 * @list: next bud in the list
92 * @bud: bud description object
1e51764a 93 * @sqnum: reference node sequence number
af1dd412
AB
94 * @free: free bytes in the bud
95 * @dirty: dirty bytes in the bud
1e51764a
AB
96 */
97struct bud_entry {
98 struct list_head list;
99 struct ubifs_bud *bud;
1e51764a 100 unsigned long long sqnum;
af1dd412
AB
101 int free;
102 int dirty;
1e51764a
AB
103};
104
105/**
106 * set_bud_lprops - set free and dirty space used by a bud.
107 * @c: UBIFS file-system description object
108 * @r: replay entry of bud
109 */
110static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
111{
112 const struct ubifs_lprops *lp;
113 int err = 0, dirty;
114
115 ubifs_get_lprops(c);
116
117 lp = ubifs_lpt_lookup_dirty(c, r->lnum);
118 if (IS_ERR(lp)) {
119 err = PTR_ERR(lp);
120 goto out;
121 }
122
123 dirty = lp->dirty;
124 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
125 /*
126 * The LEB was added to the journal with a starting offset of
127 * zero which means the LEB must have been empty. The LEB
128 * property values should be lp->free == c->leb_size and
129 * lp->dirty == 0, but that is not the case. The reason is that
7a9c3e39
AB
130 * the LEB had been garbage collected before it became the bud,
131 * and there was not commit inbetween. The garbage collector
132 * resets the free and dirty space without recording it
133 * anywhere except lprops, so if there was no commit then
134 * lprops does not have that information.
1e51764a
AB
135 *
136 * We do not need to adjust free space because the scan has told
137 * us the exact value which is recorded in the replay entry as
138 * r->free.
139 *
140 * However we do need to subtract from the dirty space the
141 * amount of space that the garbage collector reclaimed, which
142 * is the whole LEB minus the amount of space that was free.
143 */
144 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
145 lp->free, lp->dirty);
146 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
147 lp->free, lp->dirty);
148 dirty -= c->leb_size - lp->free;
149 /*
150 * If the replay order was perfect the dirty space would now be
7d4e9ccb 151 * zero. The order is not perfect because the journal heads
6edbfafd 152 * race with each other. This is not a problem but is does mean
1e51764a
AB
153 * that the dirty space may temporarily exceed c->leb_size
154 * during the replay.
155 */
156 if (dirty != 0)
157 dbg_msg("LEB %d lp: %d free %d dirty "
158 "replay: %d free %d dirty", r->lnum, lp->free,
159 lp->dirty, r->free, r->dirty);
160 }
161 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
162 lp->flags | LPROPS_TAKEN, 0);
163 if (IS_ERR(lp)) {
164 err = PTR_ERR(lp);
165 goto out;
166 }
52c6e6f9
AB
167
168 /* Make sure the journal head points to the latest bud */
169 err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum,
170 c->leb_size - r->free, UBI_SHORTTERM);
171
1e51764a
AB
172out:
173 ubifs_release_lprops(c);
174 return err;
175}
176
177/**
178 * trun_remove_range - apply a replay entry for a truncation to the TNC.
179 * @c: UBIFS file-system description object
180 * @r: replay entry of truncation
181 */
182static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
183{
184 unsigned min_blk, max_blk;
185 union ubifs_key min_key, max_key;
186 ino_t ino;
187
188 min_blk = r->new_size / UBIFS_BLOCK_SIZE;
189 if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
190 min_blk += 1;
191
192 max_blk = r->old_size / UBIFS_BLOCK_SIZE;
193 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
194 max_blk -= 1;
195
196 ino = key_inum(c, &r->key);
197
198 data_key_init(c, &min_key, ino, min_blk);
199 data_key_init(c, &max_key, ino, max_blk);
200
201 return ubifs_tnc_remove_range(c, &min_key, &max_key);
202}
203
204/**
205 * apply_replay_entry - apply a replay entry to the TNC.
206 * @c: UBIFS file-system description object
207 * @r: replay entry to apply
208 *
209 * Apply a replay entry to the TNC.
210 */
211static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
212{
213 int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
214
215 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
216 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
217
218 /* Set c->replay_sqnum to help deal with dangling branches. */
219 c->replay_sqnum = r->sqnum;
220
221 if (r->flags & REPLAY_REF)
222 err = set_bud_lprops(c, r);
223 else if (is_hash_key(c, &r->key)) {
224 if (deletion)
225 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
226 else
227 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
228 r->len, &r->nm);
229 } else {
230 if (deletion)
231 switch (key_type(c, &r->key)) {
232 case UBIFS_INO_KEY:
233 {
234 ino_t inum = key_inum(c, &r->key);
235
236 err = ubifs_tnc_remove_ino(c, inum);
237 break;
238 }
239 case UBIFS_TRUN_KEY:
240 err = trun_remove_range(c, r);
241 break;
242 default:
243 err = ubifs_tnc_remove(c, &r->key);
244 break;
245 }
246 else
247 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
248 r->len);
249 if (err)
250 return err;
251
252 if (c->need_recovery)
253 err = ubifs_recover_size_accum(c, &r->key, deletion,
254 r->new_size);
255 }
256
257 return err;
258}
259
260/**
261 * destroy_replay_tree - destroy the replay.
262 * @c: UBIFS file-system description object
263 *
264 * Destroy the replay tree.
265 */
266static void destroy_replay_tree(struct ubifs_info *c)
267{
268 struct rb_node *this = c->replay_tree.rb_node;
269 struct replay_entry *r;
270
271 while (this) {
272 if (this->rb_left) {
273 this = this->rb_left;
274 continue;
275 } else if (this->rb_right) {
276 this = this->rb_right;
277 continue;
278 }
279 r = rb_entry(this, struct replay_entry, rb);
280 this = rb_parent(this);
281 if (this) {
282 if (this->rb_left == &r->rb)
283 this->rb_left = NULL;
284 else
285 this->rb_right = NULL;
286 }
287 if (is_hash_key(c, &r->key))
288 kfree(r->nm.name);
289 kfree(r);
290 }
291 c->replay_tree = RB_ROOT;
292}
293
294/**
295 * apply_replay_tree - apply the replay tree to the TNC.
296 * @c: UBIFS file-system description object
297 *
298 * Apply the replay tree.
299 * Returns zero in case of success and a negative error code in case of
300 * failure.
301 */
302static int apply_replay_tree(struct ubifs_info *c)
303{
304 struct rb_node *this = rb_first(&c->replay_tree);
305
306 while (this) {
307 struct replay_entry *r;
308 int err;
309
310 cond_resched();
311
312 r = rb_entry(this, struct replay_entry, rb);
313 err = apply_replay_entry(c, r);
314 if (err)
315 return err;
316 this = rb_next(this);
317 }
318 return 0;
319}
320
321/**
322 * insert_node - insert a node to the replay tree.
323 * @c: UBIFS file-system description object
324 * @lnum: node logical eraseblock number
325 * @offs: node offset
326 * @len: node length
327 * @key: node key
328 * @sqnum: sequence number
329 * @deletion: non-zero if this is a deletion
330 * @used: number of bytes in use in a LEB
331 * @old_size: truncation old size
332 * @new_size: truncation new size
333 *
334 * This function inserts a scanned non-direntry node to the replay tree. The
335 * replay tree is an RB-tree containing @struct replay_entry elements which are
336 * indexed by the sequence number. The replay tree is applied at the very end
337 * of the replay process. Since the tree is sorted in sequence number order,
338 * the older modifications are applied first. This function returns zero in
339 * case of success and a negative error code in case of failure.
340 */
341static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
342 union ubifs_key *key, unsigned long long sqnum,
343 int deletion, int *used, loff_t old_size,
344 loff_t new_size)
345{
346 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
347 struct replay_entry *r;
348
349 if (key_inum(c, key) >= c->highest_inum)
350 c->highest_inum = key_inum(c, key);
351
352 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
353 while (*p) {
354 parent = *p;
355 r = rb_entry(parent, struct replay_entry, rb);
356 if (sqnum < r->sqnum) {
357 p = &(*p)->rb_left;
358 continue;
359 } else if (sqnum > r->sqnum) {
360 p = &(*p)->rb_right;
361 continue;
362 }
363 ubifs_err("duplicate sqnum in replay");
364 return -EINVAL;
365 }
366
367 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
368 if (!r)
369 return -ENOMEM;
370
371 if (!deletion)
372 *used += ALIGN(len, 8);
373 r->lnum = lnum;
374 r->offs = offs;
375 r->len = len;
376 r->sqnum = sqnum;
377 r->flags = (deletion ? REPLAY_DELETION : 0);
378 r->old_size = old_size;
379 r->new_size = new_size;
380 key_copy(c, key, &r->key);
381
382 rb_link_node(&r->rb, parent, p);
383 rb_insert_color(&r->rb, &c->replay_tree);
384 return 0;
385}
386
387/**
388 * insert_dent - insert a directory entry node into the replay tree.
389 * @c: UBIFS file-system description object
390 * @lnum: node logical eraseblock number
391 * @offs: node offset
392 * @len: node length
393 * @key: node key
394 * @name: directory entry name
395 * @nlen: directory entry name length
396 * @sqnum: sequence number
397 * @deletion: non-zero if this is a deletion
398 * @used: number of bytes in use in a LEB
399 *
400 * This function inserts a scanned directory entry node to the replay tree.
401 * Returns zero in case of success and a negative error code in case of
402 * failure.
403 *
404 * This function is also used for extended attribute entries because they are
405 * implemented as directory entry nodes.
406 */
407static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
408 union ubifs_key *key, const char *name, int nlen,
409 unsigned long long sqnum, int deletion, int *used)
410{
411 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
412 struct replay_entry *r;
413 char *nbuf;
414
415 if (key_inum(c, key) >= c->highest_inum)
416 c->highest_inum = key_inum(c, key);
417
418 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
419 while (*p) {
420 parent = *p;
421 r = rb_entry(parent, struct replay_entry, rb);
422 if (sqnum < r->sqnum) {
423 p = &(*p)->rb_left;
424 continue;
425 }
426 if (sqnum > r->sqnum) {
427 p = &(*p)->rb_right;
428 continue;
429 }
430 ubifs_err("duplicate sqnum in replay");
431 return -EINVAL;
432 }
433
434 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
435 if (!r)
436 return -ENOMEM;
437 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
438 if (!nbuf) {
439 kfree(r);
440 return -ENOMEM;
441 }
442
443 if (!deletion)
444 *used += ALIGN(len, 8);
445 r->lnum = lnum;
446 r->offs = offs;
447 r->len = len;
448 r->sqnum = sqnum;
449 r->nm.len = nlen;
450 memcpy(nbuf, name, nlen);
451 nbuf[nlen] = '\0';
452 r->nm.name = nbuf;
453 r->flags = (deletion ? REPLAY_DELETION : 0);
454 key_copy(c, key, &r->key);
455
456 ubifs_assert(!*p);
457 rb_link_node(&r->rb, parent, p);
458 rb_insert_color(&r->rb, &c->replay_tree);
459 return 0;
460}
461
462/**
463 * ubifs_validate_entry - validate directory or extended attribute entry node.
464 * @c: UBIFS file-system description object
465 * @dent: the node to validate
466 *
467 * This function validates directory or extended attribute entry node @dent.
468 * Returns zero if the node is all right and a %-EINVAL if not.
469 */
470int ubifs_validate_entry(struct ubifs_info *c,
471 const struct ubifs_dent_node *dent)
472{
473 int key_type = key_type_flash(c, dent->key);
474 int nlen = le16_to_cpu(dent->nlen);
475
476 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
477 dent->type >= UBIFS_ITYPES_CNT ||
478 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
479 strnlen(dent->name, nlen) != nlen ||
480 le64_to_cpu(dent->inum) > MAX_INUM) {
481 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
482 "directory entry" : "extended attribute entry");
483 return -EINVAL;
484 }
485
486 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
487 ubifs_err("bad key type %d", key_type);
488 return -EINVAL;
489 }
490
491 return 0;
492}
493
494/**
495 * replay_bud - replay a bud logical eraseblock.
496 * @c: UBIFS file-system description object
497 * @lnum: bud logical eraseblock number to replay
498 * @offs: bud start offset
499 * @jhead: journal head to which this bud belongs
500 * @free: amount of free space in the bud is returned here
501 * @dirty: amount of dirty space from padding and deletion nodes is returned
502 * here
503 *
504 * This function returns zero in case of success and a negative error code in
505 * case of failure.
506 */
507static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
508 int *free, int *dirty)
509{
510 int err = 0, used = 0;
511 struct ubifs_scan_leb *sleb;
512 struct ubifs_scan_node *snod;
513 struct ubifs_bud *bud;
514
c839e297 515 dbg_mnt("replay bud LEB %d, head %d, offs %d", lnum, jhead, offs);
1e51764a
AB
516 if (c->need_recovery)
517 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
518 else
348709ba 519 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
1e51764a
AB
520 if (IS_ERR(sleb))
521 return PTR_ERR(sleb);
522
523 /*
524 * The bud does not have to start from offset zero - the beginning of
525 * the 'lnum' LEB may contain previously committed data. One of the
526 * things we have to do in replay is to correctly update lprops with
527 * newer information about this LEB.
528 *
529 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
530 * bytes of free space because it only contain information about
531 * committed data.
532 *
533 * But we know that real amount of free space is 'c->leb_size -
534 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
535 * 'sleb->endpt' is used by bud data. We have to correctly calculate
536 * how much of these data are dirty and update lprops with this
537 * information.
538 *
539 * The dirt in that LEB region is comprised of padding nodes, deletion
540 * nodes, truncation nodes and nodes which are obsoleted by subsequent
541 * nodes in this LEB. So instead of calculating clean space, we
542 * calculate used space ('used' variable).
543 */
544
545 list_for_each_entry(snod, &sleb->nodes, list) {
546 int deletion = 0;
547
548 cond_resched();
549
550 if (snod->sqnum >= SQNUM_WATERMARK) {
551 ubifs_err("file system's life ended");
552 goto out_dump;
553 }
554
555 if (snod->sqnum > c->max_sqnum)
556 c->max_sqnum = snod->sqnum;
557
558 switch (snod->type) {
559 case UBIFS_INO_NODE:
560 {
561 struct ubifs_ino_node *ino = snod->node;
562 loff_t new_size = le64_to_cpu(ino->size);
563
564 if (le32_to_cpu(ino->nlink) == 0)
565 deletion = 1;
566 err = insert_node(c, lnum, snod->offs, snod->len,
567 &snod->key, snod->sqnum, deletion,
568 &used, 0, new_size);
569 break;
570 }
571 case UBIFS_DATA_NODE:
572 {
573 struct ubifs_data_node *dn = snod->node;
574 loff_t new_size = le32_to_cpu(dn->size) +
575 key_block(c, &snod->key) *
576 UBIFS_BLOCK_SIZE;
577
578 err = insert_node(c, lnum, snod->offs, snod->len,
579 &snod->key, snod->sqnum, deletion,
580 &used, 0, new_size);
581 break;
582 }
583 case UBIFS_DENT_NODE:
584 case UBIFS_XENT_NODE:
585 {
586 struct ubifs_dent_node *dent = snod->node;
587
588 err = ubifs_validate_entry(c, dent);
589 if (err)
590 goto out_dump;
591
592 err = insert_dent(c, lnum, snod->offs, snod->len,
593 &snod->key, dent->name,
594 le16_to_cpu(dent->nlen), snod->sqnum,
595 !le64_to_cpu(dent->inum), &used);
596 break;
597 }
598 case UBIFS_TRUN_NODE:
599 {
600 struct ubifs_trun_node *trun = snod->node;
601 loff_t old_size = le64_to_cpu(trun->old_size);
602 loff_t new_size = le64_to_cpu(trun->new_size);
603 union ubifs_key key;
604
605 /* Validate truncation node */
606 if (old_size < 0 || old_size > c->max_inode_sz ||
607 new_size < 0 || new_size > c->max_inode_sz ||
608 old_size <= new_size) {
609 ubifs_err("bad truncation node");
610 goto out_dump;
611 }
612
613 /*
614 * Create a fake truncation key just to use the same
615 * functions which expect nodes to have keys.
616 */
617 trun_key_init(c, &key, le32_to_cpu(trun->inum));
618 err = insert_node(c, lnum, snod->offs, snod->len,
619 &key, snod->sqnum, 1, &used,
620 old_size, new_size);
621 break;
622 }
623 default:
624 ubifs_err("unexpected node type %d in bud LEB %d:%d",
625 snod->type, lnum, snod->offs);
626 err = -EINVAL;
627 goto out_dump;
628 }
629 if (err)
630 goto out;
631 }
632
633 bud = ubifs_search_bud(c, lnum);
634 if (!bud)
635 BUG();
636
637 ubifs_assert(sleb->endpt - offs >= used);
638 ubifs_assert(sleb->endpt % c->min_io_size == 0);
639
1e51764a
AB
640 *dirty = sleb->endpt - offs - used;
641 *free = c->leb_size - sleb->endpt;
c839e297 642 dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, *dirty, *free);
1e51764a
AB
643
644out:
645 ubifs_scan_destroy(sleb);
646 return err;
647
648out_dump:
649 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
650 dbg_dump_node(c, snod->node);
651 ubifs_scan_destroy(sleb);
652 return -EINVAL;
653}
654
655/**
656 * insert_ref_node - insert a reference node to the replay tree.
657 * @c: UBIFS file-system description object
658 * @lnum: node logical eraseblock number
659 * @offs: node offset
660 * @sqnum: sequence number
661 * @free: amount of free space in bud
662 * @dirty: amount of dirty space from padding and deletion nodes
52c6e6f9 663 * @jhead: journal head number for the bud
1e51764a
AB
664 *
665 * This function inserts a reference node to the replay tree and returns zero
6edbfafd 666 * in case of success or a negative error code in case of failure.
1e51764a
AB
667 */
668static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
52c6e6f9
AB
669 unsigned long long sqnum, int free, int dirty,
670 int jhead)
1e51764a
AB
671{
672 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
673 struct replay_entry *r;
674
675 dbg_mnt("add ref LEB %d:%d", lnum, offs);
676 while (*p) {
677 parent = *p;
678 r = rb_entry(parent, struct replay_entry, rb);
679 if (sqnum < r->sqnum) {
680 p = &(*p)->rb_left;
681 continue;
682 } else if (sqnum > r->sqnum) {
683 p = &(*p)->rb_right;
684 continue;
685 }
686 ubifs_err("duplicate sqnum in replay tree");
687 return -EINVAL;
688 }
689
690 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
691 if (!r)
692 return -ENOMEM;
693
694 r->lnum = lnum;
695 r->offs = offs;
696 r->sqnum = sqnum;
697 r->flags = REPLAY_REF;
698 r->free = free;
699 r->dirty = dirty;
52c6e6f9 700 r->jhead = jhead;
1e51764a
AB
701
702 rb_link_node(&r->rb, parent, p);
703 rb_insert_color(&r->rb, &c->replay_tree);
704 return 0;
705}
706
707/**
708 * replay_buds - replay all buds.
709 * @c: UBIFS file-system description object
710 *
711 * This function returns zero in case of success and a negative error code in
712 * case of failure.
713 */
714static int replay_buds(struct ubifs_info *c)
715{
716 struct bud_entry *b;
717 int err, uninitialized_var(free), uninitialized_var(dirty);
7703f09d 718 unsigned long long prev_sqnum = 0;
1e51764a
AB
719
720 list_for_each_entry(b, &c->replay_buds, list) {
721 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
722 &free, &dirty);
723 if (err)
724 return err;
af1dd412
AB
725 b->free = free;
726 b->dirty = dirty;
1e51764a 727 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
52c6e6f9 728 free, dirty, b->bud->jhead);
1e51764a
AB
729 if (err)
730 return err;
7703f09d
AB
731
732 ubifs_assert(b->sqnum > prev_sqnum);
733 prev_sqnum = b->sqnum;
1e51764a
AB
734 }
735
736 return 0;
737}
738
739/**
740 * destroy_bud_list - destroy the list of buds to replay.
741 * @c: UBIFS file-system description object
742 */
743static void destroy_bud_list(struct ubifs_info *c)
744{
745 struct bud_entry *b;
746
747 while (!list_empty(&c->replay_buds)) {
748 b = list_entry(c->replay_buds.next, struct bud_entry, list);
749 list_del(&b->list);
750 kfree(b);
751 }
752}
753
754/**
755 * add_replay_bud - add a bud to the list of buds to replay.
756 * @c: UBIFS file-system description object
757 * @lnum: bud logical eraseblock number to replay
758 * @offs: bud start offset
759 * @jhead: journal head to which this bud belongs
760 * @sqnum: reference node sequence number
761 *
762 * This function returns zero in case of success and a negative error code in
763 * case of failure.
764 */
765static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
766 unsigned long long sqnum)
767{
768 struct ubifs_bud *bud;
769 struct bud_entry *b;
770
771 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
772
773 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
774 if (!bud)
775 return -ENOMEM;
776
777 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
778 if (!b) {
779 kfree(bud);
780 return -ENOMEM;
781 }
782
783 bud->lnum = lnum;
784 bud->start = offs;
785 bud->jhead = jhead;
786 ubifs_add_bud(c, bud);
787
788 b->bud = bud;
789 b->sqnum = sqnum;
790 list_add_tail(&b->list, &c->replay_buds);
791
792 return 0;
793}
794
795/**
796 * validate_ref - validate a reference node.
797 * @c: UBIFS file-system description object
798 * @ref: the reference node to validate
799 * @ref_lnum: LEB number of the reference node
800 * @ref_offs: reference node offset
801 *
802 * This function returns %1 if a bud reference already exists for the LEB. %0 is
803 * returned if the reference node is new, otherwise %-EINVAL is returned if
804 * validation failed.
805 */
806static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
807{
808 struct ubifs_bud *bud;
809 int lnum = le32_to_cpu(ref->lnum);
810 unsigned int offs = le32_to_cpu(ref->offs);
811 unsigned int jhead = le32_to_cpu(ref->jhead);
812
813 /*
814 * ref->offs may point to the end of LEB when the journal head points
815 * to the end of LEB and we write reference node for it during commit.
816 * So this is why we require 'offs > c->leb_size'.
817 */
818 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
819 lnum < c->main_first || offs > c->leb_size ||
820 offs & (c->min_io_size - 1))
821 return -EINVAL;
822
823 /* Make sure we have not already looked at this bud */
824 bud = ubifs_search_bud(c, lnum);
825 if (bud) {
826 if (bud->jhead == jhead && bud->start <= offs)
827 return 1;
828 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
829 return -EINVAL;
830 }
831
832 return 0;
833}
834
835/**
836 * replay_log_leb - replay a log logical eraseblock.
837 * @c: UBIFS file-system description object
838 * @lnum: log logical eraseblock to replay
839 * @offs: offset to start replaying from
840 * @sbuf: scan buffer
841 *
842 * This function replays a log LEB and returns zero in case of success, %1 if
843 * this is the last LEB in the log, and a negative error code in case of
844 * failure.
845 */
846static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
847{
848 int err;
849 struct ubifs_scan_leb *sleb;
850 struct ubifs_scan_node *snod;
851 const struct ubifs_cs_node *node;
852
853 dbg_mnt("replay log LEB %d:%d", lnum, offs);
348709ba
AB
854 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
855 if (IS_ERR(sleb)) {
ed43f2f0
AB
856 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
857 return PTR_ERR(sleb);
7d08ae3c
AB
858 /*
859 * Note, the below function will recover this log LEB only if
860 * it is the last, because unclean reboots can possibly corrupt
861 * only the tail of the log.
862 */
ed43f2f0 863 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
1e51764a
AB
864 if (IS_ERR(sleb))
865 return PTR_ERR(sleb);
866 }
867
868 if (sleb->nodes_cnt == 0) {
869 err = 1;
870 goto out;
871 }
872
873 node = sleb->buf;
1e51764a
AB
874 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
875 if (c->cs_sqnum == 0) {
876 /*
877 * This is the first log LEB we are looking at, make sure that
878 * the first node is a commit start node. Also record its
879 * sequence number so that UBIFS can determine where the log
880 * ends, because all nodes which were have higher sequence
881 * numbers.
882 */
883 if (snod->type != UBIFS_CS_NODE) {
884 dbg_err("first log node at LEB %d:%d is not CS node",
885 lnum, offs);
886 goto out_dump;
887 }
888 if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
889 dbg_err("first CS node at LEB %d:%d has wrong "
890 "commit number %llu expected %llu",
891 lnum, offs,
892 (unsigned long long)le64_to_cpu(node->cmt_no),
893 c->cmt_no);
894 goto out_dump;
895 }
896
897 c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
898 dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
899 }
900
901 if (snod->sqnum < c->cs_sqnum) {
902 /*
903 * This means that we reached end of log and now
904 * look to the older log data, which was already
905 * committed but the eraseblock was not erased (UBIFS
6edbfafd 906 * only un-maps it). So this basically means we have to
1e51764a
AB
907 * exit with "end of log" code.
908 */
909 err = 1;
910 goto out;
911 }
912
913 /* Make sure the first node sits at offset zero of the LEB */
914 if (snod->offs != 0) {
915 dbg_err("first node is not at zero offset");
916 goto out_dump;
917 }
918
919 list_for_each_entry(snod, &sleb->nodes, list) {
1e51764a
AB
920 cond_resched();
921
922 if (snod->sqnum >= SQNUM_WATERMARK) {
923 ubifs_err("file system's life ended");
924 goto out_dump;
925 }
926
927 if (snod->sqnum < c->cs_sqnum) {
928 dbg_err("bad sqnum %llu, commit sqnum %llu",
929 snod->sqnum, c->cs_sqnum);
930 goto out_dump;
931 }
932
933 if (snod->sqnum > c->max_sqnum)
934 c->max_sqnum = snod->sqnum;
935
936 switch (snod->type) {
937 case UBIFS_REF_NODE: {
938 const struct ubifs_ref_node *ref = snod->node;
939
940 err = validate_ref(c, ref);
941 if (err == 1)
942 break; /* Already have this bud */
943 if (err)
944 goto out_dump;
945
946 err = add_replay_bud(c, le32_to_cpu(ref->lnum),
947 le32_to_cpu(ref->offs),
948 le32_to_cpu(ref->jhead),
949 snod->sqnum);
950 if (err)
951 goto out;
952
953 break;
954 }
955 case UBIFS_CS_NODE:
956 /* Make sure it sits at the beginning of LEB */
957 if (snod->offs != 0) {
958 ubifs_err("unexpected node in log");
959 goto out_dump;
960 }
961 break;
962 default:
963 ubifs_err("unexpected node in log");
964 goto out_dump;
965 }
966 }
967
968 if (sleb->endpt || c->lhead_offs >= c->leb_size) {
969 c->lhead_lnum = lnum;
970 c->lhead_offs = sleb->endpt;
971 }
972
973 err = !sleb->endpt;
974out:
975 ubifs_scan_destroy(sleb);
976 return err;
977
978out_dump:
681947d2 979 ubifs_err("log error detected while replaying the log at LEB %d:%d",
1e51764a
AB
980 lnum, offs + snod->offs);
981 dbg_dump_node(c, snod->node);
982 ubifs_scan_destroy(sleb);
983 return -EINVAL;
984}
985
986/**
987 * take_ihead - update the status of the index head in lprops to 'taken'.
988 * @c: UBIFS file-system description object
989 *
990 * This function returns the amount of free space in the index head LEB or a
991 * negative error code.
992 */
993static int take_ihead(struct ubifs_info *c)
994{
995 const struct ubifs_lprops *lp;
996 int err, free;
997
998 ubifs_get_lprops(c);
999
1000 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
1001 if (IS_ERR(lp)) {
1002 err = PTR_ERR(lp);
1003 goto out;
1004 }
1005
1006 free = lp->free;
1007
1008 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
1009 lp->flags | LPROPS_TAKEN, 0);
1010 if (IS_ERR(lp)) {
1011 err = PTR_ERR(lp);
1012 goto out;
1013 }
1014
1015 err = free;
1016out:
1017 ubifs_release_lprops(c);
1018 return err;
1019}
1020
1021/**
1022 * ubifs_replay_journal - replay journal.
1023 * @c: UBIFS file-system description object
1024 *
1025 * This function scans the journal, replays and cleans it up. It makes sure all
1026 * memory data structures related to uncommitted journal are built (dirty TNC
1027 * tree, tree of buds, modified lprops, etc).
1028 */
1029int ubifs_replay_journal(struct ubifs_info *c)
1030{
1031 int err, i, lnum, offs, free;
1e51764a
AB
1032
1033 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1034
1035 /* Update the status of the index head in lprops to 'taken' */
1036 free = take_ihead(c);
1037 if (free < 0)
1038 return free; /* Error code */
1039
1040 if (c->ihead_offs != c->leb_size - free) {
1041 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
1042 c->ihead_offs);
1043 return -EINVAL;
1044 }
1045
1e51764a 1046 dbg_mnt("start replaying the journal");
1e51764a 1047 c->replaying = 1;
1e51764a
AB
1048 lnum = c->ltail_lnum = c->lhead_lnum;
1049 offs = c->lhead_offs;
1050
1051 for (i = 0; i < c->log_lebs; i++, lnum++) {
1052 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
1053 /*
1054 * The log is logically circular, we reached the last
1055 * LEB, switch to the first one.
1056 */
1057 lnum = UBIFS_LOG_LNUM;
1058 offs = 0;
1059 }
6599fcbd 1060 err = replay_log_leb(c, lnum, offs, c->sbuf);
1e51764a
AB
1061 if (err == 1)
1062 /* We hit the end of the log */
1063 break;
1064 if (err)
1065 goto out;
1066 offs = 0;
1067 }
1068
1069 err = replay_buds(c);
1070 if (err)
1071 goto out;
1072
1073 err = apply_replay_tree(c);
1074 if (err)
1075 goto out;
1076
6edbfafd 1077 /*
b137545c
AB
1078 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1079 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
6edbfafd
AB
1080 * depend on it. This means we have to initialize it to make sure
1081 * budgeting works properly.
1082 */
b137545c
AB
1083 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1084 c->bi.uncommitted_idx *= c->max_idx_node_sz;
6edbfafd 1085
1e51764a
AB
1086 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1087 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1088 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
e84461ad 1089 (unsigned long)c->highest_inum);
1e51764a
AB
1090out:
1091 destroy_replay_tree(c);
1092 destroy_bud_list(c);
1e51764a
AB
1093 c->replaying = 0;
1094 return err;
1095}