import PULS_20160108
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / ubifs / replay.c
1 /*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23 /*
24 * This file contains journal replay code. It runs when the file-system is being
25 * mounted and requires no locking.
26 *
27 * The larger is the journal, the longer it takes to scan it, so the longer it
28 * takes to mount UBIFS. This is why the journal has limited size which may be
29 * changed depending on the system requirements. But a larger journal gives
30 * faster I/O speed because it writes the index less frequently. So this is a
31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
32 * larger is the journal, the more memory its index may consume.
33 */
34
35 #include "ubifs.h"
36 #include <linux/list_sort.h>
37
38 /**
39 * struct replay_entry - replay list entry.
40 * @lnum: logical eraseblock number of the node
41 * @offs: node offset
42 * @len: node length
43 * @deletion: non-zero if this entry corresponds to a node deletion
44 * @sqnum: node sequence number
45 * @list: links the replay list
46 * @key: node key
47 * @nm: directory entry name
48 * @old_size: truncation old size
49 * @new_size: truncation new size
50 *
51 * The replay process first scans all buds and builds the replay list, then
52 * sorts the replay list in nodes sequence number order, and then inserts all
53 * the replay entries to the TNC.
54 */
55 struct replay_entry {
56 int lnum;
57 int offs;
58 int len;
59 unsigned int deletion:1;
60 unsigned long long sqnum;
61 struct list_head list;
62 union ubifs_key key;
63 union {
64 struct qstr nm;
65 struct {
66 loff_t old_size;
67 loff_t new_size;
68 };
69 };
70 };
71
72 /**
73 * struct bud_entry - entry in the list of buds to replay.
74 * @list: next bud in the list
75 * @bud: bud description object
76 * @sqnum: reference node sequence number
77 * @free: free bytes in the bud
78 * @dirty: dirty bytes in the bud
79 */
80 struct bud_entry {
81 struct list_head list;
82 struct ubifs_bud *bud;
83 unsigned long long sqnum;
84 int free;
85 int dirty;
86 };
87
88 /**
89 * set_bud_lprops - set free and dirty space used by a bud.
90 * @c: UBIFS file-system description object
91 * @b: bud entry which describes the bud
92 *
93 * This function makes sure the LEB properties of bud @b are set correctly
94 * after the replay. Returns zero in case of success and a negative error code
95 * in case of failure.
96 */
97 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
98 {
99 const struct ubifs_lprops *lp;
100 int err = 0, dirty;
101
102 ubifs_get_lprops(c);
103
104 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
105 if (IS_ERR(lp)) {
106 err = PTR_ERR(lp);
107 goto out;
108 }
109
110 dirty = lp->dirty;
111 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
112 /*
113 * The LEB was added to the journal with a starting offset of
114 * zero which means the LEB must have been empty. The LEB
115 * property values should be @lp->free == @c->leb_size and
116 * @lp->dirty == 0, but that is not the case. The reason is that
117 * the LEB had been garbage collected before it became the bud,
118 * and there was not commit inbetween. The garbage collector
119 * resets the free and dirty space without recording it
120 * anywhere except lprops, so if there was no commit then
121 * lprops does not have that information.
122 *
123 * We do not need to adjust free space because the scan has told
124 * us the exact value which is recorded in the replay entry as
125 * @b->free.
126 *
127 * However we do need to subtract from the dirty space the
128 * amount of space that the garbage collector reclaimed, which
129 * is the whole LEB minus the amount of space that was free.
130 */
131 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
132 lp->free, lp->dirty);
133 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
134 lp->free, lp->dirty);
135 dirty -= c->leb_size - lp->free;
136 /*
137 * If the replay order was perfect the dirty space would now be
138 * zero. The order is not perfect because the journal heads
139 * race with each other. This is not a problem but is does mean
140 * that the dirty space may temporarily exceed c->leb_size
141 * during the replay.
142 */
143 if (dirty != 0)
144 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
145 b->bud->lnum, lp->free, lp->dirty, b->free,
146 b->dirty);
147 }
148 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
149 lp->flags | LPROPS_TAKEN, 0);
150 if (IS_ERR(lp)) {
151 err = PTR_ERR(lp);
152 goto out;
153 }
154
155 //MTK start
156 if (c->need_recovery && b->free > 0) {
157 err = ubifs_leb_read(c, b->bud->lnum, c->sbuf, 0, c->leb_size - b->free, 0);
158 if(err) {
159 switch(err) {
160 case -EUCLEAN:
161 /* shouldn't see this error, UBI will take care this*/
162 goto skip_move;
163 case -EBADMSG:
164 /* copy if error == -EBADMSG */
165 break;
166 default:
167 ubifs_err("cannot read %d bytes from LEB %d:%d,"
168 " error %d", c->leb_size - b->free, b->bud->lnum, 0, err);
169 goto out;
170 }
171 }
172 dbg_mnt("ubifs_leb_change LEB %d:%d\n", b->bud->lnum, c->leb_size - b->free);
173 err = ubifs_leb_change(c, b->bud->lnum, c->sbuf, c->leb_size - b->free);
174 if(err) {
175 ubifs_err("ubifs_leb_change LEB %d:%d fail\n", b->bud->lnum, c->leb_size - b->free);
176 }
177 }
178 skip_move:
179 //MTK end
180 /* Make sure the journal head points to the latest bud */
181 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
182 b->bud->lnum, c->leb_size - b->free);
183
184 out:
185 ubifs_release_lprops(c);
186 return err;
187 }
188
189 /**
190 * set_buds_lprops - set free and dirty space for all replayed buds.
191 * @c: UBIFS file-system description object
192 *
193 * This function sets LEB properties for all replayed buds. Returns zero in
194 * case of success and a negative error code in case of failure.
195 */
196 static int set_buds_lprops(struct ubifs_info *c)
197 {
198 struct bud_entry *b;
199 int err;
200
201 list_for_each_entry(b, &c->replay_buds, list) {
202 err = set_bud_lprops(c, b);
203 if (err)
204 return err;
205 }
206
207 return 0;
208 }
209
210 /**
211 * trun_remove_range - apply a replay entry for a truncation to the TNC.
212 * @c: UBIFS file-system description object
213 * @r: replay entry of truncation
214 */
215 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
216 {
217 unsigned min_blk, max_blk;
218 union ubifs_key min_key, max_key;
219 ino_t ino;
220
221 min_blk = r->new_size / UBIFS_BLOCK_SIZE;
222 if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
223 min_blk += 1;
224
225 max_blk = r->old_size / UBIFS_BLOCK_SIZE;
226 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
227 max_blk -= 1;
228
229 ino = key_inum(c, &r->key);
230
231 data_key_init(c, &min_key, ino, min_blk);
232 data_key_init(c, &max_key, ino, max_blk);
233
234 return ubifs_tnc_remove_range(c, &min_key, &max_key);
235 }
236
237 /**
238 * apply_replay_entry - apply a replay entry to the TNC.
239 * @c: UBIFS file-system description object
240 * @r: replay entry to apply
241 *
242 * Apply a replay entry to the TNC.
243 */
244 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
245 {
246 int err;
247
248 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ",
249 r->lnum, r->offs, r->len, r->deletion, r->sqnum);
250
251 /* Set c->replay_sqnum to help deal with dangling branches. */
252 c->replay_sqnum = r->sqnum;
253
254 if (is_hash_key(c, &r->key)) {
255 if (r->deletion)
256 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
257 else
258 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
259 r->len, &r->nm);
260 } else {
261 if (r->deletion)
262 switch (key_type(c, &r->key)) {
263 case UBIFS_INO_KEY:
264 {
265 ino_t inum = key_inum(c, &r->key);
266
267 err = ubifs_tnc_remove_ino(c, inum);
268 break;
269 }
270 case UBIFS_TRUN_KEY:
271 err = trun_remove_range(c, r);
272 break;
273 default:
274 err = ubifs_tnc_remove(c, &r->key);
275 break;
276 }
277 else
278 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
279 r->len);
280 if (err)
281 return err;
282
283 if (c->need_recovery)
284 err = ubifs_recover_size_accum(c, &r->key, r->deletion,
285 r->new_size);
286 }
287
288 return err;
289 }
290
291 /**
292 * replay_entries_cmp - compare 2 replay entries.
293 * @priv: UBIFS file-system description object
294 * @a: first replay entry
295 * @a: second replay entry
296 *
297 * This is a comparios function for 'list_sort()' which compares 2 replay
298 * entries @a and @b by comparing their sequence numer. Returns %1 if @a has
299 * greater sequence number and %-1 otherwise.
300 */
301 static int replay_entries_cmp(void *priv, struct list_head *a,
302 struct list_head *b)
303 {
304 struct replay_entry *ra, *rb;
305
306 cond_resched();
307 if (a == b)
308 return 0;
309
310 ra = list_entry(a, struct replay_entry, list);
311 rb = list_entry(b, struct replay_entry, list);
312 ubifs_assert(ra->sqnum != rb->sqnum);
313 if (ra->sqnum > rb->sqnum)
314 return 1;
315 return -1;
316 }
317
318 /**
319 * apply_replay_list - apply the replay list to the TNC.
320 * @c: UBIFS file-system description object
321 *
322 * Apply all entries in the replay list to the TNC. Returns zero in case of
323 * success and a negative error code in case of failure.
324 */
325 static int apply_replay_list(struct ubifs_info *c)
326 {
327 struct replay_entry *r;
328 int err;
329
330 list_sort(c, &c->replay_list, &replay_entries_cmp);
331
332 list_for_each_entry(r, &c->replay_list, list) {
333 cond_resched();
334
335 err = apply_replay_entry(c, r);
336 if (err)
337 return err;
338 }
339
340 return 0;
341 }
342
343 /**
344 * destroy_replay_list - destroy the replay.
345 * @c: UBIFS file-system description object
346 *
347 * Destroy the replay list.
348 */
349 static void destroy_replay_list(struct ubifs_info *c)
350 {
351 struct replay_entry *r, *tmp;
352
353 list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
354 if (is_hash_key(c, &r->key))
355 kfree(r->nm.name);
356 list_del(&r->list);
357 kfree(r);
358 }
359 }
360
361 /**
362 * insert_node - insert a node to the replay list
363 * @c: UBIFS file-system description object
364 * @lnum: node logical eraseblock number
365 * @offs: node offset
366 * @len: node length
367 * @key: node key
368 * @sqnum: sequence number
369 * @deletion: non-zero if this is a deletion
370 * @used: number of bytes in use in a LEB
371 * @old_size: truncation old size
372 * @new_size: truncation new size
373 *
374 * This function inserts a scanned non-direntry node to the replay list. The
375 * replay list contains @struct replay_entry elements, and we sort this list in
376 * sequence number order before applying it. The replay list is applied at the
377 * very end of the replay process. Since the list is sorted in sequence number
378 * order, the older modifications are applied first. This function returns zero
379 * in case of success and a negative error code in case of failure.
380 */
381 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
382 union ubifs_key *key, unsigned long long sqnum,
383 int deletion, int *used, loff_t old_size,
384 loff_t new_size)
385 {
386 struct replay_entry *r;
387
388 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
389
390 if (key_inum(c, key) >= c->highest_inum)
391 c->highest_inum = key_inum(c, key);
392
393 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
394 if (!r)
395 return -ENOMEM;
396
397 if (!deletion)
398 *used += ALIGN(len, 8);
399 r->lnum = lnum;
400 r->offs = offs;
401 r->len = len;
402 r->deletion = !!deletion;
403 r->sqnum = sqnum;
404 key_copy(c, key, &r->key);
405 r->old_size = old_size;
406 r->new_size = new_size;
407
408 list_add_tail(&r->list, &c->replay_list);
409 return 0;
410 }
411
412 /**
413 * insert_dent - insert a directory entry node into the replay list.
414 * @c: UBIFS file-system description object
415 * @lnum: node logical eraseblock number
416 * @offs: node offset
417 * @len: node length
418 * @key: node key
419 * @name: directory entry name
420 * @nlen: directory entry name length
421 * @sqnum: sequence number
422 * @deletion: non-zero if this is a deletion
423 * @used: number of bytes in use in a LEB
424 *
425 * This function inserts a scanned directory entry node or an extended
426 * attribute entry to the replay list. Returns zero in case of success and a
427 * negative error code in case of failure.
428 */
429 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
430 union ubifs_key *key, const char *name, int nlen,
431 unsigned long long sqnum, int deletion, int *used)
432 {
433 struct replay_entry *r;
434 char *nbuf;
435
436 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
437 if (key_inum(c, key) >= c->highest_inum)
438 c->highest_inum = key_inum(c, key);
439
440 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
441 if (!r)
442 return -ENOMEM;
443
444 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
445 if (!nbuf) {
446 kfree(r);
447 return -ENOMEM;
448 }
449
450 if (!deletion)
451 *used += ALIGN(len, 8);
452 r->lnum = lnum;
453 r->offs = offs;
454 r->len = len;
455 r->deletion = !!deletion;
456 r->sqnum = sqnum;
457 key_copy(c, key, &r->key);
458 r->nm.len = nlen;
459 memcpy(nbuf, name, nlen);
460 nbuf[nlen] = '\0';
461 r->nm.name = nbuf;
462
463 list_add_tail(&r->list, &c->replay_list);
464 return 0;
465 }
466
467 /**
468 * ubifs_validate_entry - validate directory or extended attribute entry node.
469 * @c: UBIFS file-system description object
470 * @dent: the node to validate
471 *
472 * This function validates directory or extended attribute entry node @dent.
473 * Returns zero if the node is all right and a %-EINVAL if not.
474 */
475 int ubifs_validate_entry(struct ubifs_info *c,
476 const struct ubifs_dent_node *dent)
477 {
478 int key_type = key_type_flash(c, dent->key);
479 int nlen = le16_to_cpu(dent->nlen);
480
481 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
482 dent->type >= UBIFS_ITYPES_CNT ||
483 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
484 strnlen(dent->name, nlen) != nlen ||
485 le64_to_cpu(dent->inum) > MAX_INUM) {
486 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
487 "directory entry" : "extended attribute entry");
488 return -EINVAL;
489 }
490
491 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
492 ubifs_err("bad key type %d", key_type);
493 return -EINVAL;
494 }
495
496 return 0;
497 }
498
499 /**
500 * is_last_bud - check if the bud is the last in the journal head.
501 * @c: UBIFS file-system description object
502 * @bud: bud description object
503 *
504 * This function checks if bud @bud is the last bud in its journal head. This
505 * information is then used by 'replay_bud()' to decide whether the bud can
506 * have corruptions or not. Indeed, only last buds can be corrupted by power
507 * cuts. Returns %1 if this is the last bud, and %0 if not.
508 */
509 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
510 {
511 struct ubifs_jhead *jh = &c->jheads[bud->jhead];
512 struct ubifs_bud *next;
513 uint32_t data;
514 int err;
515
516 if (list_is_last(&bud->list, &jh->buds_list))
517 return 1;
518
519 /*
520 * The following is a quirk to make sure we work correctly with UBIFS
521 * images used with older UBIFS.
522 *
523 * Normally, the last bud will be the last in the journal head's list
524 * of bud. However, there is one exception if the UBIFS image belongs
525 * to older UBIFS. This is fairly unlikely: one would need to use old
526 * UBIFS, then have a power cut exactly at the right point, and then
527 * try to mount this image with new UBIFS.
528 *
529 * The exception is: it is possible to have 2 buds A and B, A goes
530 * before B, and B is the last, bud B is contains no data, and bud A is
531 * corrupted at the end. The reason is that in older versions when the
532 * journal code switched the next bud (from A to B), it first added a
533 * log reference node for the new bud (B), and only after this it
534 * synchronized the write-buffer of current bud (A). But later this was
535 * changed and UBIFS started to always synchronize the write-buffer of
536 * the bud (A) before writing the log reference for the new bud (B).
537 *
538 * But because older UBIFS always synchronized A's write-buffer before
539 * writing to B, we can recognize this exceptional situation but
540 * checking the contents of bud B - if it is empty, then A can be
541 * treated as the last and we can recover it.
542 *
543 * TODO: remove this piece of code in a couple of years (today it is
544 * 16.05.2011).
545 */
546 next = list_entry(bud->list.next, struct ubifs_bud, list);
547 if (!list_is_last(&next->list, &jh->buds_list))
548 return 0;
549
550 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
551 if (err)
552 return 0;
553
554 return data == 0xFFFFFFFF;
555 }
556
557 /**
558 * replay_bud - replay a bud logical eraseblock.
559 * @c: UBIFS file-system description object
560 * @b: bud entry which describes the bud
561 *
562 * This function replays bud @bud, recovers it if needed, and adds all nodes
563 * from this bud to the replay list. Returns zero in case of success and a
564 * negative error code in case of failure.
565 */
566 static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
567 {
568 int is_last = is_last_bud(c, b->bud);
569 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
570 struct ubifs_scan_leb *sleb;
571 struct ubifs_scan_node *snod;
572
573 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
574 lnum, b->bud->jhead, offs, is_last);
575
576 if (c->need_recovery && is_last)
577 /*
578 * Recover only last LEBs in the journal heads, because power
579 * cuts may cause corruptions only in these LEBs, because only
580 * these LEBs could possibly be written to at the power cut
581 * time.
582 */
583 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
584 else
585 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
586 if (IS_ERR(sleb))
587 return PTR_ERR(sleb);
588
589 /*
590 * The bud does not have to start from offset zero - the beginning of
591 * the 'lnum' LEB may contain previously committed data. One of the
592 * things we have to do in replay is to correctly update lprops with
593 * newer information about this LEB.
594 *
595 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
596 * bytes of free space because it only contain information about
597 * committed data.
598 *
599 * But we know that real amount of free space is 'c->leb_size -
600 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
601 * 'sleb->endpt' is used by bud data. We have to correctly calculate
602 * how much of these data are dirty and update lprops with this
603 * information.
604 *
605 * The dirt in that LEB region is comprised of padding nodes, deletion
606 * nodes, truncation nodes and nodes which are obsoleted by subsequent
607 * nodes in this LEB. So instead of calculating clean space, we
608 * calculate used space ('used' variable).
609 */
610
611 list_for_each_entry(snod, &sleb->nodes, list) {
612 int deletion = 0;
613
614 cond_resched();
615
616 if (snod->sqnum >= SQNUM_WATERMARK) {
617 ubifs_err("file system's life ended");
618 goto out_dump;
619 }
620
621 if (snod->sqnum > c->max_sqnum)
622 c->max_sqnum = snod->sqnum;
623
624 switch (snod->type) {
625 case UBIFS_INO_NODE:
626 {
627 struct ubifs_ino_node *ino = snod->node;
628 loff_t new_size = le64_to_cpu(ino->size);
629
630 if (le32_to_cpu(ino->nlink) == 0)
631 deletion = 1;
632 err = insert_node(c, lnum, snod->offs, snod->len,
633 &snod->key, snod->sqnum, deletion,
634 &used, 0, new_size);
635 break;
636 }
637 case UBIFS_DATA_NODE:
638 {
639 struct ubifs_data_node *dn = snod->node;
640 loff_t new_size = le32_to_cpu(dn->size) +
641 key_block(c, &snod->key) *
642 UBIFS_BLOCK_SIZE;
643
644 err = insert_node(c, lnum, snod->offs, snod->len,
645 &snod->key, snod->sqnum, deletion,
646 &used, 0, new_size);
647 break;
648 }
649 case UBIFS_DENT_NODE:
650 case UBIFS_XENT_NODE:
651 {
652 struct ubifs_dent_node *dent = snod->node;
653
654 err = ubifs_validate_entry(c, dent);
655 if (err)
656 goto out_dump;
657
658 err = insert_dent(c, lnum, snod->offs, snod->len,
659 &snod->key, dent->name,
660 le16_to_cpu(dent->nlen), snod->sqnum,
661 !le64_to_cpu(dent->inum), &used);
662 break;
663 }
664 case UBIFS_TRUN_NODE:
665 {
666 struct ubifs_trun_node *trun = snod->node;
667 loff_t old_size = le64_to_cpu(trun->old_size);
668 loff_t new_size = le64_to_cpu(trun->new_size);
669 union ubifs_key key;
670
671 /* Validate truncation node */
672 if (old_size < 0 || old_size > c->max_inode_sz ||
673 new_size < 0 || new_size > c->max_inode_sz ||
674 old_size <= new_size) {
675 ubifs_err("bad truncation node");
676 goto out_dump;
677 }
678
679 /*
680 * Create a fake truncation key just to use the same
681 * functions which expect nodes to have keys.
682 */
683 trun_key_init(c, &key, le32_to_cpu(trun->inum));
684 err = insert_node(c, lnum, snod->offs, snod->len,
685 &key, snod->sqnum, 1, &used,
686 old_size, new_size);
687 break;
688 }
689 default:
690 ubifs_err("unexpected node type %d in bud LEB %d:%d",
691 snod->type, lnum, snod->offs);
692 err = -EINVAL;
693 goto out_dump;
694 }
695 if (err)
696 goto out;
697 }
698
699 ubifs_assert(ubifs_search_bud(c, lnum));
700 ubifs_assert(sleb->endpt - offs >= used);
701 ubifs_assert(sleb->endpt % c->min_io_size == 0);
702
703 b->dirty = sleb->endpt - offs - used;
704 b->free = c->leb_size - sleb->endpt;
705 dbg_mnt("bud LEB %d replied: dirty %d, free %d",
706 lnum, b->dirty, b->free);
707
708 out:
709 ubifs_scan_destroy(sleb);
710 return err;
711
712 out_dump:
713 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
714 ubifs_dump_node(c, snod->node);
715 ubifs_scan_destroy(sleb);
716 return -EINVAL;
717 }
718
719 /**
720 * replay_buds - replay all buds.
721 * @c: UBIFS file-system description object
722 *
723 * This function returns zero in case of success and a negative error code in
724 * case of failure.
725 */
726 static int replay_buds(struct ubifs_info *c)
727 {
728 struct bud_entry *b;
729 int err;
730 unsigned long long prev_sqnum = 0;
731
732 list_for_each_entry(b, &c->replay_buds, list) {
733 err = replay_bud(c, b);
734 if (err)
735 return err;
736
737 ubifs_assert(b->sqnum > prev_sqnum);
738 prev_sqnum = b->sqnum;
739 }
740
741 return 0;
742 }
743
744 /**
745 * destroy_bud_list - destroy the list of buds to replay.
746 * @c: UBIFS file-system description object
747 */
748 static void destroy_bud_list(struct ubifs_info *c)
749 {
750 struct bud_entry *b;
751
752 while (!list_empty(&c->replay_buds)) {
753 b = list_entry(c->replay_buds.next, struct bud_entry, list);
754 list_del(&b->list);
755 kfree(b);
756 }
757 }
758
759 /**
760 * add_replay_bud - add a bud to the list of buds to replay.
761 * @c: UBIFS file-system description object
762 * @lnum: bud logical eraseblock number to replay
763 * @offs: bud start offset
764 * @jhead: journal head to which this bud belongs
765 * @sqnum: reference node sequence number
766 *
767 * This function returns zero in case of success and a negative error code in
768 * case of failure.
769 */
770 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
771 unsigned long long sqnum)
772 {
773 struct ubifs_bud *bud;
774 struct bud_entry *b;
775
776 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
777
778 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
779 if (!bud)
780 return -ENOMEM;
781
782 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
783 if (!b) {
784 kfree(bud);
785 return -ENOMEM;
786 }
787
788 bud->lnum = lnum;
789 bud->start = offs;
790 bud->jhead = jhead;
791 ubifs_add_bud(c, bud);
792
793 b->bud = bud;
794 b->sqnum = sqnum;
795 list_add_tail(&b->list, &c->replay_buds);
796
797 return 0;
798 }
799
800 /**
801 * validate_ref - validate a reference node.
802 * @c: UBIFS file-system description object
803 * @ref: the reference node to validate
804 * @ref_lnum: LEB number of the reference node
805 * @ref_offs: reference node offset
806 *
807 * This function returns %1 if a bud reference already exists for the LEB. %0 is
808 * returned if the reference node is new, otherwise %-EINVAL is returned if
809 * validation failed.
810 */
811 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
812 {
813 struct ubifs_bud *bud;
814 int lnum = le32_to_cpu(ref->lnum);
815 unsigned int offs = le32_to_cpu(ref->offs);
816 unsigned int jhead = le32_to_cpu(ref->jhead);
817
818 /*
819 * ref->offs may point to the end of LEB when the journal head points
820 * to the end of LEB and we write reference node for it during commit.
821 * So this is why we require 'offs > c->leb_size'.
822 */
823 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
824 lnum < c->main_first || offs > c->leb_size ||
825 offs & (c->min_io_size - 1))
826 return -EINVAL;
827
828 /* Make sure we have not already looked at this bud */
829 bud = ubifs_search_bud(c, lnum);
830 if (bud) {
831 if (bud->jhead == jhead && bud->start <= offs)
832 return 1;
833 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
834 return -EINVAL;
835 }
836
837 return 0;
838 }
839
840 /**
841 * replay_log_leb - replay a log logical eraseblock.
842 * @c: UBIFS file-system description object
843 * @lnum: log logical eraseblock to replay
844 * @offs: offset to start replaying from
845 * @sbuf: scan buffer
846 *
847 * This function replays a log LEB and returns zero in case of success, %1 if
848 * this is the last LEB in the log, and a negative error code in case of
849 * failure.
850 */
851 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
852 {
853 int err;
854 struct ubifs_scan_leb *sleb;
855 struct ubifs_scan_node *snod;
856 const struct ubifs_cs_node *node;
857
858 dbg_mnt("replay log LEB %d:%d", lnum, offs);
859 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
860 if (IS_ERR(sleb)) {
861 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
862 return PTR_ERR(sleb);
863 /*
864 * Note, the below function will recover this log LEB only if
865 * it is the last, because unclean reboots can possibly corrupt
866 * only the tail of the log.
867 */
868 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
869 if (IS_ERR(sleb))
870 return PTR_ERR(sleb);
871 }
872
873 if (sleb->nodes_cnt == 0) {
874 err = 1;
875 goto out;
876 }
877
878 #ifdef CONFIG_UBIFS_FS_FULL_USE_LOG_BACKWARD
879 /* Search the last cs node whose cmt_no is recorded in master node*/
880 list_for_each_entry_reverse(snod, &sleb->nodes, list) {
881 if(snod->type == UBIFS_CS_NODE)
882 {
883 if(c->cs_sqnum == 0)
884 {
885 node = snod->node;
886 if(le64_to_cpu(node->cmt_no) == c->cmt_no)
887 break;
888 }
889 else
890 break;
891 }
892 }
893 node = snod->node;
894 #else
895 node = sleb->buf;
896 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
897 #endif
898 if (c->cs_sqnum == 0) {
899 /*
900 * This is the first log LEB we are looking at, make sure that
901 * the first node is a commit start node. Also record its
902 * sequence number so that UBIFS can determine where the log
903 * ends, because all nodes which were have higher sequence
904 * numbers.
905 */
906 if (snod->type != UBIFS_CS_NODE) {
907 ubifs_err("first log node at LEB %d:%d is not CS node",
908 lnum, offs);
909 goto out_dump;
910 }
911 if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
912 ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
913 lnum, offs,
914 (unsigned long long)le64_to_cpu(node->cmt_no),
915 c->cmt_no);
916 goto out_dump;
917 }
918
919 c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
920 dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
921 }
922
923 if (snod->sqnum < c->cs_sqnum) {
924 /*
925 * This means that we reached end of log and now
926 * look to the older log data, which was already
927 * committed but the eraseblock was not erased (UBIFS
928 * only un-maps it). So this basically means we have to
929 * exit with "end of log" code.
930 */
931 err = 1;
932 goto out;
933 }
934
935 #ifndef CONFIG_UBIFS_FS_FULL_USE_LOG_BACKWARD
936 /* Make sure the first node sits at offset zero of the LEB */
937 if (snod->offs != 0) {
938 ubifs_err("first node is not at zero offset");
939 goto out_dump;
940 }
941 #endif
942
943 list_for_each_entry(snod, &sleb->nodes, list) {
944 cond_resched();
945
946 if (snod->sqnum >= SQNUM_WATERMARK) {
947 ubifs_err("file system's life ended");
948 goto out_dump;
949 }
950
951 if (snod->sqnum < c->cs_sqnum) {
952 #ifdef CONFIG_UBIFS_FS_FULL_USE_LOG_BACKWARD
953 /*
954 * Node with sqnum less than that of current cs already handled
955 * skip current snod from adding.
956 */
957 continue;
958 #else
959 ubifs_err("bad sqnum %llu, commit sqnum %llu",
960 snod->sqnum, c->cs_sqnum);
961 goto out_dump;
962 #endif
963 }
964
965 if (snod->sqnum > c->max_sqnum)
966 c->max_sqnum = snod->sqnum;
967
968 switch (snod->type) {
969 case UBIFS_REF_NODE: {
970 const struct ubifs_ref_node *ref = snod->node;
971
972 err = validate_ref(c, ref);
973 if (err == 1)
974 break; /* Already have this bud */
975 if (err)
976 goto out_dump;
977
978 err = add_replay_bud(c, le32_to_cpu(ref->lnum),
979 le32_to_cpu(ref->offs),
980 le32_to_cpu(ref->jhead),
981 snod->sqnum);
982 if (err)
983 goto out;
984
985 break;
986 }
987 case UBIFS_CS_NODE:
988 #ifdef CONFIG_UBIFS_FS_FULL_USE_LOG_BACKWARD
989 continue;
990 #else
991 /* Make sure it sits at the beginning of LEB */
992 if (snod->offs != 0) {
993 ubifs_err("unexpected node in log");
994 goto out_dump;
995 }
996 #endif
997 break;
998 default:
999 ubifs_err("unexpected node in log");
1000 goto out_dump;
1001 }
1002 }
1003
1004 if (sleb->endpt || c->lhead_offs >= c->leb_size) {
1005 c->lhead_lnum = lnum;
1006 c->lhead_offs = sleb->endpt;
1007 }
1008
1009 err = !sleb->endpt;
1010 out:
1011 ubifs_scan_destroy(sleb);
1012 return err;
1013
1014 out_dump:
1015 ubifs_err("log error detected while replaying the log at LEB %d:%d",
1016 lnum, offs + snod->offs);
1017 ubifs_dump_node(c, snod->node);
1018 ubifs_scan_destroy(sleb);
1019 return -EINVAL;
1020 }
1021
1022 /**
1023 * take_ihead - update the status of the index head in lprops to 'taken'.
1024 * @c: UBIFS file-system description object
1025 *
1026 * This function returns the amount of free space in the index head LEB or a
1027 * negative error code.
1028 */
1029 static int take_ihead(struct ubifs_info *c)
1030 {
1031 const struct ubifs_lprops *lp;
1032 int err, free;
1033
1034 ubifs_get_lprops(c);
1035
1036 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
1037 if (IS_ERR(lp)) {
1038 err = PTR_ERR(lp);
1039 goto out;
1040 }
1041
1042 free = lp->free;
1043
1044 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
1045 lp->flags | LPROPS_TAKEN, 0);
1046 if (IS_ERR(lp)) {
1047 err = PTR_ERR(lp);
1048 goto out;
1049 }
1050
1051 err = free;
1052 out:
1053 ubifs_release_lprops(c);
1054 return err;
1055 }
1056
1057 /**
1058 * ubifs_replay_journal - replay journal.
1059 * @c: UBIFS file-system description object
1060 *
1061 * This function scans the journal, replays and cleans it up. It makes sure all
1062 * memory data structures related to uncommitted journal are built (dirty TNC
1063 * tree, tree of buds, modified lprops, etc).
1064 */
1065 int ubifs_replay_journal(struct ubifs_info *c)
1066 {
1067 int err, lnum, free;
1068
1069 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1070
1071 /* Update the status of the index head in lprops to 'taken' */
1072 free = take_ihead(c);
1073 if (free < 0)
1074 return free; /* Error code */
1075
1076 if (c->ihead_offs != c->leb_size - free) {
1077 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
1078 c->ihead_offs);
1079 return -EINVAL;
1080 }
1081
1082 dbg_mnt("start replaying the journal");
1083 c->replaying = 1;
1084 lnum = c->ltail_lnum = c->lhead_lnum;
1085
1086 do {
1087 err = replay_log_leb(c, lnum, 0, c->sbuf);
1088 if (err == 1)
1089 /* We hit the end of the log */
1090 break;
1091 if (err)
1092 goto out;
1093 lnum = ubifs_next_log_lnum(c, lnum);
1094 } while (lnum != c->ltail_lnum);
1095
1096 err = replay_buds(c);
1097 if (err)
1098 goto out;
1099
1100 err = apply_replay_list(c);
1101 if (err)
1102 goto out;
1103
1104 err = set_buds_lprops(c);
1105 if (err)
1106 goto out;
1107
1108 /*
1109 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1110 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
1111 * depend on it. This means we have to initialize it to make sure
1112 * budgeting works properly.
1113 */
1114 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1115 c->bi.uncommitted_idx *= c->max_idx_node_sz;
1116
1117 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1118 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
1119 c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1120 (unsigned long)c->highest_inum);
1121 out:
1122 destroy_replay_list(c);
1123 destroy_bud_list(c);
1124 c->replaying = 0;
1125 return err;
1126 }