Btrfs: set ioprio of scrub readahead to idle
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / btrfs / check-integrity.c
CommitLineData
5db02760
SB
1/*
2 * Copyright (C) STRATO AG 2011. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19/*
20 * This module can be used to catch cases when the btrfs kernel
21 * code executes write requests to the disk that bring the file
22 * system in an inconsistent state. In such a state, a power-loss
23 * or kernel panic event would cause that the data on disk is
24 * lost or at least damaged.
25 *
26 * Code is added that examines all block write requests during
27 * runtime (including writes of the super block). Three rules
28 * are verified and an error is printed on violation of the
29 * rules:
30 * 1. It is not allowed to write a disk block which is
31 * currently referenced by the super block (either directly
32 * or indirectly).
33 * 2. When a super block is written, it is verified that all
34 * referenced (directly or indirectly) blocks fulfill the
35 * following requirements:
36 * 2a. All referenced blocks have either been present when
37 * the file system was mounted, (i.e., they have been
38 * referenced by the super block) or they have been
39 * written since then and the write completion callback
40 * was called and a FLUSH request to the device where
41 * these blocks are located was received and completed.
42 * 2b. All referenced blocks need to have a generation
43 * number which is equal to the parent's number.
44 *
45 * One issue that was found using this module was that the log
46 * tree on disk became temporarily corrupted because disk blocks
47 * that had been in use for the log tree had been freed and
48 * reused too early, while being referenced by the written super
49 * block.
50 *
51 * The search term in the kernel log that can be used to filter
52 * on the existence of detected integrity issues is
53 * "btrfs: attempt".
54 *
55 * The integrity check is enabled via mount options. These
56 * mount options are only supported if the integrity check
57 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
58 *
59 * Example #1, apply integrity checks to all metadata:
60 * mount /dev/sdb1 /mnt -o check_int
61 *
62 * Example #2, apply integrity checks to all metadata and
63 * to data extents:
64 * mount /dev/sdb1 /mnt -o check_int_data
65 *
66 * Example #3, apply integrity checks to all metadata and dump
67 * the tree that the super block references to kernel messages
68 * each time after a super block was written:
69 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
70 *
71 * If the integrity check tool is included and activated in
72 * the mount options, plenty of kernel memory is used, and
73 * plenty of additional CPU cycles are spent. Enabling this
74 * functionality is not intended for normal use. In most
75 * cases, unless you are a btrfs developer who needs to verify
76 * the integrity of (super)-block write requests, do not
77 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
78 * include and compile the integrity check tool.
79 */
80
81#include <linux/sched.h>
82#include <linux/slab.h>
83#include <linux/buffer_head.h>
84#include <linux/mutex.h>
85#include <linux/crc32c.h>
86#include <linux/genhd.h>
87#include <linux/blkdev.h>
88#include "ctree.h"
89#include "disk-io.h"
90#include "transaction.h"
91#include "extent_io.h"
5db02760
SB
92#include "volumes.h"
93#include "print-tree.h"
94#include "locking.h"
95#include "check-integrity.h"
96
97#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
98#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
99#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
100#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
101#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
102#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
103#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
104#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
105 * excluding " [...]" */
5db02760
SB
106#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
107
108/*
109 * The definition of the bitmask fields for the print_mask.
110 * They are specified with the mount option check_integrity_print_mask.
111 */
112#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001
113#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002
114#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004
115#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008
116#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010
117#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020
118#define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040
119#define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080
120#define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100
121#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200
122#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
123#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
124#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
125
126struct btrfsic_dev_state;
127struct btrfsic_state;
128
/*
 * State tracked for one disk block (metadata, data or superblock).
 * Blocks are kept in state->all_blocks_list and looked up by
 * (bdev, dev_bytenr) via the block hashtable.
 */
struct btrfsic_block {
	u32 magic_num;		/* only used for debug purposes */
	unsigned int is_metadata:1;	/* if it is meta-data, not file data */
	unsigned int is_superblock:1;	/* if it is one of the superblocks */
	unsigned int is_iodone:1;	/* if is done by lower subsystem */
	unsigned int iodone_w_error:1;	/* error was indicated to endio */
	unsigned int never_written:1;	/* block was added because it was
					 * referenced, not because it was
					 * written */
	unsigned int mirror_num:2;	/* large enough to hold
					 * BTRFS_SUPER_MIRROR_MAX */
	struct btrfsic_dev_state *dev_state;
	u64 dev_bytenr;		/* key, physical byte num on disk */
	u64 logical_bytenr;	/* logical byte num on disk */
	u64 generation;
	struct btrfs_disk_key disk_key;	/* extra info to print in case of
					 * issues, will not always be correct */
	struct list_head collision_resolving_node;	/* list node */
	struct list_head all_blocks_node;	/* list node */

	/* the following two lists contain block_link items */
	struct list_head ref_to_list;	/* list */
	struct list_head ref_from_list;	/* list */
	/* chain of blocks submitted in the same bio; walked by end_io */
	struct btrfsic_block *next_in_same_bio;
	/* saved bi_private/b_private of the intercepted bio or bh */
	void *orig_bio_bh_private;
	/* saved original completion callback, restored on our end_io */
	union {
		bio_end_io_t *bio;
		bh_end_io_t *bh;
	} orig_bio_bh_end_io;
	int submit_bio_bh_rw;
	u64 flush_gen; /* only valid if !never_written */
};
161
/*
 * Elements of this type are allocated dynamically and required because
 * each block object can refer to and can be referenced from multiple
 * blocks. The key to look them up in the hashtable is the dev_bytenr of
 * the block referred to plus the one from the block referred from.
 * The fact that they are searchable via a hashtable and that a
 * ref_cnt is maintained is not required for the btrfs integrity
 * check algorithm itself, it is only used to make the output more
 * beautiful in case that an error is detected (an error is defined
 * as a write operation to a block while that block is still referenced).
 */
/* A directed reference edge between two btrfsic_block objects. */
struct btrfsic_block_link {
	u32 magic_num;		/* only used for debug purposes */
	u32 ref_cnt;		/* number of times this edge was found */
	struct list_head node_ref_to;	/* list node */
	struct list_head node_ref_from;	/* list node */
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block *block_ref_to;
	struct btrfsic_block *block_ref_from;
	/* generation of the parent at the time the link was created */
	u64 parent_generation;
};
183
/* Per-block-device state, keyed by bdev in the dev_state hashtable. */
struct btrfsic_dev_state {
	u32 magic_num;		/* only used for debug purposes */
	struct block_device *bdev;
	struct btrfsic_state *state;
	struct list_head collision_resolving_node;	/* list node */
	/* placeholder block used to intercept pure flush bios/bhs */
	struct btrfsic_block dummy_block_for_bio_bh_flush;
	u64 last_flush_gen;	/* generation of last completed FLUSH */
	char name[BDEVNAME_SIZE];
};
193
/*
 * Simple chained hashtables. Each table entry is the head of a
 * collision list; sizes are powers of two so the hash can be masked.
 */
struct btrfsic_block_hashtable {
	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};

struct btrfsic_block_link_hashtable {
	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};

struct btrfsic_dev_state_hashtable {
	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};
205
/*
 * Describes one mapped block: its logical and physical location plus
 * the page/data buffers filled by btrfsic_read_block(). Released with
 * btrfsic_release_block_ctx() (mem_to_free holds the allocation).
 */
struct btrfsic_block_data_ctx {
	u64 start;		/* virtual bytenr */
	u64 dev_bytenr;		/* physical bytenr on device */
	u32 len;
	struct btrfsic_dev_state *dev;
	char **datav;		/* per-page kmapped data pointers */
	struct page **pagev;	/* pages backing datav */
	void *mem_to_free;	/* single allocation backing the arrays */
};
215
/* This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock() */
struct btrfsic_stack_frame {
	u32 magic;		/* frame validity check */
	u32 nr;			/* number of items in the current node */
	int error;
	int i;			/* current item index */
	int limit_nesting;	/* remaining allowed nesting depth */
	int num_copies;
	int mirror_num;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx *block_ctx;
	struct btrfsic_block *next_block;
	struct btrfsic_block_data_ctx next_block_ctx;
	struct btrfs_header *hdr;
	struct btrfsic_stack_frame *prev;	/* caller frame */
};
233
/* Some state per mounted filesystem */
struct btrfsic_state {
	u32 print_mask;		/* BTRFSIC_PRINT_MASK_* verbosity bits */
	int include_extent_data;	/* also check data extents */
	int csum_size;		/* checksum size of the selected super */
	struct list_head all_blocks_list;
	struct btrfsic_block_hashtable block_hashtable;
	struct btrfsic_block_link_hashtable block_link_hashtable;
	struct btrfs_root *root;
	u64 max_superblock_generation;
	struct btrfsic_block *latest_superblock;
	u32 metablock_size;	/* nodesize/leafsize of the fs */
	u32 datablock_size;	/* sectorsize of the fs */
};
248
249static void btrfsic_block_init(struct btrfsic_block *b);
250static struct btrfsic_block *btrfsic_block_alloc(void);
251static void btrfsic_block_free(struct btrfsic_block *b);
252static void btrfsic_block_link_init(struct btrfsic_block_link *n);
253static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
254static void btrfsic_block_link_free(struct btrfsic_block_link *n);
255static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
256static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
257static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
258static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
259static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
260 struct btrfsic_block_hashtable *h);
261static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
262static struct btrfsic_block *btrfsic_block_hashtable_lookup(
263 struct block_device *bdev,
264 u64 dev_bytenr,
265 struct btrfsic_block_hashtable *h);
266static void btrfsic_block_link_hashtable_init(
267 struct btrfsic_block_link_hashtable *h);
268static void btrfsic_block_link_hashtable_add(
269 struct btrfsic_block_link *l,
270 struct btrfsic_block_link_hashtable *h);
271static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
272static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
273 struct block_device *bdev_ref_to,
274 u64 dev_bytenr_ref_to,
275 struct block_device *bdev_ref_from,
276 u64 dev_bytenr_ref_from,
277 struct btrfsic_block_link_hashtable *h);
278static void btrfsic_dev_state_hashtable_init(
279 struct btrfsic_dev_state_hashtable *h);
280static void btrfsic_dev_state_hashtable_add(
281 struct btrfsic_dev_state *ds,
282 struct btrfsic_dev_state_hashtable *h);
283static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
284static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
285 struct block_device *bdev,
286 struct btrfsic_dev_state_hashtable *h);
287static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
288static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
289static int btrfsic_process_superblock(struct btrfsic_state *state,
290 struct btrfs_fs_devices *fs_devices);
291static int btrfsic_process_metablock(struct btrfsic_state *state,
292 struct btrfsic_block *block,
293 struct btrfsic_block_data_ctx *block_ctx,
5db02760 294 int limit_nesting, int force_iodone_flag);
e06baab4
SB
295static void btrfsic_read_from_block_data(
296 struct btrfsic_block_data_ctx *block_ctx,
297 void *dst, u32 offset, size_t len);
5db02760
SB
298static int btrfsic_create_link_to_next_block(
299 struct btrfsic_state *state,
300 struct btrfsic_block *block,
301 struct btrfsic_block_data_ctx
302 *block_ctx, u64 next_bytenr,
303 int limit_nesting,
304 struct btrfsic_block_data_ctx *next_block_ctx,
305 struct btrfsic_block **next_blockp,
306 int force_iodone_flag,
307 int *num_copiesp, int *mirror_nump,
308 struct btrfs_disk_key *disk_key,
309 u64 parent_generation);
310static int btrfsic_handle_extent_data(struct btrfsic_state *state,
311 struct btrfsic_block *block,
312 struct btrfsic_block_data_ctx *block_ctx,
313 u32 item_offset, int force_iodone_flag);
314static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
315 struct btrfsic_block_data_ctx *block_ctx_out,
316 int mirror_num);
317static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
318 u32 len, struct block_device *bdev,
319 struct btrfsic_block_data_ctx *block_ctx_out);
320static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
321static int btrfsic_read_block(struct btrfsic_state *state,
322 struct btrfsic_block_data_ctx *block_ctx);
323static void btrfsic_dump_database(struct btrfsic_state *state);
e06baab4 324static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
5db02760 325static int btrfsic_test_for_metadata(struct btrfsic_state *state,
e06baab4 326 char **datav, unsigned int num_pages);
5db02760 327static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
e06baab4
SB
328 u64 dev_bytenr, char **mapped_datav,
329 unsigned int num_pages,
330 struct bio *bio, int *bio_is_patched,
5db02760
SB
331 struct buffer_head *bh,
332 int submit_bio_bh_rw);
333static int btrfsic_process_written_superblock(
334 struct btrfsic_state *state,
335 struct btrfsic_block *const block,
336 struct btrfs_super_block *const super_hdr);
337static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
338static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
339static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
340 const struct btrfsic_block *block,
341 int recursion_level);
342static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
343 struct btrfsic_block *const block,
344 int recursion_level);
345static void btrfsic_print_add_link(const struct btrfsic_state *state,
346 const struct btrfsic_block_link *l);
347static void btrfsic_print_rem_link(const struct btrfsic_state *state,
348 const struct btrfsic_block_link *l);
349static char btrfsic_get_block_type(const struct btrfsic_state *state,
350 const struct btrfsic_block *block);
351static void btrfsic_dump_tree(const struct btrfsic_state *state);
352static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
353 const struct btrfsic_block *block,
354 int indent_level);
355static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
356 struct btrfsic_state *state,
357 struct btrfsic_block_data_ctx *next_block_ctx,
358 struct btrfsic_block *next_block,
359 struct btrfsic_block *from_block,
360 u64 parent_generation);
361static struct btrfsic_block *btrfsic_block_lookup_or_add(
362 struct btrfsic_state *state,
363 struct btrfsic_block_data_ctx *block_ctx,
364 const char *additional_string,
365 int is_metadata,
366 int is_iodone,
367 int never_written,
368 int mirror_num,
369 int *was_created);
370static int btrfsic_process_superblock_dev_mirror(
371 struct btrfsic_state *state,
372 struct btrfsic_dev_state *dev_state,
373 struct btrfs_device *device,
374 int superblock_mirror_num,
375 struct btrfsic_dev_state **selected_dev_state,
376 struct btrfs_super_block *selected_super);
377static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
378 struct block_device *bdev);
379static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
380 u64 bytenr,
381 struct btrfsic_dev_state *dev_state,
e06baab4 382 u64 dev_bytenr);
5db02760
SB
383
384static struct mutex btrfsic_mutex;
385static int btrfsic_is_initialized;
386static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
387
388
/* Reset all fields of a block object to their "unknown/unused" state. */
static void btrfsic_block_init(struct btrfsic_block *b)
{
	b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
	b->dev_state = NULL;
	b->dev_bytenr = 0;
	b->logical_bytenr = 0;
	b->generation = BTRFSIC_GENERATION_UNKNOWN;
	b->disk_key.objectid = 0;
	b->disk_key.type = 0;
	b->disk_key.offset = 0;
	b->is_metadata = 0;
	b->is_superblock = 0;
	b->is_iodone = 0;
	b->iodone_w_error = 0;
	b->never_written = 0;
	b->mirror_num = 0;
	b->next_in_same_bio = NULL;
	b->orig_bio_bh_private = NULL;
	b->orig_bio_bh_end_io.bio = NULL;
	INIT_LIST_HEAD(&b->collision_resolving_node);
	INIT_LIST_HEAD(&b->all_blocks_node);
	INIT_LIST_HEAD(&b->ref_to_list);
	INIT_LIST_HEAD(&b->ref_from_list);
	b->submit_bio_bh_rw = 0;
	b->flush_gen = 0;
}
415
416static struct btrfsic_block *btrfsic_block_alloc(void)
417{
418 struct btrfsic_block *b;
419
420 b = kzalloc(sizeof(*b), GFP_NOFS);
421 if (NULL != b)
422 btrfsic_block_init(b);
423
424 return b;
425}
426
427static void btrfsic_block_free(struct btrfsic_block *b)
428{
429 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
430 kfree(b);
431}
432
/* Reset a link object: ref_cnt starts at 1, endpoints unset. */
static void btrfsic_block_link_init(struct btrfsic_block_link *l)
{
	l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
	l->ref_cnt = 1;
	INIT_LIST_HEAD(&l->node_ref_to);
	INIT_LIST_HEAD(&l->node_ref_from);
	INIT_LIST_HEAD(&l->collision_resolving_node);
	l->block_ref_to = NULL;
	l->block_ref_from = NULL;
}
443
444static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
445{
446 struct btrfsic_block_link *l;
447
448 l = kzalloc(sizeof(*l), GFP_NOFS);
449 if (NULL != l)
450 btrfsic_block_link_init(l);
451
452 return l;
453}
454
455static void btrfsic_block_link_free(struct btrfsic_block_link *l)
456{
457 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
458 kfree(l);
459}
460
/*
 * Reset a device-state object. The embedded dummy block is marked
 * iodone so a pure-flush request intercepted on this device is not
 * treated as a pending write.
 */
static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
{
	ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
	ds->bdev = NULL;
	ds->state = NULL;
	ds->name[0] = '\0';
	INIT_LIST_HEAD(&ds->collision_resolving_node);
	ds->last_flush_gen = 0;
	btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
	ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
	ds->dummy_block_for_bio_bh_flush.dev_state = ds;
}
473
474static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
475{
476 struct btrfsic_dev_state *ds;
477
478 ds = kzalloc(sizeof(*ds), GFP_NOFS);
479 if (NULL != ds)
480 btrfsic_dev_state_init(ds);
481
482 return ds;
483}
484
485static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
486{
487 BUG_ON(!(NULL == ds ||
488 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
489 kfree(ds);
490}
491
492static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
493{
494 int i;
495
496 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
497 INIT_LIST_HEAD(h->table + i);
498}
499
/*
 * Insert a block into the hashtable. The hash mixes the device bytenr
 * (shifted to drop low in-block offset bits) with the bdev pointer so
 * the same bytenr on different devices lands in different buckets.
 */
static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
					struct btrfsic_block_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(b->dev_bytenr >> 16)) ^
	     ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
	     (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);

	list_add(&b->collision_resolving_node, h->table + hashval);
}
510
/* Unlink a block from its hashtable collision list. */
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
{
	list_del(&b->collision_resolving_node);
}
515
/*
 * Look up a block by (bdev, dev_bytenr). The hash computation must
 * match btrfsic_block_hashtable_add(). Returns NULL if not present.
 */
static struct btrfsic_block *btrfsic_block_hashtable_lookup(
		struct block_device *bdev,
		u64 dev_bytenr,
		struct btrfsic_block_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(dev_bytenr >> 16)) ^
	     ((unsigned int)((uintptr_t)bdev))) &
	     (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
	struct list_head *elem;

	/* walk the collision list of the bucket, compare the full key */
	list_for_each(elem, h->table + hashval) {
		struct btrfsic_block *const b =
		    list_entry(elem, struct btrfsic_block,
			       collision_resolving_node);

		if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
			return b;
	}

	return NULL;
}
538
539static void btrfsic_block_link_hashtable_init(
540 struct btrfsic_block_link_hashtable *h)
541{
542 int i;
543
544 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
545 INIT_LIST_HEAD(h->table + i);
546}
547
548static void btrfsic_block_link_hashtable_add(
549 struct btrfsic_block_link *l,
550 struct btrfsic_block_link_hashtable *h)
551{
552 const unsigned int hashval =
553 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
554 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
555 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
556 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
557 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
558
559 BUG_ON(NULL == l->block_ref_to);
560 BUG_ON(NULL == l->block_ref_from);
561 list_add(&l->collision_resolving_node, h->table + hashval);
562}
563
/* Unlink a block link from its hashtable collision list. */
static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
{
	list_del(&l->collision_resolving_node);
}
568
/*
 * Look up a link by the (bdev, dev_bytenr) pairs of both endpoints.
 * The hash computation must match btrfsic_block_link_hashtable_add().
 * Returns NULL if no such link exists.
 */
static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
		struct block_device *bdev_ref_to,
		u64 dev_bytenr_ref_to,
		struct block_device *bdev_ref_from,
		u64 dev_bytenr_ref_from,
		struct btrfsic_block_link_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
	     ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
	     ((unsigned int)((uintptr_t)bdev_ref_to)) ^
	     ((unsigned int)((uintptr_t)bdev_ref_from))) &
	     (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
	struct list_head *elem;

	/* walk the collision list, compare all four key components */
	list_for_each(elem, h->table + hashval) {
		struct btrfsic_block_link *const l =
		    list_entry(elem, struct btrfsic_block_link,
			       collision_resolving_node);

		BUG_ON(NULL == l->block_ref_to);
		BUG_ON(NULL == l->block_ref_from);
		if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
		    l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
		    l->block_ref_from->dev_state->bdev == bdev_ref_from &&
		    l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
			return l;
	}

	return NULL;
}
600
601static void btrfsic_dev_state_hashtable_init(
602 struct btrfsic_dev_state_hashtable *h)
603{
604 int i;
605
606 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
607 INIT_LIST_HEAD(h->table + i);
608}
609
/* Insert a device state into the hashtable, keyed by its bdev pointer. */
static void btrfsic_dev_state_hashtable_add(
		struct btrfsic_dev_state *ds,
		struct btrfsic_dev_state_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)((uintptr_t)ds->bdev)) &
	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));

	list_add(&ds->collision_resolving_node, h->table + hashval);
}
620
/* Unlink a device state from its hashtable collision list. */
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
{
	list_del(&ds->collision_resolving_node);
}
625
/*
 * Look up a device state by bdev pointer. The hash computation must
 * match btrfsic_dev_state_hashtable_add(). Returns NULL if not found.
 */
static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
		struct block_device *bdev,
		struct btrfsic_dev_state_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)((uintptr_t)bdev)) &
	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
	struct list_head *elem;

	list_for_each(elem, h->table + hashval) {
		struct btrfsic_dev_state *const ds =
		    list_entry(elem, struct btrfsic_dev_state,
			       collision_resolving_node);

		if (ds->bdev == bdev)
			return ds;
	}

	return NULL;
}
646
/*
 * Build the initial block database at mount time: read every superblock
 * mirror on every device, select the superblock with the highest
 * generation, and then recursively process the root, chunk and log
 * trees it references.
 *
 * Returns 0 on success, a negative value on failure. The caller owns
 * no memory on return; selected_super is freed on every exit path.
 */
static int btrfsic_process_superblock(struct btrfsic_state *state,
				      struct btrfs_fs_devices *fs_devices)
{
	int ret = 0;
	struct btrfs_super_block *selected_super;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;
	struct btrfsic_dev_state *selected_dev_state = NULL;
	int pass;

	BUG_ON(NULL == state);
	selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
	if (NULL == selected_super) {
		printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
		return -1;
	}

	/* scan all superblock mirrors of all devices; the best one is
	 * copied into selected_super by the per-mirror helper */
	list_for_each_entry(device, dev_head, dev_list) {
		int i;
		struct btrfsic_dev_state *dev_state;

		if (!device->bdev || !device->name)
			continue;

		dev_state = btrfsic_dev_state_lookup(device->bdev);
		BUG_ON(NULL == dev_state);
		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
			ret = btrfsic_process_superblock_dev_mirror(
					state, dev_state, device, i,
					&selected_dev_state, selected_super);
			/* failure on mirror 0 is fatal; higher mirrors
			 * are allowed to be missing */
			if (0 != ret && 0 == i) {
				kfree(selected_super);
				return ret;
			}
		}
	}

	if (NULL == state->latest_superblock) {
		printk(KERN_INFO "btrfsic: no superblock found!\n");
		kfree(selected_super);
		return -1;
	}

	state->csum_size = btrfs_super_csum_size(selected_super);

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree */
	for (pass = 0; pass < 3; pass++) {
		int num_copies;
		int mirror_num;
		u64 next_bytenr;

		switch (pass) {
		case 0:
			next_bytenr = btrfs_super_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 1:
			next_bytenr = btrfs_super_chunk_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 2:
			next_bytenr = btrfs_super_log_root(selected_super);
			/* an empty log tree is normal, skip this pass */
			if (0 == next_bytenr)
				continue;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);

		/* process every mirror of the tree root block */
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			ret = btrfsic_map_block(state, next_bytenr,
						state->metablock_size,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO "btrfsic:"
				       " btrfsic_map_block(root @%llu,"
				       " mirror %d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				kfree(selected_super);
				return -1;
			}

			/* the per-mirror pass above must already have
			 * created block and link entries for the roots */
			next_block = btrfsic_block_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					&state->block_hashtable);
			BUG_ON(NULL == next_block);

			l = btrfsic_block_link_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					state->latest_superblock->dev_state->
					bdev,
					state->latest_superblock->dev_bytenr,
					&state->block_link_hashtable);
			BUG_ON(NULL == l);

			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
			/* read_block returns the number of bytes read */
			if (ret < (int)PAGE_CACHE_SIZE) {
				printk(KERN_INFO
				       "btrfsic: read @logical %llu failed!\n",
				       (unsigned long long)
				       tmp_next_block_ctx.start);
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				kfree(selected_super);
				return -1;
			}

			ret = btrfsic_process_metablock(state,
							next_block,
							&tmp_next_block_ctx,
							BTRFS_MAX_LEVEL + 3, 1);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
		}
	}

	kfree(selected_super);
	return ret;
}
786
/*
 * Read and register one superblock mirror of one device.
 *
 * Creates (or reuses) a block entry for the superblock, updates
 * *selected_dev_state/selected_super if this mirror has the highest
 * generation seen so far, and creates block and link entries for the
 * root, chunk and log trees it references.
 *
 * Returns 0 on success or when the mirror is simply not a valid
 * superblock (mismatched magic/sizes), -1 on hard errors (mirror
 * beyond device size, read failure, allocation failure).
 */
static int btrfsic_process_superblock_dev_mirror(
		struct btrfsic_state *state,
		struct btrfsic_dev_state *dev_state,
		struct btrfs_device *device,
		int superblock_mirror_num,
		struct btrfsic_dev_state **selected_dev_state,
		struct btrfs_super_block *selected_super)
{
	struct btrfs_super_block *super_tmp;
	u64 dev_bytenr;
	struct buffer_head *bh;
	struct btrfsic_block *superblock_tmp;
	int pass;
	struct block_device *const superblock_bdev = device->bdev;

	/* super block bytenr is always the unmapped device bytenr */
	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
		return -1;
	/* read via buffer head; 4096 is the assumed device block size */
	bh = __bread(superblock_bdev, dev_bytenr / 4096,
		     BTRFS_SUPER_INFO_SIZE);
	if (NULL == bh)
		return -1;
	super_tmp = (struct btrfs_super_block *)
	    (bh->b_data + (dev_bytenr & 4095));

	/* reject anything that does not look like a superblock of this
	 * filesystem (wrong magic, wrong device uuid, wrong sizes) */
	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
	    strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
		    sizeof(super_tmp->magic)) ||
	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
	    btrfs_super_leafsize(super_tmp) != state->metablock_size ||
	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
		brelse(bh);
		return 0;
	}

	superblock_tmp =
	    btrfsic_block_hashtable_lookup(superblock_bdev,
					   dev_bytenr,
					   &state->block_hashtable);
	if (NULL == superblock_tmp) {
		superblock_tmp = btrfsic_block_alloc();
		if (NULL == superblock_tmp) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			brelse(bh);
			return -1;
		}
		/* for superblock, only the dev_bytenr makes sense */
		superblock_tmp->dev_bytenr = dev_bytenr;
		superblock_tmp->dev_state = dev_state;
		superblock_tmp->logical_bytenr = dev_bytenr;
		superblock_tmp->generation = btrfs_super_generation(super_tmp);
		superblock_tmp->is_metadata = 1;
		superblock_tmp->is_superblock = 1;
		superblock_tmp->is_iodone = 1;
		superblock_tmp->never_written = 0;
		superblock_tmp->mirror_num = 1 + superblock_mirror_num;
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk(KERN_INFO "New initial S-block (bdev %p, %s)"
			       " @%llu (%s/%llu/%d)\n",
			       superblock_bdev, device->name,
			       (unsigned long long)dev_bytenr,
			       dev_state->name,
			       (unsigned long long)dev_bytenr,
			       superblock_mirror_num);
		list_add(&superblock_tmp->all_blocks_node,
			 &state->all_blocks_list);
		btrfsic_block_hashtable_add(superblock_tmp,
					    &state->block_hashtable);
	}

	/* select the one with the highest generation field */
	if (btrfs_super_generation(super_tmp) >
	    state->max_superblock_generation ||
	    0 == state->max_superblock_generation) {
		memcpy(selected_super, super_tmp, sizeof(*selected_super));
		*selected_dev_state = dev_state;
		state->max_superblock_generation =
		    btrfs_super_generation(super_tmp);
		state->latest_superblock = superblock_tmp;
	}

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree —
	 * create block + link entries for each referenced tree root */
	for (pass = 0; pass < 3; pass++) {
		u64 next_bytenr;
		int num_copies;
		int mirror_num;
		const char *additional_string = NULL;
		struct btrfs_disk_key tmp_disk_key;

		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
		tmp_disk_key.offset = 0;
		switch (pass) {
		case 0:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
			additional_string = "initial root ";
			next_bytenr = btrfs_super_root(super_tmp);
			break;
		case 1:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
			additional_string = "initial chunk ";
			next_bytenr = btrfs_super_chunk_root(super_tmp);
			break;
		case 2:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
			additional_string = "initial log ";
			next_bytenr = btrfs_super_log_root(super_tmp);
			/* an empty log tree is normal, skip this pass */
			if (0 == next_bytenr)
				continue;
			break;
		}

		num_copies =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			if (btrfsic_map_block(state, next_bytenr,
					      state->metablock_size,
					      &tmp_next_block_ctx,
					      mirror_num)) {
				printk(KERN_INFO "btrfsic: btrfsic_map_block("
				       "bytenr @%llu, mirror %d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				brelse(bh);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state, &tmp_next_block_ctx,
					additional_string, 1, 1, 0,
					mirror_num, NULL);
			if (NULL == next_block) {
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				brelse(bh);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			/* generation becomes known once the block is read */
			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state, &tmp_next_block_ctx,
					next_block, superblock_tmp,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l) {
				brelse(bh);
				return -1;
			}
		}
	}
	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
		btrfsic_dump_tree_sub(state, superblock_tmp, 0);

	brelse(bh);
	return 0;
}
954
955static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
956{
957 struct btrfsic_stack_frame *sf;
958
959 sf = kzalloc(sizeof(*sf), GFP_NOFS);
960 if (NULL == sf)
961 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
962 else
963 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
964 return sf;
965}
966
967static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
968{
969 BUG_ON(!(NULL == sf ||
970 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
971 kfree(sf);
972}
973
/*
 * Walk one metadata tree block and, as long as limit_nesting allows,
 * the tree blocks it references, recording "refers to" links in the
 * in-memory block database.
 *
 * The traversal is depth-first but uses an explicit, heap-allocated
 * chain of btrfsic_stack_frame objects instead of recursion; the goto
 * labels below form a small state machine:
 *   continue_with_new_stack_frame      - start processing a (new) block
 *   continue_with_current_leaf_stack_frame / ..._node_... - next item
 *   one_stack_frame_backwards          - pop back to the parent block
 *
 * @state:               global checker state
 * @first_block:         database entry of the block to start from
 * @first_block_ctx:     mapped data of that block (freed by the caller)
 * @first_limit_nesting: how many levels of referenced blocks to descend
 * @force_iodone_flag:   passed through to newly created block entries
 *
 * Returns 0 on success, non-zero (the first error seen) otherwise.
 */
static int btrfsic_process_metablock(
		struct btrfsic_state *state,
		struct btrfsic_block *const first_block,
		struct btrfsic_block_data_ctx *const first_block_ctx,
		int first_limit_nesting, int force_iodone_flag)
{
	/* zero-init also clears num_copies/mirror_num for the first item */
	struct btrfsic_stack_frame initial_stack_frame = { 0 };
	struct btrfsic_stack_frame *sf;
	struct btrfsic_stack_frame *next_stack;
	struct btrfs_header *const first_hdr =
		(struct btrfs_header *)first_block_ctx->datav[0];

	BUG_ON(!first_hdr);
	sf = &initial_stack_frame;
	sf->error = 0;
	sf->i = -1;	/* -1 means "header not yet printed, start at item 0" */
	sf->limit_nesting = first_limit_nesting;
	sf->block = first_block;
	sf->block_ctx = first_block_ctx;
	sf->next_block = NULL;
	sf->hdr = first_hdr;
	sf->prev = NULL;

continue_with_new_stack_frame:
	sf->block->generation = le64_to_cpu(sf->hdr->generation);
	if (0 == sf->hdr->level) {
		/* level 0: this tree block is a leaf */
		struct btrfs_leaf *const leafhdr =
		    (struct btrfs_leaf *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(leafhdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "leaf %llu items %d generation %llu"
				       " owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       sf->nr,
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.owner));
		}

continue_with_current_leaf_stack_frame:
		/*
		 * Advance to the next item once all mirrors of the current
		 * one were handled (num_copies == 0 on first entry).
		 */
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_item disk_item;
			u32 disk_item_offset =
			    (uintptr_t)(leafhdr->items + sf->i) -
			    (uintptr_t)leafhdr;
			struct btrfs_disk_key *disk_key;
			u8 type;
			u32 item_offset;

			if (disk_item_offset + sizeof(struct btrfs_item) >
			    sf->block_ctx->len) {
leaf_item_out_of_bounce_error:
				printk(KERN_INFO
				       "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(sf->block_ctx,
						     &disk_item,
						     disk_item_offset,
						     sizeof(struct btrfs_item));
			item_offset = le32_to_cpu(disk_item.offset);
			disk_key = &disk_item.key;
			type = disk_key->type;

			if (BTRFS_ROOT_ITEM_KEY == type) {
				/* root item: points to another tree root */
				struct btrfs_root_item root_item;
				u32 root_item_offset;
				u64 next_bytenr;

				root_item_offset = item_offset +
					offsetof(struct btrfs_leaf, items);
				if (root_item_offset +
				    sizeof(struct btrfs_root_item) >
				    sf->block_ctx->len)
					goto leaf_item_out_of_bounce_error;
				btrfsic_read_from_block_data(
					sf->block_ctx, &root_item,
					root_item_offset,
					sizeof(struct btrfs_root_item));
				next_bytenr = le64_to_cpu(root_item.bytenr);

				sf->error =
				    btrfsic_create_link_to_next_block(
						state,
						sf->block,
						sf->block_ctx,
						next_bytenr,
						sf->limit_nesting,
						&sf->next_block_ctx,
						&sf->next_block,
						force_iodone_flag,
						&sf->num_copies,
						&sf->mirror_num,
						disk_key,
						le64_to_cpu(root_item.
						generation));
				if (sf->error)
					goto one_stack_frame_backwards;

				if (NULL != sf->next_block) {
					/* descend into the referenced block */
					struct btrfs_header *const next_hdr =
					    (struct btrfs_header *)
					    sf->next_block_ctx.datav[0];

					next_stack =
					    btrfsic_stack_frame_alloc();
					if (NULL == next_stack) {
						btrfsic_release_block_ctx(
								&sf->
								next_block_ctx);
						goto one_stack_frame_backwards;
					}

					next_stack->i = -1;
					next_stack->block = sf->next_block;
					next_stack->block_ctx =
					    &sf->next_block_ctx;
					next_stack->next_block = NULL;
					next_stack->hdr = next_hdr;
					next_stack->limit_nesting =
					    sf->limit_nesting - 1;
					next_stack->prev = sf;
					sf = next_stack;
					goto continue_with_new_stack_frame;
				}
			} else if (BTRFS_EXTENT_DATA_KEY == type &&
				   state->include_extent_data) {
				/* data extents are only tracked on request */
				sf->error = btrfsic_handle_extent_data(
						state,
						sf->block,
						sf->block_ctx,
						item_offset,
						force_iodone_flag);
				if (sf->error)
					goto one_stack_frame_backwards;
			}

			goto continue_with_current_leaf_stack_frame;
		}
	} else {
		/* level > 0: this tree block is an interior node */
		struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(nodehdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "node %llu level %d items %d"
				       " generation %llu owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       nodehdr->header.level, sf->nr,
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.owner));
		}

continue_with_current_node_stack_frame:
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_key_ptr key_ptr;
			u32 key_ptr_offset;
			u64 next_bytenr;

			key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
					 (uintptr_t)nodehdr;
			if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
			    sf->block_ctx->len) {
				printk(KERN_INFO
				       "btrfsic: node item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(
				sf->block_ctx, &key_ptr, key_ptr_offset,
				sizeof(struct btrfs_key_ptr));
			next_bytenr = le64_to_cpu(key_ptr.blockptr);

			sf->error = btrfsic_create_link_to_next_block(
					state,
					sf->block,
					sf->block_ctx,
					next_bytenr,
					sf->limit_nesting,
					&sf->next_block_ctx,
					&sf->next_block,
					force_iodone_flag,
					&sf->num_copies,
					&sf->mirror_num,
					&key_ptr.key,
					le64_to_cpu(key_ptr.generation));
			if (sf->error)
				goto one_stack_frame_backwards;

			if (NULL != sf->next_block) {
				/* descend into the referenced child block */
				struct btrfs_header *const next_hdr =
				    (struct btrfs_header *)
				    sf->next_block_ctx.datav[0];

				next_stack = btrfsic_stack_frame_alloc();
				if (NULL == next_stack)
					goto one_stack_frame_backwards;

				next_stack->i = -1;
				next_stack->block = sf->next_block;
				next_stack->block_ctx = &sf->next_block_ctx;
				next_stack->next_block = NULL;
				next_stack->hdr = next_hdr;
				next_stack->limit_nesting =
				    sf->limit_nesting - 1;
				next_stack->prev = sf;
				sf = next_stack;
				goto continue_with_new_stack_frame;
			}

			goto continue_with_current_node_stack_frame;
		}
	}

one_stack_frame_backwards:
	if (NULL != sf->prev) {
		struct btrfsic_stack_frame *const prev = sf->prev;

		/* the one for the initial block is freed in the caller */
		btrfsic_release_block_ctx(sf->block_ctx);

		if (sf->error) {
			/* propagate the error and unwind the whole chain */
			prev->error = sf->error;
			btrfsic_stack_frame_free(sf);
			sf = prev;
			goto one_stack_frame_backwards;
		}

		btrfsic_stack_frame_free(sf);
		sf = prev;
		goto continue_with_new_stack_frame;
	} else {
		/* only the caller-owned initial frame may remain */
		BUG_ON(&initial_stack_frame != sf);
	}

	return sf->error;
}
1234
e06baab4
SB
1235static void btrfsic_read_from_block_data(
1236 struct btrfsic_block_data_ctx *block_ctx,
1237 void *dstv, u32 offset, size_t len)
1238{
1239 size_t cur;
1240 size_t offset_in_page;
1241 char *kaddr;
1242 char *dst = (char *)dstv;
1243 size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1244 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1245
1246 WARN_ON(offset + len > block_ctx->len);
1247 offset_in_page = (start_offset + offset) &
1248 ((unsigned long)PAGE_CACHE_SIZE - 1);
1249
1250 while (len > 0) {
1251 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1252 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1253 PAGE_CACHE_SHIFT);
1254 kaddr = block_ctx->datav[i];
1255 memcpy(dst, kaddr + offset_in_page, cur);
1256
1257 dst += cur;
1258 len -= cur;
1259 offset_in_page = 0;
1260 i++;
1261 }
1262}
1263
5db02760
SB
/*
 * Create (or bump) the "refers to" link from @block to the metadata
 * block at logical address @next_bytenr, for one mirror per call.
 *
 * *num_copiesp / *mirror_nump carry iteration state across calls: the
 * caller invokes this function repeatedly for the same item until
 * *mirror_nump exceeds *num_copiesp (the caller's loop logic in
 * btrfsic_process_metablock depends on this).
 *
 * On success, *next_blockp is set to the referenced block's database
 * entry if its contents were read and should be descended into
 * (limit_nesting > 0 and the link is new), otherwise to NULL.
 * In the *next_blockp != NULL case, *next_block_ctx holds the mapped
 * data and must be released by the caller.
 *
 * Returns 0 on success, -1 on failure.
 */
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation)
{
	struct btrfsic_block *next_block = NULL;
	int ret;
	struct btrfsic_block_link *l;
	int did_alloc_block_link;
	int block_was_created;

	*next_blockp = NULL;
	if (0 == *num_copiesp) {
		/* first call for this item: find out how many mirrors exist */
		*num_copiesp =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, *num_copiesp);
		*mirror_nump = 1;
	}

	if (*mirror_nump > *num_copiesp)
		return 0;

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
		printk(KERN_INFO
		       "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
		       *mirror_nump);
	ret = btrfsic_map_block(state, next_bytenr,
				state->metablock_size,
				next_block_ctx, *mirror_nump);
	if (ret) {
		printk(KERN_INFO
		       "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
		       (unsigned long long)next_bytenr, *mirror_nump);
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}

	next_block = btrfsic_block_lookup_or_add(state,
						 next_block_ctx, "referenced ",
						 1, force_iodone_flag,
						 !force_iodone_flag,
						 *mirror_nump,
						 &block_was_created);
	if (NULL == next_block) {
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}
	if (block_was_created) {
		l = NULL;
		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
	} else {
		/* known block: warn when its stored logical address differs */
		if (next_block->logical_bytenr != next_bytenr &&
		    !(!next_block->is_metadata &&
		      0 == next_block->logical_bytenr)) {
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c,"
			       " bytenr mismatch (!= stored %llu).\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block),
			       (unsigned long long)next_block->logical_bytenr);
		} else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c.\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block));
		next_block->logical_bytenr = next_bytenr;

		next_block->mirror_num = *mirror_nump;
		/* an existing link between the two blocks may already exist */
		l = btrfsic_block_link_hashtable_lookup(
				next_block_ctx->dev->bdev,
				next_block_ctx->dev_bytenr,
				block_ctx->dev->bdev,
				block_ctx->dev_bytenr,
				&state->block_link_hashtable);
	}

	next_block->disk_key = *disk_key;
	if (NULL == l) {
		/* no link yet: allocate one and register it everywhere */
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		did_alloc_block_link = 1;
		l->block_ref_to = next_block;
		l->block_ref_from = block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		list_add(&l->node_ref_to, &block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		did_alloc_block_link = 0;
		if (0 == limit_nesting) {
			/* at the nesting limit only the refcount is updated */
			l->ref_cnt++;
			l->parent_generation = parent_generation;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_add_link(state, l);
		}
	}

	if (limit_nesting > 0 && did_alloc_block_link) {
		/* new link and descent allowed: read the block's contents */
		ret = btrfsic_read_block(state, next_block_ctx);
		if (ret < (int)next_block_ctx->len) {
			printk(KERN_INFO
			       "btrfsic: read block @logical %llu failed!\n",
			       (unsigned long long)next_bytenr);
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		*next_blockp = next_block;
	} else {
		*next_blockp = NULL;
	}
	(*mirror_nump)++;

	return 0;
}
1414
1415static int btrfsic_handle_extent_data(
1416 struct btrfsic_state *state,
1417 struct btrfsic_block *block,
1418 struct btrfsic_block_data_ctx *block_ctx,
1419 u32 item_offset, int force_iodone_flag)
1420{
1421 int ret;
e06baab4
SB
1422 struct btrfs_file_extent_item file_extent_item;
1423 u64 file_extent_item_offset;
1424 u64 next_bytenr;
1425 u64 num_bytes;
1426 u64 generation;
5db02760
SB
1427 struct btrfsic_block_link *l;
1428
e06baab4
SB
1429 file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1430 item_offset;
1431 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1432 block_ctx->len) {
1433 printk(KERN_INFO
1434 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1435 block_ctx->start, block_ctx->dev->name);
1436 return -1;
1437 }
1438 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1439 file_extent_item_offset,
1440 sizeof(struct btrfs_file_extent_item));
1441 next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1442 le64_to_cpu(file_extent_item.offset);
1443 generation = le64_to_cpu(file_extent_item.generation);
1444 num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1445 generation = le64_to_cpu(file_extent_item.generation);
1446
5db02760
SB
1447 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1448 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1449 " offset = %llu, num_bytes = %llu\n",
e06baab4 1450 file_extent_item.type,
5db02760 1451 (unsigned long long)
e06baab4
SB
1452 le64_to_cpu(file_extent_item.disk_bytenr),
1453 (unsigned long long)le64_to_cpu(file_extent_item.offset),
1454 (unsigned long long)num_bytes);
1455 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1456 ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr))
5db02760
SB
1457 return 0;
1458 while (num_bytes > 0) {
1459 u32 chunk_len;
1460 int num_copies;
1461 int mirror_num;
1462
e06baab4
SB
1463 if (num_bytes > state->datablock_size)
1464 chunk_len = state->datablock_size;
5db02760
SB
1465 else
1466 chunk_len = num_bytes;
1467
1468 num_copies =
1469 btrfs_num_copies(&state->root->fs_info->mapping_tree,
e06baab4 1470 next_bytenr, state->datablock_size);
5db02760
SB
1471 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1472 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1473 (unsigned long long)next_bytenr, num_copies);
1474 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1475 struct btrfsic_block_data_ctx next_block_ctx;
1476 struct btrfsic_block *next_block;
1477 int block_was_created;
1478
1479 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1480 printk(KERN_INFO "btrfsic_handle_extent_data("
1481 "mirror_num=%d)\n", mirror_num);
1482 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1483 printk(KERN_INFO
1484 "\tdisk_bytenr = %llu, num_bytes %u\n",
1485 (unsigned long long)next_bytenr,
1486 chunk_len);
1487 ret = btrfsic_map_block(state, next_bytenr,
1488 chunk_len, &next_block_ctx,
1489 mirror_num);
1490 if (ret) {
1491 printk(KERN_INFO
1492 "btrfsic: btrfsic_map_block(@%llu,"
1493 " mirror=%d) failed!\n",
1494 (unsigned long long)next_bytenr,
1495 mirror_num);
1496 return -1;
1497 }
1498
1499 next_block = btrfsic_block_lookup_or_add(
1500 state,
1501 &next_block_ctx,
1502 "referenced ",
1503 0,
1504 force_iodone_flag,
1505 !force_iodone_flag,
1506 mirror_num,
1507 &block_was_created);
1508 if (NULL == next_block) {
1509 printk(KERN_INFO
1510 "btrfsic: error, kmalloc failed!\n");
1511 btrfsic_release_block_ctx(&next_block_ctx);
1512 return -1;
1513 }
1514 if (!block_was_created) {
1515 if (next_block->logical_bytenr != next_bytenr &&
1516 !(!next_block->is_metadata &&
1517 0 == next_block->logical_bytenr)) {
1518 printk(KERN_INFO
1519 "Referenced block"
1520 " @%llu (%s/%llu/%d)"
1521 " found in hash table, D,"
1522 " bytenr mismatch"
1523 " (!= stored %llu).\n",
1524 (unsigned long long)next_bytenr,
1525 next_block_ctx.dev->name,
1526 (unsigned long long)
1527 next_block_ctx.dev_bytenr,
1528 mirror_num,
1529 (unsigned long long)
1530 next_block->logical_bytenr);
1531 }
1532 next_block->logical_bytenr = next_bytenr;
1533 next_block->mirror_num = mirror_num;
1534 }
1535
1536 l = btrfsic_block_link_lookup_or_add(state,
1537 &next_block_ctx,
1538 next_block, block,
1539 generation);
1540 btrfsic_release_block_ctx(&next_block_ctx);
1541 if (NULL == l)
1542 return -1;
1543 }
1544
1545 next_bytenr += chunk_len;
1546 num_bytes -= chunk_len;
1547 }
1548
1549 return 0;
1550}
1551
1552static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1553 struct btrfsic_block_data_ctx *block_ctx_out,
1554 int mirror_num)
1555{
1556 int ret;
1557 u64 length;
1558 struct btrfs_bio *multi = NULL;
1559 struct btrfs_device *device;
1560
1561 length = len;
1562 ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
1563 bytenr, &length, &multi, mirror_num);
1564
1565 device = multi->stripes[0].dev;
1566 block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1567 block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1568 block_ctx_out->start = bytenr;
1569 block_ctx_out->len = len;
e06baab4
SB
1570 block_ctx_out->datav = NULL;
1571 block_ctx_out->pagev = NULL;
1572 block_ctx_out->mem_to_free = NULL;
5db02760
SB
1573
1574 if (0 == ret)
1575 kfree(multi);
1576 if (NULL == block_ctx_out->dev) {
1577 ret = -ENXIO;
1578 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1579 }
1580
1581 return ret;
1582}
1583
1584static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1585 u32 len, struct block_device *bdev,
1586 struct btrfsic_block_data_ctx *block_ctx_out)
1587{
1588 block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
1589 block_ctx_out->dev_bytenr = bytenr;
1590 block_ctx_out->start = bytenr;
1591 block_ctx_out->len = len;
e06baab4
SB
1592 block_ctx_out->datav = NULL;
1593 block_ctx_out->pagev = NULL;
1594 block_ctx_out->mem_to_free = NULL;
5db02760
SB
1595 if (NULL != block_ctx_out->dev) {
1596 return 0;
1597 } else {
1598 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
1599 return -ENXIO;
1600 }
1601}
1602
1603static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1604{
e06baab4
SB
1605 if (block_ctx->mem_to_free) {
1606 unsigned int num_pages;
1607
1608 BUG_ON(!block_ctx->datav);
1609 BUG_ON(!block_ctx->pagev);
1610 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1611 PAGE_CACHE_SHIFT;
1612 while (num_pages > 0) {
1613 num_pages--;
1614 if (block_ctx->datav[num_pages]) {
1615 kunmap(block_ctx->pagev[num_pages]);
1616 block_ctx->datav[num_pages] = NULL;
1617 }
1618 if (block_ctx->pagev[num_pages]) {
1619 __free_page(block_ctx->pagev[num_pages]);
1620 block_ctx->pagev[num_pages] = NULL;
1621 }
1622 }
1623
1624 kfree(block_ctx->mem_to_free);
1625 block_ctx->mem_to_free = NULL;
1626 block_ctx->pagev = NULL;
1627 block_ctx->datav = NULL;
5db02760
SB
1628 }
1629}
1630
1631static int btrfsic_read_block(struct btrfsic_state *state,
1632 struct btrfsic_block_data_ctx *block_ctx)
1633{
e06baab4
SB
1634 unsigned int num_pages;
1635 unsigned int i;
1636 u64 dev_bytenr;
1637 int ret;
1638
1639 BUG_ON(block_ctx->datav);
1640 BUG_ON(block_ctx->pagev);
1641 BUG_ON(block_ctx->mem_to_free);
1642 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
5db02760
SB
1643 printk(KERN_INFO
1644 "btrfsic: read_block() with unaligned bytenr %llu\n",
1645 (unsigned long long)block_ctx->dev_bytenr);
1646 return -1;
1647 }
e06baab4
SB
1648
1649 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1650 PAGE_CACHE_SHIFT;
1651 block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1652 sizeof(*block_ctx->pagev)) *
1653 num_pages, GFP_NOFS);
1654 if (!block_ctx->mem_to_free)
5db02760 1655 return -1;
e06baab4
SB
1656 block_ctx->datav = block_ctx->mem_to_free;
1657 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1658 for (i = 0; i < num_pages; i++) {
1659 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1660 if (!block_ctx->pagev[i])
1661 return -1;
5db02760
SB
1662 }
1663
e06baab4
SB
1664 dev_bytenr = block_ctx->dev_bytenr;
1665 for (i = 0; i < num_pages;) {
1666 struct bio *bio;
1667 unsigned int j;
1668 DECLARE_COMPLETION_ONSTACK(complete);
1669
1670 bio = bio_alloc(GFP_NOFS, num_pages - i);
1671 if (!bio) {
1672 printk(KERN_INFO
1673 "btrfsic: bio_alloc() for %u pages failed!\n",
1674 num_pages - i);
1675 return -1;
1676 }
1677 bio->bi_bdev = block_ctx->dev->bdev;
1678 bio->bi_sector = dev_bytenr >> 9;
1679 bio->bi_end_io = btrfsic_complete_bio_end_io;
1680 bio->bi_private = &complete;
1681
1682 for (j = i; j < num_pages; j++) {
1683 ret = bio_add_page(bio, block_ctx->pagev[j],
1684 PAGE_CACHE_SIZE, 0);
1685 if (PAGE_CACHE_SIZE != ret)
1686 break;
1687 }
1688 if (j == i) {
1689 printk(KERN_INFO
1690 "btrfsic: error, failed to add a single page!\n");
1691 return -1;
1692 }
1693 submit_bio(READ, bio);
1694
1695 /* this will also unplug the queue */
1696 wait_for_completion(&complete);
1697
1698 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1699 printk(KERN_INFO
1700 "btrfsic: read error at logical %llu dev %s!\n",
1701 block_ctx->start, block_ctx->dev->name);
1702 bio_put(bio);
1703 return -1;
1704 }
1705 bio_put(bio);
1706 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1707 i = j;
1708 }
1709 for (i = 0; i < num_pages; i++) {
1710 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1711 if (!block_ctx->datav[i]) {
1712 printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1713 block_ctx->dev->name);
1714 return -1;
1715 }
1716 }
5db02760
SB
1717
1718 return block_ctx->len;
1719}
1720
e06baab4
SB
1721static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1722{
1723 complete((struct completion *)bio->bi_private);
1724}
1725
5db02760
SB
/*
 * Debug helper: print every block in the database together with all of
 * its outgoing ("refers to") and incoming ("is ref from") links.
 */
static void btrfsic_dump_database(struct btrfsic_state *state)
{
	struct list_head *elem_all;

	BUG_ON(NULL == state);

	printk(KERN_INFO "all_blocks_list:\n");
	list_for_each(elem_all, &state->all_blocks_list) {
		const struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *elem_ref_from;

		printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
		       btrfsic_get_block_type(state, b_all),
		       (unsigned long long)b_all->logical_bytenr,
		       b_all->dev_state->name,
		       (unsigned long long)b_all->dev_bytenr,
		       b_all->mirror_num);

		/* outgoing links: blocks this block references */
		list_for_each(elem_ref_to, &b_all->ref_to_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " refers %u* to"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       (unsigned long long)
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       (unsigned long long)l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		}

		/* incoming links: blocks that reference this block */
		list_for_each(elem_ref_from, &b_all->ref_from_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_from,
				       struct btrfsic_block_link,
				       node_ref_from);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " is ref %u* from"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       (unsigned long long)
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       (unsigned long long)
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		}

		printk(KERN_INFO "\n");
	}
}
1797
1798/*
1799 * Test whether the disk block contains a tree block (leaf or node)
1800 * (note that this test fails for the super block)
1801 */
1802static int btrfsic_test_for_metadata(struct btrfsic_state *state,
e06baab4 1803 char **datav, unsigned int num_pages)
5db02760
SB
1804{
1805 struct btrfs_header *h;
1806 u8 csum[BTRFS_CSUM_SIZE];
1807 u32 crc = ~(u32)0;
e06baab4 1808 unsigned int i;
5db02760 1809
e06baab4
SB
1810 if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1811 return 1; /* not metadata */
1812 num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1813 h = (struct btrfs_header *)datav[0];
5db02760
SB
1814
1815 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
e06baab4 1816 return 1;
5db02760 1817
e06baab4
SB
1818 for (i = 0; i < num_pages; i++) {
1819 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1820 size_t sublen = i ? PAGE_CACHE_SIZE :
1821 (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1822
1823 crc = crc32c(crc, data, sublen);
1824 }
5db02760
SB
1825 btrfs_csum_final(crc, csum);
1826 if (memcmp(csum, h->csum, state->csum_size))
e06baab4 1827 return 1;
5db02760 1828
e06baab4 1829 return 0; /* is metadata */
5db02760
SB
1830}
1831
1832static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
e06baab4
SB
1833 u64 dev_bytenr, char **mapped_datav,
1834 unsigned int num_pages,
1835 struct bio *bio, int *bio_is_patched,
5db02760
SB
1836 struct buffer_head *bh,
1837 int submit_bio_bh_rw)
1838{
1839 int is_metadata;
1840 struct btrfsic_block *block;
1841 struct btrfsic_block_data_ctx block_ctx;
1842 int ret;
1843 struct btrfsic_state *state = dev_state->state;
1844 struct block_device *bdev = dev_state->bdev;
e06baab4 1845 unsigned int processed_len;
5db02760 1846
5db02760
SB
1847 if (NULL != bio_is_patched)
1848 *bio_is_patched = 0;
1849
e06baab4
SB
1850again:
1851 if (num_pages == 0)
1852 return;
1853
1854 processed_len = 0;
1855 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1856 num_pages));
1857
5db02760
SB
1858 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1859 &state->block_hashtable);
1860 if (NULL != block) {
0b485143 1861 u64 bytenr = 0;
5db02760
SB
1862 struct list_head *elem_ref_to;
1863 struct list_head *tmp_ref_to;
1864
1865 if (block->is_superblock) {
1866 bytenr = le64_to_cpu(((struct btrfs_super_block *)
e06baab4
SB
1867 mapped_datav[0])->bytenr);
1868 if (num_pages * PAGE_CACHE_SIZE <
1869 BTRFS_SUPER_INFO_SIZE) {
1870 printk(KERN_INFO
1871 "btrfsic: cannot work with too short bios!\n");
1872 return;
1873 }
5db02760 1874 is_metadata = 1;
e06baab4
SB
1875 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1876 processed_len = BTRFS_SUPER_INFO_SIZE;
5db02760
SB
1877 if (state->print_mask &
1878 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1879 printk(KERN_INFO
1880 "[before new superblock is written]:\n");
1881 btrfsic_dump_tree_sub(state, block, 0);
1882 }
1883 }
1884 if (is_metadata) {
1885 if (!block->is_superblock) {
e06baab4
SB
1886 if (num_pages * PAGE_CACHE_SIZE <
1887 state->metablock_size) {
1888 printk(KERN_INFO
1889 "btrfsic: cannot work with too short bios!\n");
1890 return;
1891 }
1892 processed_len = state->metablock_size;
5db02760 1893 bytenr = le64_to_cpu(((struct btrfs_header *)
e06baab4 1894 mapped_datav[0])->bytenr);
5db02760
SB
1895 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1896 dev_state,
e06baab4 1897 dev_bytenr);
5db02760
SB
1898 }
1899 if (block->logical_bytenr != bytenr) {
1900 printk(KERN_INFO
1901 "Written block @%llu (%s/%llu/%d)"
1902 " found in hash table, %c,"
1903 " bytenr mismatch"
1904 " (!= stored %llu).\n",
1905 (unsigned long long)bytenr,
1906 dev_state->name,
1907 (unsigned long long)dev_bytenr,
1908 block->mirror_num,
1909 btrfsic_get_block_type(state, block),
1910 (unsigned long long)
1911 block->logical_bytenr);
1912 block->logical_bytenr = bytenr;
1913 } else if (state->print_mask &
1914 BTRFSIC_PRINT_MASK_VERBOSE)
1915 printk(KERN_INFO
1916 "Written block @%llu (%s/%llu/%d)"
1917 " found in hash table, %c.\n",
1918 (unsigned long long)bytenr,
1919 dev_state->name,
1920 (unsigned long long)dev_bytenr,
1921 block->mirror_num,
1922 btrfsic_get_block_type(state, block));
1923 } else {
e06baab4
SB
1924 if (num_pages * PAGE_CACHE_SIZE <
1925 state->datablock_size) {
1926 printk(KERN_INFO
1927 "btrfsic: cannot work with too short bios!\n");
1928 return;
1929 }
1930 processed_len = state->datablock_size;
5db02760
SB
1931 bytenr = block->logical_bytenr;
1932 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1933 printk(KERN_INFO
1934 "Written block @%llu (%s/%llu/%d)"
1935 " found in hash table, %c.\n",
1936 (unsigned long long)bytenr,
1937 dev_state->name,
1938 (unsigned long long)dev_bytenr,
1939 block->mirror_num,
1940 btrfsic_get_block_type(state, block));
1941 }
1942
1943 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1944 printk(KERN_INFO
1945 "ref_to_list: %cE, ref_from_list: %cE\n",
1946 list_empty(&block->ref_to_list) ? ' ' : '!',
1947 list_empty(&block->ref_from_list) ? ' ' : '!');
1948 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1949 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1950 " @%llu (%s/%llu/%d), old(gen=%llu,"
1951 " objectid=%llu, type=%d, offset=%llu),"
1952 " new(gen=%llu),"
1953 " which is referenced by most recent superblock"
1954 " (superblockgen=%llu)!\n",
1955 btrfsic_get_block_type(state, block),
1956 (unsigned long long)bytenr,
1957 dev_state->name,
1958 (unsigned long long)dev_bytenr,
1959 block->mirror_num,
1960 (unsigned long long)block->generation,
1961 (unsigned long long)
1962 le64_to_cpu(block->disk_key.objectid),
1963 block->disk_key.type,
1964 (unsigned long long)
1965 le64_to_cpu(block->disk_key.offset),
1966 (unsigned long long)
1967 le64_to_cpu(((struct btrfs_header *)
e06baab4 1968 mapped_datav[0])->generation),
5db02760
SB
1969 (unsigned long long)
1970 state->max_superblock_generation);
1971 btrfsic_dump_tree(state);
1972 }
1973
1974 if (!block->is_iodone && !block->never_written) {
1975 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1976 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1977 " which is not yet iodone!\n",
1978 btrfsic_get_block_type(state, block),
1979 (unsigned long long)bytenr,
1980 dev_state->name,
1981 (unsigned long long)dev_bytenr,
1982 block->mirror_num,
1983 (unsigned long long)block->generation,
1984 (unsigned long long)
1985 le64_to_cpu(((struct btrfs_header *)
e06baab4 1986 mapped_datav[0])->generation));
5db02760
SB
1987 /* it would not be safe to go on */
1988 btrfsic_dump_tree(state);
e06baab4 1989 goto continue_loop;
5db02760
SB
1990 }
1991
1992 /*
1993 * Clear all references of this block. Do not free
1994 * the block itself even if is not referenced anymore
1995 * because it still carries valueable information
1996 * like whether it was ever written and IO completed.
1997 */
1998 list_for_each_safe(elem_ref_to, tmp_ref_to,
1999 &block->ref_to_list) {
2000 struct btrfsic_block_link *const l =
2001 list_entry(elem_ref_to,
2002 struct btrfsic_block_link,
2003 node_ref_to);
2004
2005 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2006 btrfsic_print_rem_link(state, l);
2007 l->ref_cnt--;
2008 if (0 == l->ref_cnt) {
2009 list_del(&l->node_ref_to);
2010 list_del(&l->node_ref_from);
2011 btrfsic_block_link_hashtable_remove(l);
2012 btrfsic_block_link_free(l);
2013 }
2014 }
2015
2016 if (block->is_superblock)
e06baab4
SB
2017 ret = btrfsic_map_superblock(state, bytenr,
2018 processed_len,
5db02760
SB
2019 bdev, &block_ctx);
2020 else
e06baab4 2021 ret = btrfsic_map_block(state, bytenr, processed_len,
5db02760
SB
2022 &block_ctx, 0);
2023 if (ret) {
2024 printk(KERN_INFO
2025 "btrfsic: btrfsic_map_block(root @%llu)"
2026 " failed!\n", (unsigned long long)bytenr);
e06baab4 2027 goto continue_loop;
5db02760 2028 }
e06baab4 2029 block_ctx.datav = mapped_datav;
5db02760
SB
2030 /* the following is required in case of writes to mirrors,
2031 * use the same that was used for the lookup */
2032 block_ctx.dev = dev_state;
2033 block_ctx.dev_bytenr = dev_bytenr;
2034
2035 if (is_metadata || state->include_extent_data) {
2036 block->never_written = 0;
2037 block->iodone_w_error = 0;
2038 if (NULL != bio) {
2039 block->is_iodone = 0;
2040 BUG_ON(NULL == bio_is_patched);
2041 if (!*bio_is_patched) {
2042 block->orig_bio_bh_private =
2043 bio->bi_private;
2044 block->orig_bio_bh_end_io.bio =
2045 bio->bi_end_io;
2046 block->next_in_same_bio = NULL;
2047 bio->bi_private = block;
2048 bio->bi_end_io = btrfsic_bio_end_io;
2049 *bio_is_patched = 1;
2050 } else {
2051 struct btrfsic_block *chained_block =
2052 (struct btrfsic_block *)
2053 bio->bi_private;
2054
2055 BUG_ON(NULL == chained_block);
2056 block->orig_bio_bh_private =
2057 chained_block->orig_bio_bh_private;
2058 block->orig_bio_bh_end_io.bio =
2059 chained_block->orig_bio_bh_end_io.
2060 bio;
2061 block->next_in_same_bio = chained_block;
2062 bio->bi_private = block;
2063 }
2064 } else if (NULL != bh) {
2065 block->is_iodone = 0;
2066 block->orig_bio_bh_private = bh->b_private;
2067 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2068 block->next_in_same_bio = NULL;
2069 bh->b_private = block;
2070 bh->b_end_io = btrfsic_bh_end_io;
2071 } else {
2072 block->is_iodone = 1;
2073 block->orig_bio_bh_private = NULL;
2074 block->orig_bio_bh_end_io.bio = NULL;
2075 block->next_in_same_bio = NULL;
2076 }
2077 }
2078
2079 block->flush_gen = dev_state->last_flush_gen + 1;
2080 block->submit_bio_bh_rw = submit_bio_bh_rw;
2081 if (is_metadata) {
2082 block->logical_bytenr = bytenr;
2083 block->is_metadata = 1;
2084 if (block->is_superblock) {
e06baab4
SB
2085 BUG_ON(PAGE_CACHE_SIZE !=
2086 BTRFS_SUPER_INFO_SIZE);
5db02760
SB
2087 ret = btrfsic_process_written_superblock(
2088 state,
2089 block,
2090 (struct btrfs_super_block *)
e06baab4 2091 mapped_datav[0]);
5db02760
SB
2092 if (state->print_mask &
2093 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2094 printk(KERN_INFO
2095 "[after new superblock is written]:\n");
2096 btrfsic_dump_tree_sub(state, block, 0);
2097 }
2098 } else {
2099 block->mirror_num = 0; /* unknown */
2100 ret = btrfsic_process_metablock(
2101 state,
2102 block,
2103 &block_ctx,
5db02760
SB
2104 0, 0);
2105 }
2106 if (ret)
2107 printk(KERN_INFO
2108 "btrfsic: btrfsic_process_metablock"
2109 "(root @%llu) failed!\n",
2110 (unsigned long long)dev_bytenr);
2111 } else {
2112 block->is_metadata = 0;
2113 block->mirror_num = 0; /* unknown */
2114 block->generation = BTRFSIC_GENERATION_UNKNOWN;
2115 if (!state->include_extent_data
2116 && list_empty(&block->ref_from_list)) {
2117 /*
2118 * disk block is overwritten with extent
2119 * data (not meta data) and we are configured
2120 * to not include extent data: take the
2121 * chance and free the block's memory
2122 */
2123 btrfsic_block_hashtable_remove(block);
2124 list_del(&block->all_blocks_node);
2125 btrfsic_block_free(block);
2126 }
2127 }
2128 btrfsic_release_block_ctx(&block_ctx);
2129 } else {
2130 /* block has not been found in hash table */
2131 u64 bytenr;
2132
2133 if (!is_metadata) {
e06baab4 2134 processed_len = state->datablock_size;
5db02760
SB
2135 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2136 printk(KERN_INFO "Written block (%s/%llu/?)"
2137 " !found in hash table, D.\n",
2138 dev_state->name,
2139 (unsigned long long)dev_bytenr);
e06baab4
SB
2140 if (!state->include_extent_data) {
2141 /* ignore that written D block */
2142 goto continue_loop;
2143 }
5db02760
SB
2144
2145 /* this is getting ugly for the
2146 * include_extent_data case... */
2147 bytenr = 0; /* unknown */
2148 block_ctx.start = bytenr;
e06baab4
SB
2149 block_ctx.len = processed_len;
2150 block_ctx.mem_to_free = NULL;
2151 block_ctx.pagev = NULL;
5db02760 2152 } else {
e06baab4 2153 processed_len = state->metablock_size;
5db02760 2154 bytenr = le64_to_cpu(((struct btrfs_header *)
e06baab4 2155 mapped_datav[0])->bytenr);
5db02760 2156 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
e06baab4 2157 dev_bytenr);
5db02760
SB
2158 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2159 printk(KERN_INFO
2160 "Written block @%llu (%s/%llu/?)"
2161 " !found in hash table, M.\n",
2162 (unsigned long long)bytenr,
2163 dev_state->name,
2164 (unsigned long long)dev_bytenr);
2165
e06baab4
SB
2166 ret = btrfsic_map_block(state, bytenr, processed_len,
2167 &block_ctx, 0);
5db02760
SB
2168 if (ret) {
2169 printk(KERN_INFO
2170 "btrfsic: btrfsic_map_block(root @%llu)"
2171 " failed!\n",
2172 (unsigned long long)dev_bytenr);
e06baab4 2173 goto continue_loop;
5db02760
SB
2174 }
2175 }
e06baab4 2176 block_ctx.datav = mapped_datav;
5db02760
SB
2177 /* the following is required in case of writes to mirrors,
2178 * use the same that was used for the lookup */
2179 block_ctx.dev = dev_state;
2180 block_ctx.dev_bytenr = dev_bytenr;
2181
2182 block = btrfsic_block_alloc();
2183 if (NULL == block) {
2184 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2185 btrfsic_release_block_ctx(&block_ctx);
e06baab4 2186 goto continue_loop;
5db02760
SB
2187 }
2188 block->dev_state = dev_state;
2189 block->dev_bytenr = dev_bytenr;
2190 block->logical_bytenr = bytenr;
2191 block->is_metadata = is_metadata;
2192 block->never_written = 0;
2193 block->iodone_w_error = 0;
2194 block->mirror_num = 0; /* unknown */
2195 block->flush_gen = dev_state->last_flush_gen + 1;
2196 block->submit_bio_bh_rw = submit_bio_bh_rw;
2197 if (NULL != bio) {
2198 block->is_iodone = 0;
2199 BUG_ON(NULL == bio_is_patched);
2200 if (!*bio_is_patched) {
2201 block->orig_bio_bh_private = bio->bi_private;
2202 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2203 block->next_in_same_bio = NULL;
2204 bio->bi_private = block;
2205 bio->bi_end_io = btrfsic_bio_end_io;
2206 *bio_is_patched = 1;
2207 } else {
2208 struct btrfsic_block *chained_block =
2209 (struct btrfsic_block *)
2210 bio->bi_private;
2211
2212 BUG_ON(NULL == chained_block);
2213 block->orig_bio_bh_private =
2214 chained_block->orig_bio_bh_private;
2215 block->orig_bio_bh_end_io.bio =
2216 chained_block->orig_bio_bh_end_io.bio;
2217 block->next_in_same_bio = chained_block;
2218 bio->bi_private = block;
2219 }
2220 } else if (NULL != bh) {
2221 block->is_iodone = 0;
2222 block->orig_bio_bh_private = bh->b_private;
2223 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2224 block->next_in_same_bio = NULL;
2225 bh->b_private = block;
2226 bh->b_end_io = btrfsic_bh_end_io;
2227 } else {
2228 block->is_iodone = 1;
2229 block->orig_bio_bh_private = NULL;
2230 block->orig_bio_bh_end_io.bio = NULL;
2231 block->next_in_same_bio = NULL;
2232 }
2233 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2234 printk(KERN_INFO
2235 "New written %c-block @%llu (%s/%llu/%d)\n",
2236 is_metadata ? 'M' : 'D',
2237 (unsigned long long)block->logical_bytenr,
2238 block->dev_state->name,
2239 (unsigned long long)block->dev_bytenr,
2240 block->mirror_num);
2241 list_add(&block->all_blocks_node, &state->all_blocks_list);
2242 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2243
2244 if (is_metadata) {
2245 ret = btrfsic_process_metablock(state, block,
e06baab4 2246 &block_ctx, 0, 0);
5db02760
SB
2247 if (ret)
2248 printk(KERN_INFO
2249 "btrfsic: process_metablock(root @%llu)"
2250 " failed!\n",
2251 (unsigned long long)dev_bytenr);
2252 }
2253 btrfsic_release_block_ctx(&block_ctx);
2254 }
e06baab4
SB
2255
2256continue_loop:
2257 BUG_ON(!processed_len);
2258 dev_bytenr += processed_len;
2259 mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2260 num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2261 goto again;
5db02760
SB
2262}
2263
2264static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2265{
2266 struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2267 int iodone_w_error;
2268
2269 /* mutex is not held! This is not save if IO is not yet completed
2270 * on umount */
2271 iodone_w_error = 0;
2272 if (bio_error_status)
2273 iodone_w_error = 1;
2274
2275 BUG_ON(NULL == block);
2276 bp->bi_private = block->orig_bio_bh_private;
2277 bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2278
2279 do {
2280 struct btrfsic_block *next_block;
2281 struct btrfsic_dev_state *const dev_state = block->dev_state;
2282
2283 if ((dev_state->state->print_mask &
2284 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2285 printk(KERN_INFO
2286 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2287 bio_error_status,
2288 btrfsic_get_block_type(dev_state->state, block),
2289 (unsigned long long)block->logical_bytenr,
2290 dev_state->name,
2291 (unsigned long long)block->dev_bytenr,
2292 block->mirror_num);
2293 next_block = block->next_in_same_bio;
2294 block->iodone_w_error = iodone_w_error;
2295 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2296 dev_state->last_flush_gen++;
2297 if ((dev_state->state->print_mask &
2298 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2299 printk(KERN_INFO
2300 "bio_end_io() new %s flush_gen=%llu\n",
2301 dev_state->name,
2302 (unsigned long long)
2303 dev_state->last_flush_gen);
2304 }
2305 if (block->submit_bio_bh_rw & REQ_FUA)
2306 block->flush_gen = 0; /* FUA completed means block is
2307 * on disk */
2308 block->is_iodone = 1; /* for FLUSH, this releases the block */
2309 block = next_block;
2310 } while (NULL != block);
2311
2312 bp->bi_end_io(bp, bio_error_status);
2313}
2314
/*
 * buffer_head completion handler that was patched in by the check-integrity
 * code. Records the IO result for the tracked block, then restores the
 * original b_private/b_end_io and chains to the original completion routine.
 *
 * NOTE(review): ordering is load-bearing here — b_private/b_end_io must be
 * restored before is_iodone is set, because setting is_iodone may release
 * the block (for FLUSH requests).
 */
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
	int iodone_w_error = !uptodate;
	struct btrfsic_dev_state *dev_state;

	BUG_ON(NULL == block);
	dev_state = block->dev_state;
	if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
		printk(KERN_INFO
		       "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
		       iodone_w_error,
		       btrfsic_get_block_type(dev_state->state, block),
		       (unsigned long long)block->logical_bytenr,
		       block->dev_state->name,
		       (unsigned long long)block->dev_bytenr,
		       block->mirror_num);

	block->iodone_w_error = iodone_w_error;
	if (block->submit_bio_bh_rw & REQ_FLUSH) {
		/* a completed FLUSH starts a new flush generation */
		dev_state->last_flush_gen++;
		if ((dev_state->state->print_mask &
		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
			printk(KERN_INFO
			       "bh_end_io() new %s flush_gen=%llu\n",
			       dev_state->name,
			       (unsigned long long)dev_state->last_flush_gen);
	}
	if (block->submit_bio_bh_rw & REQ_FUA)
		block->flush_gen = 0; /* FUA completed means block is on disk */

	/* un-patch the buffer_head before chaining to the original handler */
	bh->b_private = block->orig_bio_bh_private;
	bh->b_end_io = block->orig_bio_bh_end_io.bh;
	block->is_iodone = 1; /* for FLUSH, this releases the block */
	bh->b_end_io(bh, uptodate);
}
2351
2352static int btrfsic_process_written_superblock(
2353 struct btrfsic_state *state,
2354 struct btrfsic_block *const superblock,
2355 struct btrfs_super_block *const super_hdr)
2356{
2357 int pass;
2358
2359 superblock->generation = btrfs_super_generation(super_hdr);
2360 if (!(superblock->generation > state->max_superblock_generation ||
2361 0 == state->max_superblock_generation)) {
2362 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2363 printk(KERN_INFO
2364 "btrfsic: superblock @%llu (%s/%llu/%d)"
2365 " with old gen %llu <= %llu\n",
2366 (unsigned long long)superblock->logical_bytenr,
2367 superblock->dev_state->name,
2368 (unsigned long long)superblock->dev_bytenr,
2369 superblock->mirror_num,
2370 (unsigned long long)
2371 btrfs_super_generation(super_hdr),
2372 (unsigned long long)
2373 state->max_superblock_generation);
2374 } else {
2375 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2376 printk(KERN_INFO
2377 "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2378 " with new gen %llu > %llu\n",
2379 (unsigned long long)superblock->logical_bytenr,
2380 superblock->dev_state->name,
2381 (unsigned long long)superblock->dev_bytenr,
2382 superblock->mirror_num,
2383 (unsigned long long)
2384 btrfs_super_generation(super_hdr),
2385 (unsigned long long)
2386 state->max_superblock_generation);
2387
2388 state->max_superblock_generation =
2389 btrfs_super_generation(super_hdr);
2390 state->latest_superblock = superblock;
2391 }
2392
2393 for (pass = 0; pass < 3; pass++) {
2394 int ret;
2395 u64 next_bytenr;
2396 struct btrfsic_block *next_block;
2397 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2398 struct btrfsic_block_link *l;
2399 int num_copies;
2400 int mirror_num;
2401 const char *additional_string = NULL;
2402 struct btrfs_disk_key tmp_disk_key;
2403
2404 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
2405 tmp_disk_key.offset = 0;
2406
2407 switch (pass) {
2408 case 0:
2409 tmp_disk_key.objectid =
2410 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
2411 additional_string = "root ";
2412 next_bytenr = btrfs_super_root(super_hdr);
2413 if (state->print_mask &
2414 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2415 printk(KERN_INFO "root@%llu\n",
2416 (unsigned long long)next_bytenr);
2417 break;
2418 case 1:
2419 tmp_disk_key.objectid =
2420 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
2421 additional_string = "chunk ";
2422 next_bytenr = btrfs_super_chunk_root(super_hdr);
2423 if (state->print_mask &
2424 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2425 printk(KERN_INFO "chunk@%llu\n",
2426 (unsigned long long)next_bytenr);
2427 break;
2428 case 2:
2429 tmp_disk_key.objectid =
2430 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
2431 additional_string = "log ";
2432 next_bytenr = btrfs_super_log_root(super_hdr);
2433 if (0 == next_bytenr)
2434 continue;
2435 if (state->print_mask &
2436 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2437 printk(KERN_INFO "log@%llu\n",
2438 (unsigned long long)next_bytenr);
2439 break;
2440 }
2441
2442 num_copies =
2443 btrfs_num_copies(&state->root->fs_info->mapping_tree,
e06baab4 2444 next_bytenr, BTRFS_SUPER_INFO_SIZE);
5db02760
SB
2445 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2446 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2447 (unsigned long long)next_bytenr, num_copies);
2448 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2449 int was_created;
2450
2451 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2452 printk(KERN_INFO
2453 "btrfsic_process_written_superblock("
2454 "mirror_num=%d)\n", mirror_num);
e06baab4
SB
2455 ret = btrfsic_map_block(state, next_bytenr,
2456 BTRFS_SUPER_INFO_SIZE,
5db02760
SB
2457 &tmp_next_block_ctx,
2458 mirror_num);
2459 if (ret) {
2460 printk(KERN_INFO
2461 "btrfsic: btrfsic_map_block(@%llu,"
2462 " mirror=%d) failed!\n",
2463 (unsigned long long)next_bytenr,
2464 mirror_num);
2465 return -1;
2466 }
2467
2468 next_block = btrfsic_block_lookup_or_add(
2469 state,
2470 &tmp_next_block_ctx,
2471 additional_string,
2472 1, 0, 1,
2473 mirror_num,
2474 &was_created);
2475 if (NULL == next_block) {
2476 printk(KERN_INFO
2477 "btrfsic: error, kmalloc failed!\n");
2478 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2479 return -1;
2480 }
2481
2482 next_block->disk_key = tmp_disk_key;
2483 if (was_created)
2484 next_block->generation =
2485 BTRFSIC_GENERATION_UNKNOWN;
2486 l = btrfsic_block_link_lookup_or_add(
2487 state,
2488 &tmp_next_block_ctx,
2489 next_block,
2490 superblock,
2491 BTRFSIC_GENERATION_UNKNOWN);
2492 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2493 if (NULL == l)
2494 return -1;
2495 }
2496 }
2497
2498 if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
2499 WARN_ON(1);
2500 btrfsic_dump_tree(state);
2501 }
2502
2503 return 0;
2504}
2505
2506static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2507 struct btrfsic_block *const block,
2508 int recursion_level)
2509{
2510 struct list_head *elem_ref_to;
2511 int ret = 0;
2512
2513 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2514 /*
2515 * Note that this situation can happen and does not
2516 * indicate an error in regular cases. It happens
2517 * when disk blocks are freed and later reused.
2518 * The check-integrity module is not aware of any
2519 * block free operations, it just recognizes block
2520 * write operations. Therefore it keeps the linkage
2521 * information for a block until a block is
2522 * rewritten. This can temporarily cause incorrect
2523 * and even circular linkage informations. This
2524 * causes no harm unless such blocks are referenced
2525 * by the most recent super block.
2526 */
2527 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2528 printk(KERN_INFO
2529 "btrfsic: abort cyclic linkage (case 1).\n");
2530
2531 return ret;
2532 }
2533
2534 /*
2535 * This algorithm is recursive because the amount of used stack
2536 * space is very small and the max recursion depth is limited.
2537 */
2538 list_for_each(elem_ref_to, &block->ref_to_list) {
2539 const struct btrfsic_block_link *const l =
2540 list_entry(elem_ref_to, struct btrfsic_block_link,
2541 node_ref_to);
2542
2543 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2544 printk(KERN_INFO
2545 "rl=%d, %c @%llu (%s/%llu/%d)"
2546 " %u* refers to %c @%llu (%s/%llu/%d)\n",
2547 recursion_level,
2548 btrfsic_get_block_type(state, block),
2549 (unsigned long long)block->logical_bytenr,
2550 block->dev_state->name,
2551 (unsigned long long)block->dev_bytenr,
2552 block->mirror_num,
2553 l->ref_cnt,
2554 btrfsic_get_block_type(state, l->block_ref_to),
2555 (unsigned long long)
2556 l->block_ref_to->logical_bytenr,
2557 l->block_ref_to->dev_state->name,
2558 (unsigned long long)l->block_ref_to->dev_bytenr,
2559 l->block_ref_to->mirror_num);
2560 if (l->block_ref_to->never_written) {
2561 printk(KERN_INFO "btrfs: attempt to write superblock"
2562 " which references block %c @%llu (%s/%llu/%d)"
2563 " which is never written!\n",
2564 btrfsic_get_block_type(state, l->block_ref_to),
2565 (unsigned long long)
2566 l->block_ref_to->logical_bytenr,
2567 l->block_ref_to->dev_state->name,
2568 (unsigned long long)l->block_ref_to->dev_bytenr,
2569 l->block_ref_to->mirror_num);
2570 ret = -1;
2571 } else if (!l->block_ref_to->is_iodone) {
2572 printk(KERN_INFO "btrfs: attempt to write superblock"
2573 " which references block %c @%llu (%s/%llu/%d)"
2574 " which is not yet iodone!\n",
2575 btrfsic_get_block_type(state, l->block_ref_to),
2576 (unsigned long long)
2577 l->block_ref_to->logical_bytenr,
2578 l->block_ref_to->dev_state->name,
2579 (unsigned long long)l->block_ref_to->dev_bytenr,
2580 l->block_ref_to->mirror_num);
2581 ret = -1;
2582 } else if (l->parent_generation !=
2583 l->block_ref_to->generation &&
2584 BTRFSIC_GENERATION_UNKNOWN !=
2585 l->parent_generation &&
2586 BTRFSIC_GENERATION_UNKNOWN !=
2587 l->block_ref_to->generation) {
2588 printk(KERN_INFO "btrfs: attempt to write superblock"
2589 " which references block %c @%llu (%s/%llu/%d)"
2590 " with generation %llu !="
2591 " parent generation %llu!\n",
2592 btrfsic_get_block_type(state, l->block_ref_to),
2593 (unsigned long long)
2594 l->block_ref_to->logical_bytenr,
2595 l->block_ref_to->dev_state->name,
2596 (unsigned long long)l->block_ref_to->dev_bytenr,
2597 l->block_ref_to->mirror_num,
2598 (unsigned long long)l->block_ref_to->generation,
2599 (unsigned long long)l->parent_generation);
2600 ret = -1;
2601 } else if (l->block_ref_to->flush_gen >
2602 l->block_ref_to->dev_state->last_flush_gen) {
2603 printk(KERN_INFO "btrfs: attempt to write superblock"
2604 " which references block %c @%llu (%s/%llu/%d)"
2605 " which is not flushed out of disk's write cache"
2606 " (block flush_gen=%llu,"
2607 " dev->flush_gen=%llu)!\n",
2608 btrfsic_get_block_type(state, l->block_ref_to),
2609 (unsigned long long)
2610 l->block_ref_to->logical_bytenr,
2611 l->block_ref_to->dev_state->name,
2612 (unsigned long long)l->block_ref_to->dev_bytenr,
2613 l->block_ref_to->mirror_num,
2614 (unsigned long long)block->flush_gen,
2615 (unsigned long long)
2616 l->block_ref_to->dev_state->last_flush_gen);
2617 ret = -1;
2618 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2619 l->block_ref_to,
2620 recursion_level +
2621 1)) {
2622 ret = -1;
2623 }
2624 }
2625
2626 return ret;
2627}
2628
2629static int btrfsic_is_block_ref_by_superblock(
2630 const struct btrfsic_state *state,
2631 const struct btrfsic_block *block,
2632 int recursion_level)
2633{
2634 struct list_head *elem_ref_from;
2635
2636 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2637 /* refer to comment at "abort cyclic linkage (case 1)" */
2638 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2639 printk(KERN_INFO
2640 "btrfsic: abort cyclic linkage (case 2).\n");
2641
2642 return 0;
2643 }
2644
2645 /*
2646 * This algorithm is recursive because the amount of used stack space
2647 * is very small and the max recursion depth is limited.
2648 */
2649 list_for_each(elem_ref_from, &block->ref_from_list) {
2650 const struct btrfsic_block_link *const l =
2651 list_entry(elem_ref_from, struct btrfsic_block_link,
2652 node_ref_from);
2653
2654 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2655 printk(KERN_INFO
2656 "rl=%d, %c @%llu (%s/%llu/%d)"
2657 " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2658 recursion_level,
2659 btrfsic_get_block_type(state, block),
2660 (unsigned long long)block->logical_bytenr,
2661 block->dev_state->name,
2662 (unsigned long long)block->dev_bytenr,
2663 block->mirror_num,
2664 l->ref_cnt,
2665 btrfsic_get_block_type(state, l->block_ref_from),
2666 (unsigned long long)
2667 l->block_ref_from->logical_bytenr,
2668 l->block_ref_from->dev_state->name,
2669 (unsigned long long)
2670 l->block_ref_from->dev_bytenr,
2671 l->block_ref_from->mirror_num);
2672 if (l->block_ref_from->is_superblock &&
2673 state->latest_superblock->dev_bytenr ==
2674 l->block_ref_from->dev_bytenr &&
2675 state->latest_superblock->dev_state->bdev ==
2676 l->block_ref_from->dev_state->bdev)
2677 return 1;
2678 else if (btrfsic_is_block_ref_by_superblock(state,
2679 l->block_ref_from,
2680 recursion_level +
2681 1))
2682 return 1;
2683 }
2684
2685 return 0;
2686}
2687
2688static void btrfsic_print_add_link(const struct btrfsic_state *state,
2689 const struct btrfsic_block_link *l)
2690{
2691 printk(KERN_INFO
2692 "Add %u* link from %c @%llu (%s/%llu/%d)"
2693 " to %c @%llu (%s/%llu/%d).\n",
2694 l->ref_cnt,
2695 btrfsic_get_block_type(state, l->block_ref_from),
2696 (unsigned long long)l->block_ref_from->logical_bytenr,
2697 l->block_ref_from->dev_state->name,
2698 (unsigned long long)l->block_ref_from->dev_bytenr,
2699 l->block_ref_from->mirror_num,
2700 btrfsic_get_block_type(state, l->block_ref_to),
2701 (unsigned long long)l->block_ref_to->logical_bytenr,
2702 l->block_ref_to->dev_state->name,
2703 (unsigned long long)l->block_ref_to->dev_bytenr,
2704 l->block_ref_to->mirror_num);
2705}
2706
2707static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2708 const struct btrfsic_block_link *l)
2709{
2710 printk(KERN_INFO
2711 "Rem %u* link from %c @%llu (%s/%llu/%d)"
2712 " to %c @%llu (%s/%llu/%d).\n",
2713 l->ref_cnt,
2714 btrfsic_get_block_type(state, l->block_ref_from),
2715 (unsigned long long)l->block_ref_from->logical_bytenr,
2716 l->block_ref_from->dev_state->name,
2717 (unsigned long long)l->block_ref_from->dev_bytenr,
2718 l->block_ref_from->mirror_num,
2719 btrfsic_get_block_type(state, l->block_ref_to),
2720 (unsigned long long)l->block_ref_to->logical_bytenr,
2721 l->block_ref_to->dev_state->name,
2722 (unsigned long long)l->block_ref_to->dev_bytenr,
2723 l->block_ref_to->mirror_num);
2724}
2725
2726static char btrfsic_get_block_type(const struct btrfsic_state *state,
2727 const struct btrfsic_block *block)
2728{
2729 if (block->is_superblock &&
2730 state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2731 state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2732 return 'S';
2733 else if (block->is_superblock)
2734 return 's';
2735 else if (block->is_metadata)
2736 return 'M';
2737 else
2738 return 'D';
2739}
2740
/* Dump the whole reference tree rooted at the most recent superblock. */
static void btrfsic_dump_tree(const struct btrfsic_state *state)
{
	btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
}
2745
2746static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2747 const struct btrfsic_block *block,
2748 int indent_level)
2749{
2750 struct list_head *elem_ref_to;
2751 int indent_add;
2752 static char buf[80];
2753 int cursor_position;
2754
2755 /*
2756 * Should better fill an on-stack buffer with a complete line and
2757 * dump it at once when it is time to print a newline character.
2758 */
2759
2760 /*
2761 * This algorithm is recursive because the amount of used stack space
2762 * is very small and the max recursion depth is limited.
2763 */
2764 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2765 btrfsic_get_block_type(state, block),
2766 (unsigned long long)block->logical_bytenr,
2767 block->dev_state->name,
2768 (unsigned long long)block->dev_bytenr,
2769 block->mirror_num);
2770 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2771 printk("[...]\n");
2772 return;
2773 }
2774 printk(buf);
2775 indent_level += indent_add;
2776 if (list_empty(&block->ref_to_list)) {
2777 printk("\n");
2778 return;
2779 }
2780 if (block->mirror_num > 1 &&
2781 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2782 printk(" [...]\n");
2783 return;
2784 }
2785
2786 cursor_position = indent_level;
2787 list_for_each(elem_ref_to, &block->ref_to_list) {
2788 const struct btrfsic_block_link *const l =
2789 list_entry(elem_ref_to, struct btrfsic_block_link,
2790 node_ref_to);
2791
2792 while (cursor_position < indent_level) {
2793 printk(" ");
2794 cursor_position++;
2795 }
2796 if (l->ref_cnt > 1)
2797 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2798 else
2799 indent_add = sprintf(buf, " --> ");
2800 if (indent_level + indent_add >
2801 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2802 printk("[...]\n");
2803 cursor_position = 0;
2804 continue;
2805 }
2806
2807 printk(buf);
2808
2809 btrfsic_dump_tree_sub(state, l->block_ref_to,
2810 indent_level + indent_add);
2811 cursor_position = 0;
2812 }
2813}
2814
2815static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2816 struct btrfsic_state *state,
2817 struct btrfsic_block_data_ctx *next_block_ctx,
2818 struct btrfsic_block *next_block,
2819 struct btrfsic_block *from_block,
2820 u64 parent_generation)
2821{
2822 struct btrfsic_block_link *l;
2823
2824 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2825 next_block_ctx->dev_bytenr,
2826 from_block->dev_state->bdev,
2827 from_block->dev_bytenr,
2828 &state->block_link_hashtable);
2829 if (NULL == l) {
2830 l = btrfsic_block_link_alloc();
2831 if (NULL == l) {
2832 printk(KERN_INFO
2833 "btrfsic: error, kmalloc" " failed!\n");
2834 return NULL;
2835 }
2836
2837 l->block_ref_to = next_block;
2838 l->block_ref_from = from_block;
2839 l->ref_cnt = 1;
2840 l->parent_generation = parent_generation;
2841
2842 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2843 btrfsic_print_add_link(state, l);
2844
2845 list_add(&l->node_ref_to, &from_block->ref_to_list);
2846 list_add(&l->node_ref_from, &next_block->ref_from_list);
2847
2848 btrfsic_block_link_hashtable_add(l,
2849 &state->block_link_hashtable);
2850 } else {
2851 l->ref_cnt++;
2852 l->parent_generation = parent_generation;
2853 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2854 btrfsic_print_add_link(state, l);
2855 }
2856
2857 return l;
2858}
2859
2860static struct btrfsic_block *btrfsic_block_lookup_or_add(
2861 struct btrfsic_state *state,
2862 struct btrfsic_block_data_ctx *block_ctx,
2863 const char *additional_string,
2864 int is_metadata,
2865 int is_iodone,
2866 int never_written,
2867 int mirror_num,
2868 int *was_created)
2869{
2870 struct btrfsic_block *block;
2871
2872 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2873 block_ctx->dev_bytenr,
2874 &state->block_hashtable);
2875 if (NULL == block) {
2876 struct btrfsic_dev_state *dev_state;
2877
2878 block = btrfsic_block_alloc();
2879 if (NULL == block) {
2880 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2881 return NULL;
2882 }
2883 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2884 if (NULL == dev_state) {
2885 printk(KERN_INFO
2886 "btrfsic: error, lookup dev_state failed!\n");
2887 btrfsic_block_free(block);
2888 return NULL;
2889 }
2890 block->dev_state = dev_state;
2891 block->dev_bytenr = block_ctx->dev_bytenr;
2892 block->logical_bytenr = block_ctx->start;
2893 block->is_metadata = is_metadata;
2894 block->is_iodone = is_iodone;
2895 block->never_written = never_written;
2896 block->mirror_num = mirror_num;
2897 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2898 printk(KERN_INFO
2899 "New %s%c-block @%llu (%s/%llu/%d)\n",
2900 additional_string,
2901 btrfsic_get_block_type(state, block),
2902 (unsigned long long)block->logical_bytenr,
2903 dev_state->name,
2904 (unsigned long long)block->dev_bytenr,
2905 mirror_num);
2906 list_add(&block->all_blocks_node, &state->all_blocks_list);
2907 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2908 if (NULL != was_created)
2909 *was_created = 1;
2910 } else {
2911 if (NULL != was_created)
2912 *was_created = 0;
2913 }
2914
2915 return block;
2916}
2917
2918static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2919 u64 bytenr,
2920 struct btrfsic_dev_state *dev_state,
e06baab4 2921 u64 dev_bytenr)
5db02760
SB
2922{
2923 int num_copies;
2924 int mirror_num;
2925 int ret;
2926 struct btrfsic_block_data_ctx block_ctx;
2927 int match = 0;
2928
2929 num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
e06baab4 2930 bytenr, state->metablock_size);
5db02760
SB
2931
2932 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
e06baab4 2933 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
5db02760
SB
2934 &block_ctx, mirror_num);
2935 if (ret) {
2936 printk(KERN_INFO "btrfsic:"
2937 " btrfsic_map_block(logical @%llu,"
2938 " mirror %d) failed!\n",
2939 (unsigned long long)bytenr, mirror_num);
2940 continue;
2941 }
2942
2943 if (dev_state->bdev == block_ctx.dev->bdev &&
2944 dev_bytenr == block_ctx.dev_bytenr) {
2945 match++;
2946 btrfsic_release_block_ctx(&block_ctx);
2947 break;
2948 }
2949 btrfsic_release_block_ctx(&block_ctx);
2950 }
2951
2952 if (!match) {
2953 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2954 " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2955 " phys_bytenr=%llu)!\n",
2956 (unsigned long long)bytenr, dev_state->name,
2957 (unsigned long long)dev_bytenr);
2958 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
e06baab4
SB
2959 ret = btrfsic_map_block(state, bytenr,
2960 state->metablock_size,
5db02760
SB
2961 &block_ctx, mirror_num);
2962 if (ret)
2963 continue;
2964
2965 printk(KERN_INFO "Read logical bytenr @%llu maps to"
2966 " (%s/%llu/%d)\n",
2967 (unsigned long long)bytenr,
2968 block_ctx.dev->name,
2969 (unsigned long long)block_ctx.dev_bytenr,
2970 mirror_num);
2971 }
2972 WARN_ON(1);
2973 }
2974}
2975
2976static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2977 struct block_device *bdev)
2978{
2979 struct btrfsic_dev_state *ds;
2980
2981 ds = btrfsic_dev_state_hashtable_lookup(bdev,
2982 &btrfsic_dev_state_hashtable);
2983 return ds;
2984}
2985
/*
 * Wrapper around submit_bh() that feeds every buffer_head write into the
 * integrity checker before passing it on to the block layer.
 *
 * Data writes are handed to btrfsic_process_written_block(); pure FLUSH
 * requests are recorded by patching the bh's end_io/private so that the
 * flush completion can advance the device's flush generation.
 */
int btrfsic_submit_bh(int rw, struct buffer_head *bh)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized)
		return submit_bh(rw, bh);

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bh() might also be called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bh->b_bdev);

	/* Only called to write the superblock (incl. FLUSH/FUA) */
	if (NULL != dev_state &&
	    (rw & WRITE) && bh->b_size > 0) {
		u64 dev_bytenr;

		/*
		 * NOTE(review): assumes a 4096-byte block size for the
		 * superblock bh (b_blocknr is in 4K units here) — TODO
		 * confirm; bh->b_size is not used for this conversion.
		 */
		dev_bytenr = 4096 * bh->b_blocknr;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
			       " size=%lu, data=%p, bdev=%p)\n",
			       rw, (unsigned long)bh->b_blocknr,
			       (unsigned long long)dev_bytenr,
			       (unsigned long)bh->b_size, bh->b_data,
			       bh->b_bdev);
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      &bh->b_data, 1, NULL,
					      NULL, bh, rw);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bh->b_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			/*
			 * Only one in-flight flush per device can be
			 * tracked; a second one while the dummy block is
			 * busy is intentionally ignored.
			 */
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bh(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			struct btrfsic_block *const block =
				&dev_state->dummy_block_for_bio_bh_flush;

			/*
			 * Arm the per-device dummy block and hijack the
			 * bh completion: btrfsic_bh_end_io will mark the
			 * flush generation done, then chain to the
			 * original end_io/private saved here.
			 */
			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			block->orig_bio_bh_private = bh->b_private;
			block->orig_bio_bh_end_io.bh = bh->b_end_io;
			block->next_in_same_bio = NULL;
			bh->b_private = block;
			bh->b_end_io = btrfsic_bh_end_io;
		}
	}
	mutex_unlock(&btrfsic_mutex);
	/* always forward the request to the real block layer */
	return submit_bh(rw, bh);
}
3050
/*
 * Wrapper around submit_bio() that feeds every bio write into the
 * integrity checker before passing it on to the block layer.
 *
 * For data writes, all bio_vec pages are kmap()ed and handed as one
 * virtually contiguous array to btrfsic_process_written_block().  Pure
 * FLUSH requests are recorded by patching bi_end_io/bi_private so the
 * flush completion advances the device's flush generation.
 */
void btrfsic_submit_bio(int rw, struct bio *bio)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized) {
		submit_bio(rw, bio);
		return;
	}

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bio() is also called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
	if (NULL != dev_state &&
	    (rw & WRITE) && NULL != bio->bi_io_vec) {
		unsigned int i;
		u64 dev_bytenr;
		int bio_is_patched;
		char **mapped_datav;

		/* bi_sector is in 512-byte units */
		dev_bytenr = 512 * bio->bi_sector;
		bio_is_patched = 0;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x, bi_vcnt=%u,"
			       " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
			       rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
			       (unsigned long long)dev_bytenr,
			       bio->bi_bdev);

		/* one mapped-page pointer per bio_vec entry */
		mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
				       GFP_NOFS);
		if (!mapped_datav)
			goto leave;
		for (i = 0; i < bio->bi_vcnt; i++) {
			/* checker only handles full-page bio_vecs */
			BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
			mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
			if (!mapped_datav[i]) {
				/* unmap everything mapped so far, give up */
				while (i > 0) {
					i--;
					kunmap(bio->bi_io_vec[i].bv_page);
				}
				kfree(mapped_datav);
				goto leave;
			}
			if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			     BTRFSIC_PRINT_MASK_VERBOSE) ==
			    (dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "#%u: page=%p, len=%u, offset=%u\n",
				       i, bio->bi_io_vec[i].bv_page,
				       bio->bi_io_vec[i].bv_len,
				       bio->bi_io_vec[i].bv_offset);
		}
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      mapped_datav, bio->bi_vcnt,
					      bio, &bio_is_patched,
					      NULL, rw);
		/* i == bi_vcnt here; unmap all pages again */
		while (i > 0) {
			i--;
			kunmap(bio->bi_io_vec[i].bv_page);
		}
		kfree(mapped_datav);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bio->bi_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			/*
			 * Only one in-flight flush per device can be
			 * tracked; a second one while the dummy block is
			 * busy is intentionally ignored.
			 */
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bio(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			struct btrfsic_block *const block =
				&dev_state->dummy_block_for_bio_bh_flush;

			/*
			 * Arm the per-device dummy block and hijack the
			 * bio completion: btrfsic_bio_end_io will mark
			 * the flush generation done, then chain to the
			 * original bi_end_io/bi_private saved here.
			 */
			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			block->orig_bio_bh_private = bio->bi_private;
			block->orig_bio_bh_end_io.bio = bio->bi_end_io;
			block->next_in_same_bio = NULL;
			bio->bi_private = block;
			bio->bi_end_io = btrfsic_bio_end_io;
		}
	}
leave:
	mutex_unlock(&btrfsic_mutex);

	/* always forward the request to the real block layer */
	submit_bio(rw, bio);
}
3153
3154int btrfsic_mount(struct btrfs_root *root,
3155 struct btrfs_fs_devices *fs_devices,
3156 int including_extent_data, u32 print_mask)
3157{
3158 int ret;
3159 struct btrfsic_state *state;
3160 struct list_head *dev_head = &fs_devices->devices;
3161 struct btrfs_device *device;
3162
e06baab4
SB
3163 if (root->nodesize != root->leafsize) {
3164 printk(KERN_INFO
3165 "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3166 root->nodesize, root->leafsize);
3167 return -1;
3168 }
3169 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3170 printk(KERN_INFO
3171 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3172 root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3173 return -1;
3174 }
3175 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3176 printk(KERN_INFO
3177 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3178 root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3179 return -1;
3180 }
3181 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3182 printk(KERN_INFO
3183 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3184 root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3185 return -1;
3186 }
5db02760
SB
3187 state = kzalloc(sizeof(*state), GFP_NOFS);
3188 if (NULL == state) {
3189 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
3190 return -1;
3191 }
3192
3193 if (!btrfsic_is_initialized) {
3194 mutex_init(&btrfsic_mutex);
3195 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3196 btrfsic_is_initialized = 1;
3197 }
3198 mutex_lock(&btrfsic_mutex);
3199 state->root = root;
3200 state->print_mask = print_mask;
3201 state->include_extent_data = including_extent_data;
3202 state->csum_size = 0;
e06baab4
SB
3203 state->metablock_size = root->nodesize;
3204 state->datablock_size = root->sectorsize;
5db02760
SB
3205 INIT_LIST_HEAD(&state->all_blocks_list);
3206 btrfsic_block_hashtable_init(&state->block_hashtable);
3207 btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3208 state->max_superblock_generation = 0;
3209 state->latest_superblock = NULL;
3210
3211 list_for_each_entry(device, dev_head, dev_list) {
3212 struct btrfsic_dev_state *ds;
3213 char *p;
3214
3215 if (!device->bdev || !device->name)
3216 continue;
3217
3218 ds = btrfsic_dev_state_alloc();
3219 if (NULL == ds) {
3220 printk(KERN_INFO
3221 "btrfs check-integrity: kmalloc() failed!\n");
3222 mutex_unlock(&btrfsic_mutex);
3223 return -1;
3224 }
3225 ds->bdev = device->bdev;
3226 ds->state = state;
3227 bdevname(ds->bdev, ds->name);
3228 ds->name[BDEVNAME_SIZE - 1] = '\0';
3229 for (p = ds->name; *p != '\0'; p++);
3230 while (p > ds->name && *p != '/')
3231 p--;
3232 if (*p == '/')
3233 p++;
3234 strlcpy(ds->name, p, sizeof(ds->name));
3235 btrfsic_dev_state_hashtable_add(ds,
3236 &btrfsic_dev_state_hashtable);
3237 }
3238
3239 ret = btrfsic_process_superblock(state, fs_devices);
3240 if (0 != ret) {
3241 mutex_unlock(&btrfsic_mutex);
3242 btrfsic_unmount(root, fs_devices);
3243 return ret;
3244 }
3245
3246 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3247 btrfsic_dump_database(state);
3248 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3249 btrfsic_dump_tree(state);
3250
3251 mutex_unlock(&btrfsic_mutex);
3252 return 0;
3253}
3254
/*
 * Tear down the integrity checker state for one filesystem: remove and
 * free the btrfsic_dev_state of every device from the global hashtable,
 * then free all tracked blocks and block links and the per-fs state.
 */
void btrfsic_unmount(struct btrfs_root *root,
		     struct btrfs_fs_devices *fs_devices)
{
	struct list_head *elem_all;
	struct list_head *tmp_all;
	struct btrfsic_state *state;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;

	if (!btrfsic_is_initialized)
		return;

	mutex_lock(&btrfsic_mutex);

	/* recover the per-fs state pointer from any registered dev_state
	 * while unregistering them all */
	state = NULL;
	list_for_each_entry(device, dev_head, dev_list) {
		struct btrfsic_dev_state *ds;

		if (!device->bdev || !device->name)
			continue;

		ds = btrfsic_dev_state_hashtable_lookup(
				device->bdev,
				&btrfsic_dev_state_hashtable);
		if (NULL != ds) {
			state = ds->state;
			btrfsic_dev_state_hashtable_remove(ds);
			btrfsic_dev_state_free(ds);
		}
	}

	if (NULL == state) {
		printk(KERN_INFO
		       "btrfsic: error, cannot find state information"
		       " on umount!\n");
		mutex_unlock(&btrfsic_mutex);
		return;
	}

	/*
	 * Don't care about keeping the lists' state up to date,
	 * just free all memory that was allocated dynamically.
	 * Free the blocks and the block_links.
	 */
	list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
		struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *tmp_ref_to;

		/* drop this block's outgoing references; a link is freed
		 * when its refcount reaches zero */
		list_for_each_safe(elem_ref_to, tmp_ref_to,
				   &b_all->ref_to_list) {
			struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_rem_link(state, l);

			l->ref_cnt--;
			if (0 == l->ref_cnt)
				btrfsic_block_link_free(l);
		}

		/* a block with I/O still in flight must not be freed;
		 * complain instead (the completion would use-after-free) */
		if (b_all->is_iodone)
			btrfsic_block_free(b_all);
		else
			printk(KERN_INFO "btrfs: attempt to free %c-block"
			       " @%llu (%s/%llu/%d) on umount which is"
			       " not yet iodone!\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num);
	}

	mutex_unlock(&btrfsic_mutex);

	kfree(state);
}