Btrfs: fix runtime warning in check-integrity check data mode
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / btrfs / check-integrity.c
CommitLineData
5db02760
SB
1/*
2 * Copyright (C) STRATO AG 2011. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19/*
20 * This module can be used to catch cases when the btrfs kernel
21 * code executes write requests to the disk that bring the file
22 * system in an inconsistent state. In such a state, a power-loss
23 * or kernel panic event would cause that the data on disk is
24 * lost or at least damaged.
25 *
26 * Code is added that examines all block write requests during
27 * runtime (including writes of the super block). Three rules
28 * are verified and an error is printed on violation of the
29 * rules:
30 * 1. It is not allowed to write a disk block which is
31 * currently referenced by the super block (either directly
32 * or indirectly).
33 * 2. When a super block is written, it is verified that all
34 * referenced (directly or indirectly) blocks fulfill the
35 * following requirements:
36 * 2a. All referenced blocks have either been present when
37 * the file system was mounted, (i.e., they have been
38 * referenced by the super block) or they have been
39 * written since then and the write completion callback
40 * was called and a FLUSH request to the device where
41 * these blocks are located was received and completed.
42 * 2b. All referenced blocks need to have a generation
43 * number which is equal to the parent's number.
44 *
45 * One issue that was found using this module was that the log
46 * tree on disk became temporarily corrupted because disk blocks
47 * that had been in use for the log tree had been freed and
48 * reused too early, while being referenced by the written super
49 * block.
50 *
51 * The search term in the kernel log that can be used to filter
52 * on the existence of detected integrity issues is
53 * "btrfs: attempt".
54 *
55 * The integrity check is enabled via mount options. These
56 * mount options are only supported if the integrity check
57 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
58 *
59 * Example #1, apply integrity checks to all metadata:
60 * mount /dev/sdb1 /mnt -o check_int
61 *
62 * Example #2, apply integrity checks to all metadata and
63 * to data extents:
64 * mount /dev/sdb1 /mnt -o check_int_data
65 *
66 * Example #3, apply integrity checks to all metadata and dump
67 * the tree that the super block references to kernel messages
68 * each time after a super block was written:
69 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
70 *
71 * If the integrity check tool is included and activated in
72 * the mount options, plenty of kernel memory is used, and
73 * plenty of additional CPU cycles are spent. Enabling this
74 * functionality is not intended for normal use. In most
75 * cases, unless you are a btrfs developer who needs to verify
76 * the integrity of (super)-block write requests, do not
77 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
78 * include and compile the integrity check tool.
79 */
80
81#include <linux/sched.h>
82#include <linux/slab.h>
83#include <linux/buffer_head.h>
84#include <linux/mutex.h>
85#include <linux/crc32c.h>
86#include <linux/genhd.h>
87#include <linux/blkdev.h>
88#include "ctree.h"
89#include "disk-io.h"
90#include "transaction.h"
91#include "extent_io.h"
5db02760
SB
92#include "volumes.h"
93#include "print-tree.h"
94#include "locking.h"
95#include "check-integrity.h"
96
97#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
98#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
99#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
100#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
101#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
102#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
103#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
104#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
105 * excluding " [...]" */
5db02760
SB
106#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
107
108/*
109 * The definition of the bitmask fields for the print_mask.
110 * They are specified with the mount option check_integrity_print_mask.
111 */
112#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001
113#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002
114#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004
115#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008
116#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010
117#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020
118#define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040
119#define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080
120#define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100
121#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200
122#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
123#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
124#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
125
126struct btrfsic_dev_state;
127struct btrfsic_state;
128
/*
 * Per-disk-block bookkeeping object. One instance exists for every block
 * the integrity checker has seen (referenced at mount time or written at
 * runtime). Blocks live in a hashtable keyed by (bdev, dev_bytenr) and
 * are connected to the blocks they reference via btrfsic_block_link items.
 */
struct btrfsic_block {
	u32 magic_num;		/* only used for debug purposes */
	unsigned int is_metadata:1;	/* if it is meta-data, not data-data */
	unsigned int is_superblock:1;	/* if it is one of the superblocks */
	unsigned int is_iodone:1;	/* if is done by lower subsystem */
	unsigned int iodone_w_error:1;	/* error was indicated to endio */
	unsigned int never_written:1;	/* block was added because it was
					 * referenced, not because it was
					 * written */
	unsigned int mirror_num:2;	/* large enough to hold
					 * BTRFS_SUPER_MIRROR_MAX */
	struct btrfsic_dev_state *dev_state;
	u64 dev_bytenr;		/* key, physical byte num on disk */
	u64 logical_bytenr;	/* logical byte num on disk */
	u64 generation;
	struct btrfs_disk_key disk_key;	/* extra info to print in case of
					 * issues, will not always be correct */
	struct list_head collision_resolving_node;	/* list node */
	struct list_head all_blocks_node;	/* list node */

	/* the following two lists contain block_link items */
	struct list_head ref_to_list;	/* list */
	struct list_head ref_from_list;	/* list */
	struct btrfsic_block *next_in_same_bio;
	void *orig_bio_bh_private;	/* saved private data of the
					 * intercepted bio/buffer_head */
	union {
		bio_end_io_t *bio;	/* saved end_io of intercepted bio */
		bh_end_io_t *bh;	/* saved end_io of intercepted bh */
	} orig_bio_bh_end_io;
	int submit_bio_bh_rw;
	u64 flush_gen;		/* only valid if !never_written */
};
161
/*
 * Elements of this type are allocated dynamically and required because
 * each block object can refer to and can be referred from multiple blocks.
 * The key to look them up in the hashtable is the dev_bytenr of
 * the block referred to plus the one from the block referring to it.
 * The fact that they are searchable via a hashtable and that a
 * ref_cnt is maintained is not required for the btrfs integrity
 * check algorithm itself, it is only used to make the output more
 * beautiful in case that an error is detected (an error is defined
 * as a write operation to a block while that block is still referenced).
 */
struct btrfsic_block_link {
	u32 magic_num;		/* only used for debug purposes */
	u32 ref_cnt;		/* number of times this edge was recorded */
	struct list_head node_ref_to;	/* list node, on from-block's ref_to_list */
	struct list_head node_ref_from;	/* list node, on to-block's ref_from_list */
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block *block_ref_to;	/* the referenced block */
	struct btrfsic_block *block_ref_from;	/* the referencing block */
	u64 parent_generation;
};
183
/*
 * Per-block-device state; looked up by bdev pointer through the
 * btrfsic_dev_state hashtable.
 */
struct btrfsic_dev_state {
	u32 magic_num;		/* only used for debug purposes */
	struct block_device *bdev;
	struct btrfsic_state *state;
	struct list_head collision_resolving_node;	/* list node */
	/* placeholder block used to represent flush requests on this device */
	struct btrfsic_block dummy_block_for_bio_bh_flush;
	u64 last_flush_gen;	/* generation of the last completed flush */
	char name[BDEVNAME_SIZE];	/* device name, for log messages */
};
193
/* open-hashing table of btrfsic_block, chained via collision_resolving_node */
struct btrfsic_block_hashtable {
	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};
197
/* open-hashing table of btrfsic_block_link, chained via collision_resolving_node */
struct btrfsic_block_link_hashtable {
	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};
201
/* open-hashing table of btrfsic_dev_state, keyed by block_device pointer */
struct btrfsic_dev_state_hashtable {
	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};
205
/*
 * Result of mapping a logical byte range to one mirror on one device,
 * together with the buffers that hold the block's data once read.
 */
struct btrfsic_block_data_ctx {
	u64 start;		/* virtual bytenr */
	u64 dev_bytenr;		/* physical bytenr on device */
	u32 len;
	struct btrfsic_dev_state *dev;
	char **datav;		/* mapped data, one pointer per page */
	struct page **pagev;	/* pages backing datav */
	void *mem_to_free;	/* NOTE(review): presumably released by
				 * btrfsic_release_block_ctx() — confirm */
};
215
/* This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock() */
struct btrfsic_stack_frame {
	u32 magic;		/* BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER */
	u32 nr;
	int error;
	int i;			/* current item/slot index within the block */
	int limit_nesting;	/* remaining allowed recursion depth */
	int num_copies;
	int mirror_num;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx *block_ctx;
	struct btrfsic_block *next_block;
	struct btrfsic_block_data_ctx next_block_ctx;
	struct btrfs_header *hdr;
	struct btrfsic_stack_frame *prev;	/* caller's frame, NULL at top */
};
233
/* Some state per mounted filesystem */
struct btrfsic_state {
	u32 print_mask;		/* BTRFSIC_PRINT_MASK_* verbosity bits */
	int include_extent_data;	/* non-zero if data extents are
					 * verified too, not only metadata */
	int csum_size;		/* csum size of the selected superblock */
	struct list_head all_blocks_list;	/* every known btrfsic_block */
	struct btrfsic_block_hashtable block_hashtable;
	struct btrfsic_block_link_hashtable block_link_hashtable;
	struct btrfs_root *root;
	u64 max_superblock_generation;	/* highest sb generation seen */
	struct btrfsic_block *latest_superblock; /* sb block with that gen */
	u32 metablock_size;	/* filesystem node/leaf size */
	u32 datablock_size;	/* filesystem sector size */
};
248
249static void btrfsic_block_init(struct btrfsic_block *b);
250static struct btrfsic_block *btrfsic_block_alloc(void);
251static void btrfsic_block_free(struct btrfsic_block *b);
252static void btrfsic_block_link_init(struct btrfsic_block_link *n);
253static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
254static void btrfsic_block_link_free(struct btrfsic_block_link *n);
255static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
256static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
257static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
258static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
259static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
260 struct btrfsic_block_hashtable *h);
261static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
262static struct btrfsic_block *btrfsic_block_hashtable_lookup(
263 struct block_device *bdev,
264 u64 dev_bytenr,
265 struct btrfsic_block_hashtable *h);
266static void btrfsic_block_link_hashtable_init(
267 struct btrfsic_block_link_hashtable *h);
268static void btrfsic_block_link_hashtable_add(
269 struct btrfsic_block_link *l,
270 struct btrfsic_block_link_hashtable *h);
271static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
272static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
273 struct block_device *bdev_ref_to,
274 u64 dev_bytenr_ref_to,
275 struct block_device *bdev_ref_from,
276 u64 dev_bytenr_ref_from,
277 struct btrfsic_block_link_hashtable *h);
278static void btrfsic_dev_state_hashtable_init(
279 struct btrfsic_dev_state_hashtable *h);
280static void btrfsic_dev_state_hashtable_add(
281 struct btrfsic_dev_state *ds,
282 struct btrfsic_dev_state_hashtable *h);
283static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
284static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
285 struct block_device *bdev,
286 struct btrfsic_dev_state_hashtable *h);
287static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
288static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
289static int btrfsic_process_superblock(struct btrfsic_state *state,
290 struct btrfs_fs_devices *fs_devices);
291static int btrfsic_process_metablock(struct btrfsic_state *state,
292 struct btrfsic_block *block,
293 struct btrfsic_block_data_ctx *block_ctx,
5db02760 294 int limit_nesting, int force_iodone_flag);
e06baab4
SB
295static void btrfsic_read_from_block_data(
296 struct btrfsic_block_data_ctx *block_ctx,
297 void *dst, u32 offset, size_t len);
5db02760
SB
298static int btrfsic_create_link_to_next_block(
299 struct btrfsic_state *state,
300 struct btrfsic_block *block,
301 struct btrfsic_block_data_ctx
302 *block_ctx, u64 next_bytenr,
303 int limit_nesting,
304 struct btrfsic_block_data_ctx *next_block_ctx,
305 struct btrfsic_block **next_blockp,
306 int force_iodone_flag,
307 int *num_copiesp, int *mirror_nump,
308 struct btrfs_disk_key *disk_key,
309 u64 parent_generation);
310static int btrfsic_handle_extent_data(struct btrfsic_state *state,
311 struct btrfsic_block *block,
312 struct btrfsic_block_data_ctx *block_ctx,
313 u32 item_offset, int force_iodone_flag);
314static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
315 struct btrfsic_block_data_ctx *block_ctx_out,
316 int mirror_num);
317static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
318 u32 len, struct block_device *bdev,
319 struct btrfsic_block_data_ctx *block_ctx_out);
320static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
321static int btrfsic_read_block(struct btrfsic_state *state,
322 struct btrfsic_block_data_ctx *block_ctx);
323static void btrfsic_dump_database(struct btrfsic_state *state);
e06baab4 324static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
5db02760 325static int btrfsic_test_for_metadata(struct btrfsic_state *state,
e06baab4 326 char **datav, unsigned int num_pages);
5db02760 327static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
e06baab4
SB
328 u64 dev_bytenr, char **mapped_datav,
329 unsigned int num_pages,
330 struct bio *bio, int *bio_is_patched,
5db02760
SB
331 struct buffer_head *bh,
332 int submit_bio_bh_rw);
333static int btrfsic_process_written_superblock(
334 struct btrfsic_state *state,
335 struct btrfsic_block *const block,
336 struct btrfs_super_block *const super_hdr);
337static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
338static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
339static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
340 const struct btrfsic_block *block,
341 int recursion_level);
342static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
343 struct btrfsic_block *const block,
344 int recursion_level);
345static void btrfsic_print_add_link(const struct btrfsic_state *state,
346 const struct btrfsic_block_link *l);
347static void btrfsic_print_rem_link(const struct btrfsic_state *state,
348 const struct btrfsic_block_link *l);
349static char btrfsic_get_block_type(const struct btrfsic_state *state,
350 const struct btrfsic_block *block);
351static void btrfsic_dump_tree(const struct btrfsic_state *state);
352static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
353 const struct btrfsic_block *block,
354 int indent_level);
355static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
356 struct btrfsic_state *state,
357 struct btrfsic_block_data_ctx *next_block_ctx,
358 struct btrfsic_block *next_block,
359 struct btrfsic_block *from_block,
360 u64 parent_generation);
361static struct btrfsic_block *btrfsic_block_lookup_or_add(
362 struct btrfsic_state *state,
363 struct btrfsic_block_data_ctx *block_ctx,
364 const char *additional_string,
365 int is_metadata,
366 int is_iodone,
367 int never_written,
368 int mirror_num,
369 int *was_created);
370static int btrfsic_process_superblock_dev_mirror(
371 struct btrfsic_state *state,
372 struct btrfsic_dev_state *dev_state,
373 struct btrfs_device *device,
374 int superblock_mirror_num,
375 struct btrfsic_dev_state **selected_dev_state,
376 struct btrfs_super_block *selected_super);
377static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
378 struct block_device *bdev);
379static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
380 u64 bytenr,
381 struct btrfsic_dev_state *dev_state,
e06baab4 382 u64 dev_bytenr);
5db02760
SB
383
384static struct mutex btrfsic_mutex;
385static int btrfsic_is_initialized;
386static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
387
388
389static void btrfsic_block_init(struct btrfsic_block *b)
390{
391 b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
392 b->dev_state = NULL;
393 b->dev_bytenr = 0;
394 b->logical_bytenr = 0;
395 b->generation = BTRFSIC_GENERATION_UNKNOWN;
396 b->disk_key.objectid = 0;
397 b->disk_key.type = 0;
398 b->disk_key.offset = 0;
399 b->is_metadata = 0;
400 b->is_superblock = 0;
401 b->is_iodone = 0;
402 b->iodone_w_error = 0;
403 b->never_written = 0;
404 b->mirror_num = 0;
405 b->next_in_same_bio = NULL;
406 b->orig_bio_bh_private = NULL;
407 b->orig_bio_bh_end_io.bio = NULL;
408 INIT_LIST_HEAD(&b->collision_resolving_node);
409 INIT_LIST_HEAD(&b->all_blocks_node);
410 INIT_LIST_HEAD(&b->ref_to_list);
411 INIT_LIST_HEAD(&b->ref_from_list);
412 b->submit_bio_bh_rw = 0;
413 b->flush_gen = 0;
414}
415
416static struct btrfsic_block *btrfsic_block_alloc(void)
417{
418 struct btrfsic_block *b;
419
420 b = kzalloc(sizeof(*b), GFP_NOFS);
421 if (NULL != b)
422 btrfsic_block_init(b);
423
424 return b;
425}
426
427static void btrfsic_block_free(struct btrfsic_block *b)
428{
429 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
430 kfree(b);
431}
432
433static void btrfsic_block_link_init(struct btrfsic_block_link *l)
434{
435 l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
436 l->ref_cnt = 1;
437 INIT_LIST_HEAD(&l->node_ref_to);
438 INIT_LIST_HEAD(&l->node_ref_from);
439 INIT_LIST_HEAD(&l->collision_resolving_node);
440 l->block_ref_to = NULL;
441 l->block_ref_from = NULL;
442}
443
444static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
445{
446 struct btrfsic_block_link *l;
447
448 l = kzalloc(sizeof(*l), GFP_NOFS);
449 if (NULL != l)
450 btrfsic_block_link_init(l);
451
452 return l;
453}
454
455static void btrfsic_block_link_free(struct btrfsic_block_link *l)
456{
457 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
458 kfree(l);
459}
460
461static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
462{
463 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
464 ds->bdev = NULL;
465 ds->state = NULL;
466 ds->name[0] = '\0';
467 INIT_LIST_HEAD(&ds->collision_resolving_node);
468 ds->last_flush_gen = 0;
469 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
470 ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
471 ds->dummy_block_for_bio_bh_flush.dev_state = ds;
472}
473
474static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
475{
476 struct btrfsic_dev_state *ds;
477
478 ds = kzalloc(sizeof(*ds), GFP_NOFS);
479 if (NULL != ds)
480 btrfsic_dev_state_init(ds);
481
482 return ds;
483}
484
485static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
486{
487 BUG_ON(!(NULL == ds ||
488 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
489 kfree(ds);
490}
491
492static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
493{
494 int i;
495
496 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
497 INIT_LIST_HEAD(h->table + i);
498}
499
500static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
501 struct btrfsic_block_hashtable *h)
502{
503 const unsigned int hashval =
504 (((unsigned int)(b->dev_bytenr >> 16)) ^
505 ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
506 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
507
508 list_add(&b->collision_resolving_node, h->table + hashval);
509}
510
/* Unlink @b from its hash bucket; @b itself is not freed. */
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
{
	list_del(&b->collision_resolving_node);
}
515
516static struct btrfsic_block *btrfsic_block_hashtable_lookup(
517 struct block_device *bdev,
518 u64 dev_bytenr,
519 struct btrfsic_block_hashtable *h)
520{
521 const unsigned int hashval =
522 (((unsigned int)(dev_bytenr >> 16)) ^
523 ((unsigned int)((uintptr_t)bdev))) &
524 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
525 struct list_head *elem;
526
527 list_for_each(elem, h->table + hashval) {
528 struct btrfsic_block *const b =
529 list_entry(elem, struct btrfsic_block,
530 collision_resolving_node);
531
532 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
533 return b;
534 }
535
536 return NULL;
537}
538
539static void btrfsic_block_link_hashtable_init(
540 struct btrfsic_block_link_hashtable *h)
541{
542 int i;
543
544 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
545 INIT_LIST_HEAD(h->table + i);
546}
547
548static void btrfsic_block_link_hashtable_add(
549 struct btrfsic_block_link *l,
550 struct btrfsic_block_link_hashtable *h)
551{
552 const unsigned int hashval =
553 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
554 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
555 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
556 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
557 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
558
559 BUG_ON(NULL == l->block_ref_to);
560 BUG_ON(NULL == l->block_ref_from);
561 list_add(&l->collision_resolving_node, h->table + hashval);
562}
563
/* Unlink @l from its hash bucket; @l itself is not freed. */
static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
{
	list_del(&l->collision_resolving_node);
}
568
569static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
570 struct block_device *bdev_ref_to,
571 u64 dev_bytenr_ref_to,
572 struct block_device *bdev_ref_from,
573 u64 dev_bytenr_ref_from,
574 struct btrfsic_block_link_hashtable *h)
575{
576 const unsigned int hashval =
577 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
578 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
579 ((unsigned int)((uintptr_t)bdev_ref_to)) ^
580 ((unsigned int)((uintptr_t)bdev_ref_from))) &
581 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
582 struct list_head *elem;
583
584 list_for_each(elem, h->table + hashval) {
585 struct btrfsic_block_link *const l =
586 list_entry(elem, struct btrfsic_block_link,
587 collision_resolving_node);
588
589 BUG_ON(NULL == l->block_ref_to);
590 BUG_ON(NULL == l->block_ref_from);
591 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
592 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
593 l->block_ref_from->dev_state->bdev == bdev_ref_from &&
594 l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
595 return l;
596 }
597
598 return NULL;
599}
600
601static void btrfsic_dev_state_hashtable_init(
602 struct btrfsic_dev_state_hashtable *h)
603{
604 int i;
605
606 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
607 INIT_LIST_HEAD(h->table + i);
608}
609
610static void btrfsic_dev_state_hashtable_add(
611 struct btrfsic_dev_state *ds,
612 struct btrfsic_dev_state_hashtable *h)
613{
614 const unsigned int hashval =
615 (((unsigned int)((uintptr_t)ds->bdev)) &
616 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
617
618 list_add(&ds->collision_resolving_node, h->table + hashval);
619}
620
/* Unlink @ds from its hash bucket; @ds itself is not freed. */
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
{
	list_del(&ds->collision_resolving_node);
}
625
626static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
627 struct block_device *bdev,
628 struct btrfsic_dev_state_hashtable *h)
629{
630 const unsigned int hashval =
631 (((unsigned int)((uintptr_t)bdev)) &
632 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
633 struct list_head *elem;
634
635 list_for_each(elem, h->table + hashval) {
636 struct btrfsic_dev_state *const ds =
637 list_entry(elem, struct btrfsic_dev_state,
638 collision_resolving_node);
639
640 if (ds->bdev == bdev)
641 return ds;
642 }
643
644 return NULL;
645}
646
/*
 * Initial scan entry point: read every superblock mirror of every device,
 * pick the one with the highest generation, then walk the root, chunk and
 * log trees it references to populate the block/link database.
 *
 * Returns 0 on success, -1 (or the mirror-scan error) on failure. The
 * caller owns nothing on return; selected_super is freed on every path.
 */
static int btrfsic_process_superblock(struct btrfsic_state *state,
				      struct btrfs_fs_devices *fs_devices)
{
	int ret = 0;
	struct btrfs_super_block *selected_super;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;
	struct btrfsic_dev_state *selected_dev_state = NULL;
	int pass;

	BUG_ON(NULL == state);
	selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
	if (NULL == selected_super) {
		printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
		return -1;
	}

	/* scan all superblock mirrors on all devices; the one with the
	 * highest generation is copied into selected_super */
	list_for_each_entry(device, dev_head, dev_list) {
		int i;
		struct btrfsic_dev_state *dev_state;

		if (!device->bdev || !device->name)
			continue;

		dev_state = btrfsic_dev_state_lookup(device->bdev);
		BUG_ON(NULL == dev_state);
		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
			ret = btrfsic_process_superblock_dev_mirror(
					state, dev_state, device, i,
					&selected_dev_state, selected_super);
			/* a failure on mirror 0 is fatal; failures on the
			 * other mirrors are tolerated */
			if (0 != ret && 0 == i) {
				kfree(selected_super);
				return ret;
			}
		}
	}

	if (NULL == state->latest_superblock) {
		printk(KERN_INFO "btrfsic: no superblock found!\n");
		kfree(selected_super);
		return -1;
	}

	state->csum_size = btrfs_super_csum_size(selected_super);

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree (if any) */
	for (pass = 0; pass < 3; pass++) {
		int num_copies;
		int mirror_num;
		u64 next_bytenr;

		switch (pass) {
		case 0:
			next_bytenr = btrfs_super_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 1:
			next_bytenr = btrfs_super_chunk_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 2:
			next_bytenr = btrfs_super_log_root(selected_super);
			if (0 == next_bytenr)
				continue;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);

		/* process every mirror of this tree root */
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			ret = btrfsic_map_block(state, next_bytenr,
						state->metablock_size,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO "btrfsic:"
				       " btrfsic_map_block(root @%llu,"
				       " mirror %d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				kfree(selected_super);
				return -1;
			}

			/* the mirror scan above must have registered this
			 * block and its link from the superblock already */
			next_block = btrfsic_block_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					&state->block_hashtable);
			BUG_ON(NULL == next_block);

			l = btrfsic_block_link_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					state->latest_superblock->dev_state->
					bdev,
					state->latest_superblock->dev_bytenr,
					&state->block_link_hashtable);
			BUG_ON(NULL == l);

			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
			if (ret < (int)PAGE_CACHE_SIZE) {
				printk(KERN_INFO
				       "btrfsic: read @logical %llu failed!\n",
				       (unsigned long long)
				       tmp_next_block_ctx.start);
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				kfree(selected_super);
				return -1;
			}

			/* recursively (via the explicit stack) record all
			 * blocks this tree root references */
			ret = btrfsic_process_metablock(state,
							next_block,
							&tmp_next_block_ctx,
							BTRFS_MAX_LEVEL + 3, 1);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
		}
	}

	kfree(selected_super);
	return ret;
}
786
/*
 * Read one superblock mirror from one device during the initial scan,
 * register it in the block database, and record links from it to the
 * root/chunk/log tree roots it references.
 *
 * If this mirror has a higher generation than any seen so far, it is
 * copied into *selected_super and *selected_dev_state is updated.
 *
 * Returns 0 on success or when the mirror is simply not a valid
 * superblock for this filesystem (mismatching magic/uuid/sizes),
 * -1 on real errors (read failure, allocation failure, map failure).
 */
static int btrfsic_process_superblock_dev_mirror(
		struct btrfsic_state *state,
		struct btrfsic_dev_state *dev_state,
		struct btrfs_device *device,
		int superblock_mirror_num,
		struct btrfsic_dev_state **selected_dev_state,
		struct btrfs_super_block *selected_super)
{
	struct btrfs_super_block *super_tmp;
	u64 dev_bytenr;
	struct buffer_head *bh;
	struct btrfsic_block *superblock_tmp;
	int pass;
	struct block_device *const superblock_bdev = device->bdev;

	/* super block bytenr is always the unmapped device bytenr */
	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
		return -1;
	bh = __bread(superblock_bdev, dev_bytenr / 4096,
		     BTRFS_SUPER_INFO_SIZE);
	if (NULL == bh)
		return -1;
	super_tmp = (struct btrfs_super_block *)
	    (bh->b_data + (dev_bytenr & 4095));

	/* silently skip mirrors that do not belong to this filesystem or
	 * do not match the expected block sizes */
	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
	    strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
		    sizeof(super_tmp->magic)) ||
	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
	    btrfs_super_leafsize(super_tmp) != state->metablock_size ||
	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
		brelse(bh);
		return 0;
	}

	superblock_tmp =
	    btrfsic_block_hashtable_lookup(superblock_bdev,
					   dev_bytenr,
					   &state->block_hashtable);
	if (NULL == superblock_tmp) {
		superblock_tmp = btrfsic_block_alloc();
		if (NULL == superblock_tmp) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			brelse(bh);
			return -1;
		}
		/* for superblock, only the dev_bytenr makes sense */
		superblock_tmp->dev_bytenr = dev_bytenr;
		superblock_tmp->dev_state = dev_state;
		superblock_tmp->logical_bytenr = dev_bytenr;
		superblock_tmp->generation = btrfs_super_generation(super_tmp);
		superblock_tmp->is_metadata = 1;
		superblock_tmp->is_superblock = 1;
		superblock_tmp->is_iodone = 1;
		superblock_tmp->never_written = 0;
		superblock_tmp->mirror_num = 1 + superblock_mirror_num;
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk(KERN_INFO "New initial S-block (bdev %p, %s)"
			       " @%llu (%s/%llu/%d)\n",
			       superblock_bdev, device->name,
			       (unsigned long long)dev_bytenr,
			       dev_state->name,
			       (unsigned long long)dev_bytenr,
			       superblock_mirror_num);
		list_add(&superblock_tmp->all_blocks_node,
			 &state->all_blocks_list);
		btrfsic_block_hashtable_add(superblock_tmp,
					    &state->block_hashtable);
	}

	/* select the one with the highest generation field */
	if (btrfs_super_generation(super_tmp) >
	    state->max_superblock_generation ||
	    0 == state->max_superblock_generation) {
		memcpy(selected_super, super_tmp, sizeof(*selected_super));
		*selected_dev_state = dev_state;
		state->max_superblock_generation =
		    btrfs_super_generation(super_tmp);
		state->latest_superblock = superblock_tmp;
	}

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree (if any);
	 * register each referenced tree root and a link sb -> root */
	for (pass = 0; pass < 3; pass++) {
		u64 next_bytenr;
		int num_copies;
		int mirror_num;
		const char *additional_string = NULL;
		struct btrfs_disk_key tmp_disk_key;

		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
		tmp_disk_key.offset = 0;
		switch (pass) {
		case 0:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
			additional_string = "initial root ";
			next_bytenr = btrfs_super_root(super_tmp);
			break;
		case 1:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
			additional_string = "initial chunk ";
			next_bytenr = btrfs_super_chunk_root(super_tmp);
			break;
		case 2:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
			additional_string = "initial log ";
			next_bytenr = btrfs_super_log_root(super_tmp);
			if (0 == next_bytenr)
				continue;
			break;
		}

		num_copies =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			if (btrfsic_map_block(state, next_bytenr,
					      state->metablock_size,
					      &tmp_next_block_ctx,
					      mirror_num)) {
				printk(KERN_INFO "btrfsic: btrfsic_map_block("
				       "bytenr @%llu, mirror %d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				brelse(bh);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state, &tmp_next_block_ctx,
					additional_string, 1, 1, 0,
					mirror_num, NULL);
			if (NULL == next_block) {
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				brelse(bh);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			/* the real generation is learned when the block is
			 * actually processed, not here */
			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state, &tmp_next_block_ctx,
					next_block, superblock_tmp,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l) {
				brelse(bh);
				return -1;
			}
		}
	}
	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
		btrfsic_dump_tree_sub(state, superblock_tmp, 0);

	brelse(bh);
	return 0;
}
954
955static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
956{
957 struct btrfsic_stack_frame *sf;
958
959 sf = kzalloc(sizeof(*sf), GFP_NOFS);
960 if (NULL == sf)
961 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
962 else
963 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
964 return sf;
965}
966
967static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
968{
969 BUG_ON(!(NULL == sf ||
970 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
971 kfree(sf);
972}
973
/*
 * Walk a metadata tree block (leaf or node) and record, in the in-memory
 * integrity-check database, links to every block it references: root items
 * and extent-data items in leaves, key pointers in nodes.
 *
 * The recursion over child blocks is implemented iteratively with an
 * explicit stack of btrfsic_stack_frame objects (heap-allocated via
 * btrfsic_stack_frame_alloc(), chained through ->prev) to bound kernel
 * stack usage; control flow is driven by gotos between the labelled
 * states below.
 *
 * @state:               global check-integrity state
 * @first_block:         database entry of the block to start from
 * @first_block_ctx:     mapped data of that block (datav[0] holds the header)
 * @first_limit_nesting: remaining nesting depth; children get depth - 1
 * @force_iodone_flag:   passed through to newly created block entries
 *
 * Returns 0 on success, non-zero on the first error encountered (the
 * error propagates outward through the frame chain). The block ctx of
 * the initial frame is NOT released here; the caller owns it.
 */
static int btrfsic_process_metablock(
		struct btrfsic_state *state,
		struct btrfsic_block *const first_block,
		struct btrfsic_block_data_ctx *const first_block_ctx,
		int first_limit_nesting, int force_iodone_flag)
{
	struct btrfsic_stack_frame initial_stack_frame = { 0 };
	struct btrfsic_stack_frame *sf;
	struct btrfsic_stack_frame *next_stack;
	struct btrfs_header *const first_hdr =
		(struct btrfs_header *)first_block_ctx->datav[0];

	BUG_ON(!first_hdr);
	/* the first frame lives on the kernel stack, not the heap */
	sf = &initial_stack_frame;
	sf->error = 0;
	sf->i = -1;	/* -1 means "item loop not started yet" */
	sf->limit_nesting = first_limit_nesting;
	sf->block = first_block;
	sf->block_ctx = first_block_ctx;
	sf->next_block = NULL;
	sf->hdr = first_hdr;
	sf->prev = NULL;

continue_with_new_stack_frame:
	sf->block->generation = le64_to_cpu(sf->hdr->generation);
	if (0 == sf->hdr->level) {
		/* level 0: a leaf; iterate its items */
		struct btrfs_leaf *const leafhdr =
		    (struct btrfs_leaf *)sf->hdr;

		if (-1 == sf->i) {
			/* first visit of this frame: latch the item count */
			sf->nr = le32_to_cpu(leafhdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "leaf %llu items %d generation %llu"
				       " owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       sf->nr,
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.owner));
		}

continue_with_current_leaf_stack_frame:
		/* advance to the next item once all mirrors were handled */
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_item disk_item;
			u32 disk_item_offset =
			    (uintptr_t)(leafhdr->items + sf->i) -
			    (uintptr_t)leafhdr;
			struct btrfs_disk_key *disk_key;
			u8 type;
			u32 item_offset;

			/* bounds-check before copying the item header */
			if (disk_item_offset + sizeof(struct btrfs_item) >
			    sf->block_ctx->len) {
leaf_item_out_of_bounce_error:
				printk(KERN_INFO
				       "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(sf->block_ctx,
						     &disk_item,
						     disk_item_offset,
						     sizeof(struct btrfs_item));
			item_offset = le32_to_cpu(disk_item.offset);
			disk_key = &disk_item.key;
			type = disk_key->type;

			if (BTRFS_ROOT_ITEM_KEY == type) {
				/* root item: follow the tree root it names */
				struct btrfs_root_item root_item;
				u32 root_item_offset;
				u64 next_bytenr;

				root_item_offset = item_offset +
					offsetof(struct btrfs_leaf, items);
				if (root_item_offset +
				    sizeof(struct btrfs_root_item) >
				    sf->block_ctx->len)
					goto leaf_item_out_of_bounce_error;
				btrfsic_read_from_block_data(
					sf->block_ctx, &root_item,
					root_item_offset,
					sizeof(struct btrfs_root_item));
				next_bytenr = le64_to_cpu(root_item.bytenr);

				sf->error =
				    btrfsic_create_link_to_next_block(
						state,
						sf->block,
						sf->block_ctx,
						next_bytenr,
						sf->limit_nesting,
						&sf->next_block_ctx,
						&sf->next_block,
						force_iodone_flag,
						&sf->num_copies,
						&sf->mirror_num,
						disk_key,
						le64_to_cpu(root_item.
						generation));
				if (sf->error)
					goto one_stack_frame_backwards;

				if (NULL != sf->next_block) {
					/* descend into the child block */
					struct btrfs_header *const next_hdr =
					    (struct btrfs_header *)
					    sf->next_block_ctx.datav[0];

					next_stack =
					    btrfsic_stack_frame_alloc();
					if (NULL == next_stack) {
						btrfsic_release_block_ctx(
								&sf->
								next_block_ctx);
						goto one_stack_frame_backwards;
					}

					next_stack->i = -1;
					next_stack->block = sf->next_block;
					next_stack->block_ctx =
					    &sf->next_block_ctx;
					next_stack->next_block = NULL;
					next_stack->hdr = next_hdr;
					next_stack->limit_nesting =
					    sf->limit_nesting - 1;
					next_stack->prev = sf;
					sf = next_stack;
					goto continue_with_new_stack_frame;
				}
			} else if (BTRFS_EXTENT_DATA_KEY == type &&
				   state->include_extent_data) {
				/* data extent: link data blocks (no descent) */
				sf->error = btrfsic_handle_extent_data(
						state,
						sf->block,
						sf->block_ctx,
						item_offset,
						force_iodone_flag);
				if (sf->error)
					goto one_stack_frame_backwards;
			}

			goto continue_with_current_leaf_stack_frame;
		}
	} else {
		/* interior node: iterate its key pointers */
		struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(nodehdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "node %llu level %d items %d"
				       " generation %llu owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       nodehdr->header.level, sf->nr,
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.owner));
		}

continue_with_current_node_stack_frame:
		/* advance to the next key pointer once all mirrors are done */
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_key_ptr key_ptr;
			u32 key_ptr_offset;
			u64 next_bytenr;

			key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
					 (uintptr_t)nodehdr;
			if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
			    sf->block_ctx->len) {
				printk(KERN_INFO
				       "btrfsic: node item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(
				sf->block_ctx, &key_ptr, key_ptr_offset,
				sizeof(struct btrfs_key_ptr));
			next_bytenr = le64_to_cpu(key_ptr.blockptr);

			sf->error = btrfsic_create_link_to_next_block(
					state,
					sf->block,
					sf->block_ctx,
					next_bytenr,
					sf->limit_nesting,
					&sf->next_block_ctx,
					&sf->next_block,
					force_iodone_flag,
					&sf->num_copies,
					&sf->mirror_num,
					&key_ptr.key,
					le64_to_cpu(key_ptr.generation));
			if (sf->error)
				goto one_stack_frame_backwards;

			if (NULL != sf->next_block) {
				/* descend into the child block */
				struct btrfs_header *const next_hdr =
				    (struct btrfs_header *)
				    sf->next_block_ctx.datav[0];

				next_stack = btrfsic_stack_frame_alloc();
				if (NULL == next_stack)
					goto one_stack_frame_backwards;

				next_stack->i = -1;
				next_stack->block = sf->next_block;
				next_stack->block_ctx = &sf->next_block_ctx;
				next_stack->next_block = NULL;
				next_stack->hdr = next_hdr;
				next_stack->limit_nesting =
				    sf->limit_nesting - 1;
				next_stack->prev = sf;
				sf = next_stack;
				goto continue_with_new_stack_frame;
			}

			goto continue_with_current_node_stack_frame;
		}
	}

one_stack_frame_backwards:
	/* pop one frame; propagate a pending error to the parent frame */
	if (NULL != sf->prev) {
		struct btrfsic_stack_frame *const prev = sf->prev;

		/* the one for the initial block is freed in the caller */
		btrfsic_release_block_ctx(sf->block_ctx);

		if (sf->error) {
			prev->error = sf->error;
			btrfsic_stack_frame_free(sf);
			sf = prev;
			goto one_stack_frame_backwards;
		}

		btrfsic_stack_frame_free(sf);
		sf = prev;
		goto continue_with_new_stack_frame;
	} else {
		/* only the caller-owned initial frame may remain */
		BUG_ON(&initial_stack_frame != sf);
	}

	return sf->error;
}
1234
e06baab4
SB
1235static void btrfsic_read_from_block_data(
1236 struct btrfsic_block_data_ctx *block_ctx,
1237 void *dstv, u32 offset, size_t len)
1238{
1239 size_t cur;
1240 size_t offset_in_page;
1241 char *kaddr;
1242 char *dst = (char *)dstv;
1243 size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1244 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1245
1246 WARN_ON(offset + len > block_ctx->len);
1247 offset_in_page = (start_offset + offset) &
1248 ((unsigned long)PAGE_CACHE_SIZE - 1);
1249
1250 while (len > 0) {
1251 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1252 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1253 PAGE_CACHE_SHIFT);
1254 kaddr = block_ctx->datav[i];
1255 memcpy(dst, kaddr + offset_in_page, cur);
1256
1257 dst += cur;
1258 len -= cur;
1259 offset_in_page = 0;
1260 i++;
1261 }
1262}
1263
5db02760
SB
/*
 * Create (or refresh) the database link from @block to the metadata block
 * at logical address @next_bytenr, for one mirror at a time.
 *
 * On the first call for an item (*num_copiesp == 0) the number of mirrors
 * is computed and *mirror_nump is reset to 1; each successful call then
 * advances *mirror_nump, so the caller loops until
 * *mirror_nump > *num_copiesp.
 *
 * If a brand-new link was created and nesting is still allowed
 * (limit_nesting > 0), the child block's data is read and *next_blockp is
 * set so the caller can descend into it; otherwise *next_blockp is NULL.
 * When *next_blockp is set, ownership of *next_block_ctx passes to the
 * caller (released on frame pop in btrfsic_process_metablock()).
 *
 * Returns 0 on success (including the "all mirrors done" no-op case),
 * -1 on mapping/allocation/read failure.
 */
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation)
{
	struct btrfsic_block *next_block = NULL;
	int ret;
	struct btrfsic_block_link *l;
	int did_alloc_block_link;
	int block_was_created;

	*next_blockp = NULL;
	if (0 == *num_copiesp) {
		/* first mirror for this item: determine the mirror count */
		*num_copiesp =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, *num_copiesp);
		*mirror_nump = 1;
	}

	/* all mirrors already processed for this item */
	if (*mirror_nump > *num_copiesp)
		return 0;

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
		printk(KERN_INFO
		       "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
		       *mirror_nump);
	ret = btrfsic_map_block(state, next_bytenr,
				state->metablock_size,
				next_block_ctx, *mirror_nump);
	if (ret) {
		printk(KERN_INFO
		       "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
		       (unsigned long long)next_bytenr, *mirror_nump);
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}

	next_block = btrfsic_block_lookup_or_add(state,
						 next_block_ctx, "referenced ",
						 1, force_iodone_flag,
						 !force_iodone_flag,
						 *mirror_nump,
						 &block_was_created);
	if (NULL == next_block) {
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}
	if (block_was_created) {
		/* fresh entry: no existing link can exist yet */
		l = NULL;
		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
	} else {
		/* known block: warn on logical-bytenr mismatch, then look
		 * for an already-recorded link between the two blocks */
		if (next_block->logical_bytenr != next_bytenr &&
		    !(!next_block->is_metadata &&
		      0 == next_block->logical_bytenr)) {
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c,"
			       " bytenr mismatch (!= stored %llu).\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block),
			       (unsigned long long)next_block->logical_bytenr);
		} else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c.\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block));
		next_block->logical_bytenr = next_bytenr;

		next_block->mirror_num = *mirror_nump;
		l = btrfsic_block_link_hashtable_lookup(
				next_block_ctx->dev->bdev,
				next_block_ctx->dev_bytenr,
				block_ctx->dev->bdev,
				block_ctx->dev_bytenr,
				&state->block_link_hashtable);
	}

	next_block->disk_key = *disk_key;
	if (NULL == l) {
		/* no link recorded yet: allocate and register one */
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		did_alloc_block_link = 1;
		l->block_ref_to = next_block;
		l->block_ref_from = block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		list_add(&l->node_ref_to, &block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		did_alloc_block_link = 0;
		if (0 == limit_nesting) {
			/* deepest level: just bump the existing link */
			l->ref_cnt++;
			l->parent_generation = parent_generation;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_add_link(state, l);
		}
	}

	if (limit_nesting > 0 && did_alloc_block_link) {
		/* new link and nesting allowed: read the child so the
		 * caller can descend into it */
		ret = btrfsic_read_block(state, next_block_ctx);
		if (ret < (int)next_block_ctx->len) {
			printk(KERN_INFO
			       "btrfsic: read block @logical %llu failed!\n",
			       (unsigned long long)next_bytenr);
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		*next_blockp = next_block;
	} else {
		*next_blockp = NULL;
	}
	(*mirror_nump)++;

	return 0;
}
1414
1415static int btrfsic_handle_extent_data(
1416 struct btrfsic_state *state,
1417 struct btrfsic_block *block,
1418 struct btrfsic_block_data_ctx *block_ctx,
1419 u32 item_offset, int force_iodone_flag)
1420{
1421 int ret;
e06baab4
SB
1422 struct btrfs_file_extent_item file_extent_item;
1423 u64 file_extent_item_offset;
1424 u64 next_bytenr;
1425 u64 num_bytes;
1426 u64 generation;
5db02760
SB
1427 struct btrfsic_block_link *l;
1428
e06baab4
SB
1429 file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1430 item_offset;
86ff7ffc
SB
1431 if (file_extent_item_offset +
1432 offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1433 block_ctx->len) {
1434 printk(KERN_INFO
1435 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1436 block_ctx->start, block_ctx->dev->name);
1437 return -1;
1438 }
1439
1440 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1441 file_extent_item_offset,
1442 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1443 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1444 ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
1445 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1446 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1447 file_extent_item.type,
1448 (unsigned long long)
1449 le64_to_cpu(file_extent_item.disk_bytenr));
1450 return 0;
1451 }
1452
e06baab4
SB
1453 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1454 block_ctx->len) {
1455 printk(KERN_INFO
1456 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1457 block_ctx->start, block_ctx->dev->name);
1458 return -1;
1459 }
1460 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1461 file_extent_item_offset,
1462 sizeof(struct btrfs_file_extent_item));
1463 next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1464 le64_to_cpu(file_extent_item.offset);
1465 generation = le64_to_cpu(file_extent_item.generation);
1466 num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1467 generation = le64_to_cpu(file_extent_item.generation);
1468
5db02760
SB
1469 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1470 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1471 " offset = %llu, num_bytes = %llu\n",
e06baab4 1472 file_extent_item.type,
5db02760 1473 (unsigned long long)
e06baab4
SB
1474 le64_to_cpu(file_extent_item.disk_bytenr),
1475 (unsigned long long)le64_to_cpu(file_extent_item.offset),
1476 (unsigned long long)num_bytes);
5db02760
SB
1477 while (num_bytes > 0) {
1478 u32 chunk_len;
1479 int num_copies;
1480 int mirror_num;
1481
e06baab4
SB
1482 if (num_bytes > state->datablock_size)
1483 chunk_len = state->datablock_size;
5db02760
SB
1484 else
1485 chunk_len = num_bytes;
1486
1487 num_copies =
1488 btrfs_num_copies(&state->root->fs_info->mapping_tree,
e06baab4 1489 next_bytenr, state->datablock_size);
5db02760
SB
1490 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1491 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1492 (unsigned long long)next_bytenr, num_copies);
1493 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1494 struct btrfsic_block_data_ctx next_block_ctx;
1495 struct btrfsic_block *next_block;
1496 int block_was_created;
1497
1498 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1499 printk(KERN_INFO "btrfsic_handle_extent_data("
1500 "mirror_num=%d)\n", mirror_num);
1501 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1502 printk(KERN_INFO
1503 "\tdisk_bytenr = %llu, num_bytes %u\n",
1504 (unsigned long long)next_bytenr,
1505 chunk_len);
1506 ret = btrfsic_map_block(state, next_bytenr,
1507 chunk_len, &next_block_ctx,
1508 mirror_num);
1509 if (ret) {
1510 printk(KERN_INFO
1511 "btrfsic: btrfsic_map_block(@%llu,"
1512 " mirror=%d) failed!\n",
1513 (unsigned long long)next_bytenr,
1514 mirror_num);
1515 return -1;
1516 }
1517
1518 next_block = btrfsic_block_lookup_or_add(
1519 state,
1520 &next_block_ctx,
1521 "referenced ",
1522 0,
1523 force_iodone_flag,
1524 !force_iodone_flag,
1525 mirror_num,
1526 &block_was_created);
1527 if (NULL == next_block) {
1528 printk(KERN_INFO
1529 "btrfsic: error, kmalloc failed!\n");
1530 btrfsic_release_block_ctx(&next_block_ctx);
1531 return -1;
1532 }
1533 if (!block_was_created) {
1534 if (next_block->logical_bytenr != next_bytenr &&
1535 !(!next_block->is_metadata &&
1536 0 == next_block->logical_bytenr)) {
1537 printk(KERN_INFO
1538 "Referenced block"
1539 " @%llu (%s/%llu/%d)"
1540 " found in hash table, D,"
1541 " bytenr mismatch"
1542 " (!= stored %llu).\n",
1543 (unsigned long long)next_bytenr,
1544 next_block_ctx.dev->name,
1545 (unsigned long long)
1546 next_block_ctx.dev_bytenr,
1547 mirror_num,
1548 (unsigned long long)
1549 next_block->logical_bytenr);
1550 }
1551 next_block->logical_bytenr = next_bytenr;
1552 next_block->mirror_num = mirror_num;
1553 }
1554
1555 l = btrfsic_block_link_lookup_or_add(state,
1556 &next_block_ctx,
1557 next_block, block,
1558 generation);
1559 btrfsic_release_block_ctx(&next_block_ctx);
1560 if (NULL == l)
1561 return -1;
1562 }
1563
1564 next_bytenr += chunk_len;
1565 num_bytes -= chunk_len;
1566 }
1567
1568 return 0;
1569}
1570
1571static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1572 struct btrfsic_block_data_ctx *block_ctx_out,
1573 int mirror_num)
1574{
1575 int ret;
1576 u64 length;
1577 struct btrfs_bio *multi = NULL;
1578 struct btrfs_device *device;
1579
1580 length = len;
1581 ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
1582 bytenr, &length, &multi, mirror_num);
1583
1584 device = multi->stripes[0].dev;
1585 block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1586 block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1587 block_ctx_out->start = bytenr;
1588 block_ctx_out->len = len;
e06baab4
SB
1589 block_ctx_out->datav = NULL;
1590 block_ctx_out->pagev = NULL;
1591 block_ctx_out->mem_to_free = NULL;
5db02760
SB
1592
1593 if (0 == ret)
1594 kfree(multi);
1595 if (NULL == block_ctx_out->dev) {
1596 ret = -ENXIO;
1597 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1598 }
1599
1600 return ret;
1601}
1602
1603static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1604 u32 len, struct block_device *bdev,
1605 struct btrfsic_block_data_ctx *block_ctx_out)
1606{
1607 block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
1608 block_ctx_out->dev_bytenr = bytenr;
1609 block_ctx_out->start = bytenr;
1610 block_ctx_out->len = len;
e06baab4
SB
1611 block_ctx_out->datav = NULL;
1612 block_ctx_out->pagev = NULL;
1613 block_ctx_out->mem_to_free = NULL;
5db02760
SB
1614 if (NULL != block_ctx_out->dev) {
1615 return 0;
1616 } else {
1617 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
1618 return -ENXIO;
1619 }
1620}
1621
1622static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1623{
e06baab4
SB
1624 if (block_ctx->mem_to_free) {
1625 unsigned int num_pages;
1626
1627 BUG_ON(!block_ctx->datav);
1628 BUG_ON(!block_ctx->pagev);
1629 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1630 PAGE_CACHE_SHIFT;
1631 while (num_pages > 0) {
1632 num_pages--;
1633 if (block_ctx->datav[num_pages]) {
1634 kunmap(block_ctx->pagev[num_pages]);
1635 block_ctx->datav[num_pages] = NULL;
1636 }
1637 if (block_ctx->pagev[num_pages]) {
1638 __free_page(block_ctx->pagev[num_pages]);
1639 block_ctx->pagev[num_pages] = NULL;
1640 }
1641 }
1642
1643 kfree(block_ctx->mem_to_free);
1644 block_ctx->mem_to_free = NULL;
1645 block_ctx->pagev = NULL;
1646 block_ctx->datav = NULL;
5db02760
SB
1647 }
1648}
1649
1650static int btrfsic_read_block(struct btrfsic_state *state,
1651 struct btrfsic_block_data_ctx *block_ctx)
1652{
e06baab4
SB
1653 unsigned int num_pages;
1654 unsigned int i;
1655 u64 dev_bytenr;
1656 int ret;
1657
1658 BUG_ON(block_ctx->datav);
1659 BUG_ON(block_ctx->pagev);
1660 BUG_ON(block_ctx->mem_to_free);
1661 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
5db02760
SB
1662 printk(KERN_INFO
1663 "btrfsic: read_block() with unaligned bytenr %llu\n",
1664 (unsigned long long)block_ctx->dev_bytenr);
1665 return -1;
1666 }
e06baab4
SB
1667
1668 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1669 PAGE_CACHE_SHIFT;
1670 block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1671 sizeof(*block_ctx->pagev)) *
1672 num_pages, GFP_NOFS);
1673 if (!block_ctx->mem_to_free)
5db02760 1674 return -1;
e06baab4
SB
1675 block_ctx->datav = block_ctx->mem_to_free;
1676 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1677 for (i = 0; i < num_pages; i++) {
1678 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1679 if (!block_ctx->pagev[i])
1680 return -1;
5db02760
SB
1681 }
1682
e06baab4
SB
1683 dev_bytenr = block_ctx->dev_bytenr;
1684 for (i = 0; i < num_pages;) {
1685 struct bio *bio;
1686 unsigned int j;
1687 DECLARE_COMPLETION_ONSTACK(complete);
1688
1689 bio = bio_alloc(GFP_NOFS, num_pages - i);
1690 if (!bio) {
1691 printk(KERN_INFO
1692 "btrfsic: bio_alloc() for %u pages failed!\n",
1693 num_pages - i);
1694 return -1;
1695 }
1696 bio->bi_bdev = block_ctx->dev->bdev;
1697 bio->bi_sector = dev_bytenr >> 9;
1698 bio->bi_end_io = btrfsic_complete_bio_end_io;
1699 bio->bi_private = &complete;
1700
1701 for (j = i; j < num_pages; j++) {
1702 ret = bio_add_page(bio, block_ctx->pagev[j],
1703 PAGE_CACHE_SIZE, 0);
1704 if (PAGE_CACHE_SIZE != ret)
1705 break;
1706 }
1707 if (j == i) {
1708 printk(KERN_INFO
1709 "btrfsic: error, failed to add a single page!\n");
1710 return -1;
1711 }
1712 submit_bio(READ, bio);
1713
1714 /* this will also unplug the queue */
1715 wait_for_completion(&complete);
1716
1717 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1718 printk(KERN_INFO
1719 "btrfsic: read error at logical %llu dev %s!\n",
1720 block_ctx->start, block_ctx->dev->name);
1721 bio_put(bio);
1722 return -1;
1723 }
1724 bio_put(bio);
1725 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1726 i = j;
1727 }
1728 for (i = 0; i < num_pages; i++) {
1729 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1730 if (!block_ctx->datav[i]) {
1731 printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1732 block_ctx->dev->name);
1733 return -1;
1734 }
1735 }
5db02760
SB
1736
1737 return block_ctx->len;
1738}
1739
e06baab4
SB
1740static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1741{
1742 complete((struct completion *)bio->bi_private);
1743}
1744
5db02760
SB
/*
 * Diagnostic helper: dump the entire in-memory integrity-check database
 * to the kernel log — every known block plus, for each block, both its
 * outgoing ("refers to") and incoming ("is referenced from") links.
 * Read-only; intended for debugging via the print mask.
 */
static void btrfsic_dump_database(struct btrfsic_state *state)
{
	struct list_head *elem_all;

	BUG_ON(NULL == state);

	printk(KERN_INFO "all_blocks_list:\n");
	list_for_each(elem_all, &state->all_blocks_list) {
		const struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *elem_ref_from;

		printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
		       btrfsic_get_block_type(state, b_all),
		       (unsigned long long)b_all->logical_bytenr,
		       b_all->dev_state->name,
		       (unsigned long long)b_all->dev_bytenr,
		       b_all->mirror_num);

		/* outgoing links: blocks this one references */
		list_for_each(elem_ref_to, &b_all->ref_to_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " refers %u* to"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       (unsigned long long)
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       (unsigned long long)l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		}

		/* incoming links: blocks that reference this one */
		list_for_each(elem_ref_from, &b_all->ref_from_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_from,
				       struct btrfsic_block_link,
				       node_ref_from);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " is ref %u* from"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       (unsigned long long)
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       (unsigned long long)
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		}

		printk(KERN_INFO "\n");
	}
}
1816
1817/*
1818 * Test whether the disk block contains a tree block (leaf or node)
1819 * (note that this test fails for the super block)
1820 */
1821static int btrfsic_test_for_metadata(struct btrfsic_state *state,
e06baab4 1822 char **datav, unsigned int num_pages)
5db02760
SB
1823{
1824 struct btrfs_header *h;
1825 u8 csum[BTRFS_CSUM_SIZE];
1826 u32 crc = ~(u32)0;
e06baab4 1827 unsigned int i;
5db02760 1828
e06baab4
SB
1829 if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1830 return 1; /* not metadata */
1831 num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1832 h = (struct btrfs_header *)datav[0];
5db02760
SB
1833
1834 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
e06baab4 1835 return 1;
5db02760 1836
e06baab4
SB
1837 for (i = 0; i < num_pages; i++) {
1838 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1839 size_t sublen = i ? PAGE_CACHE_SIZE :
1840 (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1841
1842 crc = crc32c(crc, data, sublen);
1843 }
5db02760
SB
1844 btrfs_csum_final(crc, csum);
1845 if (memcmp(csum, h->csum, state->csum_size))
e06baab4 1846 return 1;
5db02760 1847
e06baab4 1848 return 0; /* is metadata */
5db02760
SB
1849}
1850
1851static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
e06baab4
SB
1852 u64 dev_bytenr, char **mapped_datav,
1853 unsigned int num_pages,
1854 struct bio *bio, int *bio_is_patched,
5db02760
SB
1855 struct buffer_head *bh,
1856 int submit_bio_bh_rw)
1857{
1858 int is_metadata;
1859 struct btrfsic_block *block;
1860 struct btrfsic_block_data_ctx block_ctx;
1861 int ret;
1862 struct btrfsic_state *state = dev_state->state;
1863 struct block_device *bdev = dev_state->bdev;
e06baab4 1864 unsigned int processed_len;
5db02760 1865
5db02760
SB
1866 if (NULL != bio_is_patched)
1867 *bio_is_patched = 0;
1868
e06baab4
SB
1869again:
1870 if (num_pages == 0)
1871 return;
1872
1873 processed_len = 0;
1874 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1875 num_pages));
1876
5db02760
SB
1877 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1878 &state->block_hashtable);
1879 if (NULL != block) {
0b485143 1880 u64 bytenr = 0;
5db02760
SB
1881 struct list_head *elem_ref_to;
1882 struct list_head *tmp_ref_to;
1883
1884 if (block->is_superblock) {
1885 bytenr = le64_to_cpu(((struct btrfs_super_block *)
e06baab4
SB
1886 mapped_datav[0])->bytenr);
1887 if (num_pages * PAGE_CACHE_SIZE <
1888 BTRFS_SUPER_INFO_SIZE) {
1889 printk(KERN_INFO
1890 "btrfsic: cannot work with too short bios!\n");
1891 return;
1892 }
5db02760 1893 is_metadata = 1;
e06baab4
SB
1894 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1895 processed_len = BTRFS_SUPER_INFO_SIZE;
5db02760
SB
1896 if (state->print_mask &
1897 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1898 printk(KERN_INFO
1899 "[before new superblock is written]:\n");
1900 btrfsic_dump_tree_sub(state, block, 0);
1901 }
1902 }
1903 if (is_metadata) {
1904 if (!block->is_superblock) {
e06baab4
SB
1905 if (num_pages * PAGE_CACHE_SIZE <
1906 state->metablock_size) {
1907 printk(KERN_INFO
1908 "btrfsic: cannot work with too short bios!\n");
1909 return;
1910 }
1911 processed_len = state->metablock_size;
5db02760 1912 bytenr = le64_to_cpu(((struct btrfs_header *)
e06baab4 1913 mapped_datav[0])->bytenr);
5db02760
SB
1914 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1915 dev_state,
e06baab4 1916 dev_bytenr);
5db02760
SB
1917 }
1918 if (block->logical_bytenr != bytenr) {
1919 printk(KERN_INFO
1920 "Written block @%llu (%s/%llu/%d)"
1921 " found in hash table, %c,"
1922 " bytenr mismatch"
1923 " (!= stored %llu).\n",
1924 (unsigned long long)bytenr,
1925 dev_state->name,
1926 (unsigned long long)dev_bytenr,
1927 block->mirror_num,
1928 btrfsic_get_block_type(state, block),
1929 (unsigned long long)
1930 block->logical_bytenr);
1931 block->logical_bytenr = bytenr;
1932 } else if (state->print_mask &
1933 BTRFSIC_PRINT_MASK_VERBOSE)
1934 printk(KERN_INFO
1935 "Written block @%llu (%s/%llu/%d)"
1936 " found in hash table, %c.\n",
1937 (unsigned long long)bytenr,
1938 dev_state->name,
1939 (unsigned long long)dev_bytenr,
1940 block->mirror_num,
1941 btrfsic_get_block_type(state, block));
1942 } else {
e06baab4
SB
1943 if (num_pages * PAGE_CACHE_SIZE <
1944 state->datablock_size) {
1945 printk(KERN_INFO
1946 "btrfsic: cannot work with too short bios!\n");
1947 return;
1948 }
1949 processed_len = state->datablock_size;
5db02760
SB
1950 bytenr = block->logical_bytenr;
1951 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1952 printk(KERN_INFO
1953 "Written block @%llu (%s/%llu/%d)"
1954 " found in hash table, %c.\n",
1955 (unsigned long long)bytenr,
1956 dev_state->name,
1957 (unsigned long long)dev_bytenr,
1958 block->mirror_num,
1959 btrfsic_get_block_type(state, block));
1960 }
1961
1962 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1963 printk(KERN_INFO
1964 "ref_to_list: %cE, ref_from_list: %cE\n",
1965 list_empty(&block->ref_to_list) ? ' ' : '!',
1966 list_empty(&block->ref_from_list) ? ' ' : '!');
1967 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1968 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1969 " @%llu (%s/%llu/%d), old(gen=%llu,"
1970 " objectid=%llu, type=%d, offset=%llu),"
1971 " new(gen=%llu),"
1972 " which is referenced by most recent superblock"
1973 " (superblockgen=%llu)!\n",
1974 btrfsic_get_block_type(state, block),
1975 (unsigned long long)bytenr,
1976 dev_state->name,
1977 (unsigned long long)dev_bytenr,
1978 block->mirror_num,
1979 (unsigned long long)block->generation,
1980 (unsigned long long)
1981 le64_to_cpu(block->disk_key.objectid),
1982 block->disk_key.type,
1983 (unsigned long long)
1984 le64_to_cpu(block->disk_key.offset),
1985 (unsigned long long)
1986 le64_to_cpu(((struct btrfs_header *)
e06baab4 1987 mapped_datav[0])->generation),
5db02760
SB
1988 (unsigned long long)
1989 state->max_superblock_generation);
1990 btrfsic_dump_tree(state);
1991 }
1992
1993 if (!block->is_iodone && !block->never_written) {
1994 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1995 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1996 " which is not yet iodone!\n",
1997 btrfsic_get_block_type(state, block),
1998 (unsigned long long)bytenr,
1999 dev_state->name,
2000 (unsigned long long)dev_bytenr,
2001 block->mirror_num,
2002 (unsigned long long)block->generation,
2003 (unsigned long long)
2004 le64_to_cpu(((struct btrfs_header *)
e06baab4 2005 mapped_datav[0])->generation));
5db02760
SB
2006 /* it would not be safe to go on */
2007 btrfsic_dump_tree(state);
e06baab4 2008 goto continue_loop;
5db02760
SB
2009 }
2010
2011 /*
2012 * Clear all references of this block. Do not free
2013 * the block itself even if is not referenced anymore
2014 * because it still carries valueable information
2015 * like whether it was ever written and IO completed.
2016 */
2017 list_for_each_safe(elem_ref_to, tmp_ref_to,
2018 &block->ref_to_list) {
2019 struct btrfsic_block_link *const l =
2020 list_entry(elem_ref_to,
2021 struct btrfsic_block_link,
2022 node_ref_to);
2023
2024 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2025 btrfsic_print_rem_link(state, l);
2026 l->ref_cnt--;
2027 if (0 == l->ref_cnt) {
2028 list_del(&l->node_ref_to);
2029 list_del(&l->node_ref_from);
2030 btrfsic_block_link_hashtable_remove(l);
2031 btrfsic_block_link_free(l);
2032 }
2033 }
2034
2035 if (block->is_superblock)
e06baab4
SB
2036 ret = btrfsic_map_superblock(state, bytenr,
2037 processed_len,
5db02760
SB
2038 bdev, &block_ctx);
2039 else
e06baab4 2040 ret = btrfsic_map_block(state, bytenr, processed_len,
5db02760
SB
2041 &block_ctx, 0);
2042 if (ret) {
2043 printk(KERN_INFO
2044 "btrfsic: btrfsic_map_block(root @%llu)"
2045 " failed!\n", (unsigned long long)bytenr);
e06baab4 2046 goto continue_loop;
5db02760 2047 }
e06baab4 2048 block_ctx.datav = mapped_datav;
5db02760
SB
2049 /* the following is required in case of writes to mirrors,
2050 * use the same that was used for the lookup */
2051 block_ctx.dev = dev_state;
2052 block_ctx.dev_bytenr = dev_bytenr;
2053
2054 if (is_metadata || state->include_extent_data) {
2055 block->never_written = 0;
2056 block->iodone_w_error = 0;
2057 if (NULL != bio) {
2058 block->is_iodone = 0;
2059 BUG_ON(NULL == bio_is_patched);
2060 if (!*bio_is_patched) {
2061 block->orig_bio_bh_private =
2062 bio->bi_private;
2063 block->orig_bio_bh_end_io.bio =
2064 bio->bi_end_io;
2065 block->next_in_same_bio = NULL;
2066 bio->bi_private = block;
2067 bio->bi_end_io = btrfsic_bio_end_io;
2068 *bio_is_patched = 1;
2069 } else {
2070 struct btrfsic_block *chained_block =
2071 (struct btrfsic_block *)
2072 bio->bi_private;
2073
2074 BUG_ON(NULL == chained_block);
2075 block->orig_bio_bh_private =
2076 chained_block->orig_bio_bh_private;
2077 block->orig_bio_bh_end_io.bio =
2078 chained_block->orig_bio_bh_end_io.
2079 bio;
2080 block->next_in_same_bio = chained_block;
2081 bio->bi_private = block;
2082 }
2083 } else if (NULL != bh) {
2084 block->is_iodone = 0;
2085 block->orig_bio_bh_private = bh->b_private;
2086 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2087 block->next_in_same_bio = NULL;
2088 bh->b_private = block;
2089 bh->b_end_io = btrfsic_bh_end_io;
2090 } else {
2091 block->is_iodone = 1;
2092 block->orig_bio_bh_private = NULL;
2093 block->orig_bio_bh_end_io.bio = NULL;
2094 block->next_in_same_bio = NULL;
2095 }
2096 }
2097
2098 block->flush_gen = dev_state->last_flush_gen + 1;
2099 block->submit_bio_bh_rw = submit_bio_bh_rw;
2100 if (is_metadata) {
2101 block->logical_bytenr = bytenr;
2102 block->is_metadata = 1;
2103 if (block->is_superblock) {
e06baab4
SB
2104 BUG_ON(PAGE_CACHE_SIZE !=
2105 BTRFS_SUPER_INFO_SIZE);
5db02760
SB
2106 ret = btrfsic_process_written_superblock(
2107 state,
2108 block,
2109 (struct btrfs_super_block *)
e06baab4 2110 mapped_datav[0]);
5db02760
SB
2111 if (state->print_mask &
2112 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2113 printk(KERN_INFO
2114 "[after new superblock is written]:\n");
2115 btrfsic_dump_tree_sub(state, block, 0);
2116 }
2117 } else {
2118 block->mirror_num = 0; /* unknown */
2119 ret = btrfsic_process_metablock(
2120 state,
2121 block,
2122 &block_ctx,
5db02760
SB
2123 0, 0);
2124 }
2125 if (ret)
2126 printk(KERN_INFO
2127 "btrfsic: btrfsic_process_metablock"
2128 "(root @%llu) failed!\n",
2129 (unsigned long long)dev_bytenr);
2130 } else {
2131 block->is_metadata = 0;
2132 block->mirror_num = 0; /* unknown */
2133 block->generation = BTRFSIC_GENERATION_UNKNOWN;
2134 if (!state->include_extent_data
2135 && list_empty(&block->ref_from_list)) {
2136 /*
2137 * disk block is overwritten with extent
2138 * data (not meta data) and we are configured
2139 * to not include extent data: take the
2140 * chance and free the block's memory
2141 */
2142 btrfsic_block_hashtable_remove(block);
2143 list_del(&block->all_blocks_node);
2144 btrfsic_block_free(block);
2145 }
2146 }
2147 btrfsic_release_block_ctx(&block_ctx);
2148 } else {
2149 /* block has not been found in hash table */
2150 u64 bytenr;
2151
2152 if (!is_metadata) {
e06baab4 2153 processed_len = state->datablock_size;
5db02760
SB
2154 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2155 printk(KERN_INFO "Written block (%s/%llu/?)"
2156 " !found in hash table, D.\n",
2157 dev_state->name,
2158 (unsigned long long)dev_bytenr);
e06baab4
SB
2159 if (!state->include_extent_data) {
2160 /* ignore that written D block */
2161 goto continue_loop;
2162 }
5db02760
SB
2163
2164 /* this is getting ugly for the
2165 * include_extent_data case... */
2166 bytenr = 0; /* unknown */
2167 block_ctx.start = bytenr;
e06baab4
SB
2168 block_ctx.len = processed_len;
2169 block_ctx.mem_to_free = NULL;
2170 block_ctx.pagev = NULL;
5db02760 2171 } else {
e06baab4 2172 processed_len = state->metablock_size;
5db02760 2173 bytenr = le64_to_cpu(((struct btrfs_header *)
e06baab4 2174 mapped_datav[0])->bytenr);
5db02760 2175 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
e06baab4 2176 dev_bytenr);
5db02760
SB
2177 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2178 printk(KERN_INFO
2179 "Written block @%llu (%s/%llu/?)"
2180 " !found in hash table, M.\n",
2181 (unsigned long long)bytenr,
2182 dev_state->name,
2183 (unsigned long long)dev_bytenr);
2184
e06baab4
SB
2185 ret = btrfsic_map_block(state, bytenr, processed_len,
2186 &block_ctx, 0);
5db02760
SB
2187 if (ret) {
2188 printk(KERN_INFO
2189 "btrfsic: btrfsic_map_block(root @%llu)"
2190 " failed!\n",
2191 (unsigned long long)dev_bytenr);
e06baab4 2192 goto continue_loop;
5db02760
SB
2193 }
2194 }
e06baab4 2195 block_ctx.datav = mapped_datav;
5db02760
SB
2196 /* the following is required in case of writes to mirrors,
2197 * use the same that was used for the lookup */
2198 block_ctx.dev = dev_state;
2199 block_ctx.dev_bytenr = dev_bytenr;
2200
2201 block = btrfsic_block_alloc();
2202 if (NULL == block) {
2203 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2204 btrfsic_release_block_ctx(&block_ctx);
e06baab4 2205 goto continue_loop;
5db02760
SB
2206 }
2207 block->dev_state = dev_state;
2208 block->dev_bytenr = dev_bytenr;
2209 block->logical_bytenr = bytenr;
2210 block->is_metadata = is_metadata;
2211 block->never_written = 0;
2212 block->iodone_w_error = 0;
2213 block->mirror_num = 0; /* unknown */
2214 block->flush_gen = dev_state->last_flush_gen + 1;
2215 block->submit_bio_bh_rw = submit_bio_bh_rw;
2216 if (NULL != bio) {
2217 block->is_iodone = 0;
2218 BUG_ON(NULL == bio_is_patched);
2219 if (!*bio_is_patched) {
2220 block->orig_bio_bh_private = bio->bi_private;
2221 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2222 block->next_in_same_bio = NULL;
2223 bio->bi_private = block;
2224 bio->bi_end_io = btrfsic_bio_end_io;
2225 *bio_is_patched = 1;
2226 } else {
2227 struct btrfsic_block *chained_block =
2228 (struct btrfsic_block *)
2229 bio->bi_private;
2230
2231 BUG_ON(NULL == chained_block);
2232 block->orig_bio_bh_private =
2233 chained_block->orig_bio_bh_private;
2234 block->orig_bio_bh_end_io.bio =
2235 chained_block->orig_bio_bh_end_io.bio;
2236 block->next_in_same_bio = chained_block;
2237 bio->bi_private = block;
2238 }
2239 } else if (NULL != bh) {
2240 block->is_iodone = 0;
2241 block->orig_bio_bh_private = bh->b_private;
2242 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2243 block->next_in_same_bio = NULL;
2244 bh->b_private = block;
2245 bh->b_end_io = btrfsic_bh_end_io;
2246 } else {
2247 block->is_iodone = 1;
2248 block->orig_bio_bh_private = NULL;
2249 block->orig_bio_bh_end_io.bio = NULL;
2250 block->next_in_same_bio = NULL;
2251 }
2252 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2253 printk(KERN_INFO
2254 "New written %c-block @%llu (%s/%llu/%d)\n",
2255 is_metadata ? 'M' : 'D',
2256 (unsigned long long)block->logical_bytenr,
2257 block->dev_state->name,
2258 (unsigned long long)block->dev_bytenr,
2259 block->mirror_num);
2260 list_add(&block->all_blocks_node, &state->all_blocks_list);
2261 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2262
2263 if (is_metadata) {
2264 ret = btrfsic_process_metablock(state, block,
e06baab4 2265 &block_ctx, 0, 0);
5db02760
SB
2266 if (ret)
2267 printk(KERN_INFO
2268 "btrfsic: process_metablock(root @%llu)"
2269 " failed!\n",
2270 (unsigned long long)dev_bytenr);
2271 }
2272 btrfsic_release_block_ctx(&block_ctx);
2273 }
e06baab4
SB
2274
2275continue_loop:
2276 BUG_ON(!processed_len);
2277 dev_bytenr += processed_len;
2278 mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2279 num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2280 goto again;
5db02760
SB
2281}
2282
/*
 * bio completion handler that btrfsic installed in place of the
 * original bi_end_io when the write was submitted.
 *
 * Restores the bio's original bi_private/bi_end_io, then walks the
 * chain of btrfsic_block elements sharing this bio (linked via
 * next_in_same_bio), recording the I/O result and marking each block
 * iodone, before finally invoking the original completion handler.
 */
static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
	int iodone_w_error;

	/* mutex is not held! This is not save if IO is not yet completed
	 * on umount */
	iodone_w_error = 0;
	if (bio_error_status)
		iodone_w_error = 1;

	BUG_ON(NULL == block);
	/* hand the bio back to its original owner before completing it */
	bp->bi_private = block->orig_bio_bh_private;
	bp->bi_end_io = block->orig_bio_bh_end_io.bio;

	do {
		struct btrfsic_block *next_block;
		struct btrfsic_dev_state *const dev_state = block->dev_state;

		if ((dev_state->state->print_mask &
		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
			printk(KERN_INFO
			       "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
			       bio_error_status,
			       btrfsic_get_block_type(dev_state->state, block),
			       (unsigned long long)block->logical_bytenr,
			       dev_state->name,
			       (unsigned long long)block->dev_bytenr,
			       block->mirror_num);
		/* save the link before is_iodone may release the block */
		next_block = block->next_in_same_bio;
		block->iodone_w_error = iodone_w_error;
		/* a completed FLUSH advances the device's flush generation */
		if (block->submit_bio_bh_rw & REQ_FLUSH) {
			dev_state->last_flush_gen++;
			if ((dev_state->state->print_mask &
			     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
				printk(KERN_INFO
				       "bio_end_io() new %s flush_gen=%llu\n",
				       dev_state->name,
				       (unsigned long long)
				       dev_state->last_flush_gen);
		}
		if (block->submit_bio_bh_rw & REQ_FUA)
			block->flush_gen = 0; /* FUA completed means block is
					       * on disk */
		block->is_iodone = 1; /* for FLUSH, this releases the block */
		block = next_block;
	} while (NULL != block);

	bp->bi_end_io(bp, bio_error_status);
}
2333
/*
 * buffer_head completion handler that btrfsic installed in place of
 * the original b_end_io when the write was submitted.
 *
 * Records the I/O result on the associated btrfsic_block, restores the
 * bh's original b_private/b_end_io, marks the block iodone and chains
 * to the original completion handler. Unlike the bio variant there is
 * no next_in_same_bio chain: one bh maps to one block.
 */
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
	int iodone_w_error = !uptodate;
	struct btrfsic_dev_state *dev_state;

	BUG_ON(NULL == block);
	dev_state = block->dev_state;
	if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
		printk(KERN_INFO
		       "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
		       iodone_w_error,
		       btrfsic_get_block_type(dev_state->state, block),
		       (unsigned long long)block->logical_bytenr,
		       block->dev_state->name,
		       (unsigned long long)block->dev_bytenr,
		       block->mirror_num);

	block->iodone_w_error = iodone_w_error;
	/* a completed FLUSH advances the device's flush generation */
	if (block->submit_bio_bh_rw & REQ_FLUSH) {
		dev_state->last_flush_gen++;
		if ((dev_state->state->print_mask &
		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
			printk(KERN_INFO
			       "bh_end_io() new %s flush_gen=%llu\n",
			       dev_state->name,
			       (unsigned long long)dev_state->last_flush_gen);
	}
	if (block->submit_bio_bh_rw & REQ_FUA)
		block->flush_gen = 0; /* FUA completed means block is on disk */

	/* hand the bh back to its original owner before completing it */
	bh->b_private = block->orig_bio_bh_private;
	bh->b_end_io = block->orig_bio_bh_end_io.bh;
	block->is_iodone = 1; /* for FLUSH, this releases the block */
	bh->b_end_io(bh, uptodate);
}
2370
/*
 * Called when a superblock has been written: record its generation
 * (possibly making it the latest superblock), re-create the reference
 * links from the superblock to its root, chunk and log trees on all
 * mirrors, and finally verify that every block it references (directly
 * or indirectly) has been written, completed and flushed.
 *
 * Returns 0 on success, -1 when a mapping/allocation step fails; a
 * failed reference check only warns and dumps the tree.
 */
static int btrfsic_process_written_superblock(
		struct btrfsic_state *state,
		struct btrfsic_block *const superblock,
		struct btrfs_super_block *const super_hdr)
{
	int pass;

	superblock->generation = btrfs_super_generation(super_hdr);
	if (!(superblock->generation > state->max_superblock_generation ||
	      0 == state->max_superblock_generation)) {
		/* older or equal generation: keep the previous latest sb */
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk(KERN_INFO
			       "btrfsic: superblock @%llu (%s/%llu/%d)"
			       " with old gen %llu <= %llu\n",
			       (unsigned long long)superblock->logical_bytenr,
			       superblock->dev_state->name,
			       (unsigned long long)superblock->dev_bytenr,
			       superblock->mirror_num,
			       (unsigned long long)
			       btrfs_super_generation(super_hdr),
			       (unsigned long long)
			       state->max_superblock_generation);
	} else {
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk(KERN_INFO
			       "btrfsic: got new superblock @%llu (%s/%llu/%d)"
			       " with new gen %llu > %llu\n",
			       (unsigned long long)superblock->logical_bytenr,
			       superblock->dev_state->name,
			       (unsigned long long)superblock->dev_bytenr,
			       superblock->mirror_num,
			       (unsigned long long)
			       btrfs_super_generation(super_hdr),
			       (unsigned long long)
			       state->max_superblock_generation);

		state->max_superblock_generation =
		    btrfs_super_generation(super_hdr);
		state->latest_superblock = superblock;
	}

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree */
	for (pass = 0; pass < 3; pass++) {
		int ret;
		u64 next_bytenr;
		struct btrfsic_block *next_block;
		struct btrfsic_block_data_ctx tmp_next_block_ctx;
		struct btrfsic_block_link *l;
		int num_copies;
		int mirror_num;
		const char *additional_string = NULL;
		struct btrfs_disk_key tmp_disk_key;

		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
		tmp_disk_key.offset = 0;

		switch (pass) {
		case 0:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
			additional_string = "root ";
			next_bytenr = btrfs_super_root(super_hdr);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 1:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
			additional_string = "chunk ";
			next_bytenr = btrfs_super_chunk_root(super_hdr);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 2:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
			additional_string = "log ";
			next_bytenr = btrfs_super_log_root(super_hdr);
			/* no log tree recorded in this superblock */
			if (0 == next_bytenr)
				continue;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, BTRFS_SUPER_INFO_SIZE);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			int was_created;

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "btrfsic_process_written_superblock("
				       "mirror_num=%d)\n", mirror_num);
			ret = btrfsic_map_block(state, next_bytenr,
						BTRFS_SUPER_INFO_SIZE,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO
				       "btrfsic: btrfsic_map_block(@%llu,"
				       " mirror=%d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				return -1;
			}

			/* is_metadata=1, is_iodone=0, never_written=1 */
			next_block = btrfsic_block_lookup_or_add(
					state,
					&tmp_next_block_ctx,
					additional_string,
					1, 0, 1,
					mirror_num,
					&was_created);
			if (NULL == next_block) {
				printk(KERN_INFO
				       "btrfsic: error, kmalloc failed!\n");
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			if (was_created)
				next_block->generation =
				    BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state,
					&tmp_next_block_ctx,
					next_block,
					superblock,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l)
				return -1;
		}
	}

	/* rule 2: everything this superblock references must be on disk */
	if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
		WARN_ON(1);
		btrfsic_dump_tree(state);
	}

	return 0;
}
2524
2525static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2526 struct btrfsic_block *const block,
2527 int recursion_level)
2528{
2529 struct list_head *elem_ref_to;
2530 int ret = 0;
2531
2532 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2533 /*
2534 * Note that this situation can happen and does not
2535 * indicate an error in regular cases. It happens
2536 * when disk blocks are freed and later reused.
2537 * The check-integrity module is not aware of any
2538 * block free operations, it just recognizes block
2539 * write operations. Therefore it keeps the linkage
2540 * information for a block until a block is
2541 * rewritten. This can temporarily cause incorrect
2542 * and even circular linkage informations. This
2543 * causes no harm unless such blocks are referenced
2544 * by the most recent super block.
2545 */
2546 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2547 printk(KERN_INFO
2548 "btrfsic: abort cyclic linkage (case 1).\n");
2549
2550 return ret;
2551 }
2552
2553 /*
2554 * This algorithm is recursive because the amount of used stack
2555 * space is very small and the max recursion depth is limited.
2556 */
2557 list_for_each(elem_ref_to, &block->ref_to_list) {
2558 const struct btrfsic_block_link *const l =
2559 list_entry(elem_ref_to, struct btrfsic_block_link,
2560 node_ref_to);
2561
2562 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2563 printk(KERN_INFO
2564 "rl=%d, %c @%llu (%s/%llu/%d)"
2565 " %u* refers to %c @%llu (%s/%llu/%d)\n",
2566 recursion_level,
2567 btrfsic_get_block_type(state, block),
2568 (unsigned long long)block->logical_bytenr,
2569 block->dev_state->name,
2570 (unsigned long long)block->dev_bytenr,
2571 block->mirror_num,
2572 l->ref_cnt,
2573 btrfsic_get_block_type(state, l->block_ref_to),
2574 (unsigned long long)
2575 l->block_ref_to->logical_bytenr,
2576 l->block_ref_to->dev_state->name,
2577 (unsigned long long)l->block_ref_to->dev_bytenr,
2578 l->block_ref_to->mirror_num);
2579 if (l->block_ref_to->never_written) {
2580 printk(KERN_INFO "btrfs: attempt to write superblock"
2581 " which references block %c @%llu (%s/%llu/%d)"
2582 " which is never written!\n",
2583 btrfsic_get_block_type(state, l->block_ref_to),
2584 (unsigned long long)
2585 l->block_ref_to->logical_bytenr,
2586 l->block_ref_to->dev_state->name,
2587 (unsigned long long)l->block_ref_to->dev_bytenr,
2588 l->block_ref_to->mirror_num);
2589 ret = -1;
2590 } else if (!l->block_ref_to->is_iodone) {
2591 printk(KERN_INFO "btrfs: attempt to write superblock"
2592 " which references block %c @%llu (%s/%llu/%d)"
2593 " which is not yet iodone!\n",
2594 btrfsic_get_block_type(state, l->block_ref_to),
2595 (unsigned long long)
2596 l->block_ref_to->logical_bytenr,
2597 l->block_ref_to->dev_state->name,
2598 (unsigned long long)l->block_ref_to->dev_bytenr,
2599 l->block_ref_to->mirror_num);
2600 ret = -1;
2601 } else if (l->parent_generation !=
2602 l->block_ref_to->generation &&
2603 BTRFSIC_GENERATION_UNKNOWN !=
2604 l->parent_generation &&
2605 BTRFSIC_GENERATION_UNKNOWN !=
2606 l->block_ref_to->generation) {
2607 printk(KERN_INFO "btrfs: attempt to write superblock"
2608 " which references block %c @%llu (%s/%llu/%d)"
2609 " with generation %llu !="
2610 " parent generation %llu!\n",
2611 btrfsic_get_block_type(state, l->block_ref_to),
2612 (unsigned long long)
2613 l->block_ref_to->logical_bytenr,
2614 l->block_ref_to->dev_state->name,
2615 (unsigned long long)l->block_ref_to->dev_bytenr,
2616 l->block_ref_to->mirror_num,
2617 (unsigned long long)l->block_ref_to->generation,
2618 (unsigned long long)l->parent_generation);
2619 ret = -1;
2620 } else if (l->block_ref_to->flush_gen >
2621 l->block_ref_to->dev_state->last_flush_gen) {
2622 printk(KERN_INFO "btrfs: attempt to write superblock"
2623 " which references block %c @%llu (%s/%llu/%d)"
2624 " which is not flushed out of disk's write cache"
2625 " (block flush_gen=%llu,"
2626 " dev->flush_gen=%llu)!\n",
2627 btrfsic_get_block_type(state, l->block_ref_to),
2628 (unsigned long long)
2629 l->block_ref_to->logical_bytenr,
2630 l->block_ref_to->dev_state->name,
2631 (unsigned long long)l->block_ref_to->dev_bytenr,
2632 l->block_ref_to->mirror_num,
2633 (unsigned long long)block->flush_gen,
2634 (unsigned long long)
2635 l->block_ref_to->dev_state->last_flush_gen);
2636 ret = -1;
2637 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2638 l->block_ref_to,
2639 recursion_level +
2640 1)) {
2641 ret = -1;
2642 }
2643 }
2644
2645 return ret;
2646}
2647
/*
 * Walk the ref_from list of @block recursively and return 1 if any
 * (transitive) referrer is the most recently written superblock
 * (matched by dev_bytenr and bdev against state->latest_superblock),
 * 0 otherwise. Recursion is bounded to avoid stale circular linkage.
 */
static int btrfsic_is_block_ref_by_superblock(
		const struct btrfsic_state *state,
		const struct btrfsic_block *block,
		int recursion_level)
{
	struct list_head *elem_ref_from;

	if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
		/* refer to comment at "abort cyclic linkage (case 1)" */
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "btrfsic: abort cyclic linkage (case 2).\n");

		return 0;
	}

	/*
	 * This algorithm is recursive because the amount of used stack space
	 * is very small and the max recursion depth is limited.
	 */
	list_for_each(elem_ref_from, &block->ref_from_list) {
		const struct btrfsic_block_link *const l =
		    list_entry(elem_ref_from, struct btrfsic_block_link,
			       node_ref_from);

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "rl=%d, %c @%llu (%s/%llu/%d)"
			       " is ref %u* from %c @%llu (%s/%llu/%d)\n",
			       recursion_level,
			       btrfsic_get_block_type(state, block),
			       (unsigned long long)block->logical_bytenr,
			       block->dev_state->name,
			       (unsigned long long)block->dev_bytenr,
			       block->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       (unsigned long long)
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       (unsigned long long)
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		/* direct hit: the referrer is the latest superblock */
		if (l->block_ref_from->is_superblock &&
		    state->latest_superblock->dev_bytenr ==
		    l->block_ref_from->dev_bytenr &&
		    state->latest_superblock->dev_state->bdev ==
		    l->block_ref_from->dev_state->bdev)
			return 1;
		else if (btrfsic_is_block_ref_by_superblock(state,
							    l->block_ref_from,
							    recursion_level +
							    1))
			return 1;
	}

	return 0;
}
2706
2707static void btrfsic_print_add_link(const struct btrfsic_state *state,
2708 const struct btrfsic_block_link *l)
2709{
2710 printk(KERN_INFO
2711 "Add %u* link from %c @%llu (%s/%llu/%d)"
2712 " to %c @%llu (%s/%llu/%d).\n",
2713 l->ref_cnt,
2714 btrfsic_get_block_type(state, l->block_ref_from),
2715 (unsigned long long)l->block_ref_from->logical_bytenr,
2716 l->block_ref_from->dev_state->name,
2717 (unsigned long long)l->block_ref_from->dev_bytenr,
2718 l->block_ref_from->mirror_num,
2719 btrfsic_get_block_type(state, l->block_ref_to),
2720 (unsigned long long)l->block_ref_to->logical_bytenr,
2721 l->block_ref_to->dev_state->name,
2722 (unsigned long long)l->block_ref_to->dev_bytenr,
2723 l->block_ref_to->mirror_num);
2724}
2725
2726static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2727 const struct btrfsic_block_link *l)
2728{
2729 printk(KERN_INFO
2730 "Rem %u* link from %c @%llu (%s/%llu/%d)"
2731 " to %c @%llu (%s/%llu/%d).\n",
2732 l->ref_cnt,
2733 btrfsic_get_block_type(state, l->block_ref_from),
2734 (unsigned long long)l->block_ref_from->logical_bytenr,
2735 l->block_ref_from->dev_state->name,
2736 (unsigned long long)l->block_ref_from->dev_bytenr,
2737 l->block_ref_from->mirror_num,
2738 btrfsic_get_block_type(state, l->block_ref_to),
2739 (unsigned long long)l->block_ref_to->logical_bytenr,
2740 l->block_ref_to->dev_state->name,
2741 (unsigned long long)l->block_ref_to->dev_bytenr,
2742 l->block_ref_to->mirror_num);
2743}
2744
2745static char btrfsic_get_block_type(const struct btrfsic_state *state,
2746 const struct btrfsic_block *block)
2747{
2748 if (block->is_superblock &&
2749 state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2750 state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2751 return 'S';
2752 else if (block->is_superblock)
2753 return 's';
2754 else if (block->is_metadata)
2755 return 'M';
2756 else
2757 return 'D';
2758}
2759
/* Dump the whole reference tree, rooted at the latest superblock. */
static void btrfsic_dump_tree(const struct btrfsic_state *state)
{
	btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
}
2764
2765static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2766 const struct btrfsic_block *block,
2767 int indent_level)
2768{
2769 struct list_head *elem_ref_to;
2770 int indent_add;
2771 static char buf[80];
2772 int cursor_position;
2773
2774 /*
2775 * Should better fill an on-stack buffer with a complete line and
2776 * dump it at once when it is time to print a newline character.
2777 */
2778
2779 /*
2780 * This algorithm is recursive because the amount of used stack space
2781 * is very small and the max recursion depth is limited.
2782 */
2783 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2784 btrfsic_get_block_type(state, block),
2785 (unsigned long long)block->logical_bytenr,
2786 block->dev_state->name,
2787 (unsigned long long)block->dev_bytenr,
2788 block->mirror_num);
2789 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2790 printk("[...]\n");
2791 return;
2792 }
2793 printk(buf);
2794 indent_level += indent_add;
2795 if (list_empty(&block->ref_to_list)) {
2796 printk("\n");
2797 return;
2798 }
2799 if (block->mirror_num > 1 &&
2800 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2801 printk(" [...]\n");
2802 return;
2803 }
2804
2805 cursor_position = indent_level;
2806 list_for_each(elem_ref_to, &block->ref_to_list) {
2807 const struct btrfsic_block_link *const l =
2808 list_entry(elem_ref_to, struct btrfsic_block_link,
2809 node_ref_to);
2810
2811 while (cursor_position < indent_level) {
2812 printk(" ");
2813 cursor_position++;
2814 }
2815 if (l->ref_cnt > 1)
2816 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2817 else
2818 indent_add = sprintf(buf, " --> ");
2819 if (indent_level + indent_add >
2820 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2821 printk("[...]\n");
2822 cursor_position = 0;
2823 continue;
2824 }
2825
2826 printk(buf);
2827
2828 btrfsic_dump_tree_sub(state, l->block_ref_to,
2829 indent_level + indent_add);
2830 cursor_position = 0;
2831 }
2832}
2833
2834static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2835 struct btrfsic_state *state,
2836 struct btrfsic_block_data_ctx *next_block_ctx,
2837 struct btrfsic_block *next_block,
2838 struct btrfsic_block *from_block,
2839 u64 parent_generation)
2840{
2841 struct btrfsic_block_link *l;
2842
2843 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2844 next_block_ctx->dev_bytenr,
2845 from_block->dev_state->bdev,
2846 from_block->dev_bytenr,
2847 &state->block_link_hashtable);
2848 if (NULL == l) {
2849 l = btrfsic_block_link_alloc();
2850 if (NULL == l) {
2851 printk(KERN_INFO
2852 "btrfsic: error, kmalloc" " failed!\n");
2853 return NULL;
2854 }
2855
2856 l->block_ref_to = next_block;
2857 l->block_ref_from = from_block;
2858 l->ref_cnt = 1;
2859 l->parent_generation = parent_generation;
2860
2861 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2862 btrfsic_print_add_link(state, l);
2863
2864 list_add(&l->node_ref_to, &from_block->ref_to_list);
2865 list_add(&l->node_ref_from, &next_block->ref_from_list);
2866
2867 btrfsic_block_link_hashtable_add(l,
2868 &state->block_link_hashtable);
2869 } else {
2870 l->ref_cnt++;
2871 l->parent_generation = parent_generation;
2872 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2873 btrfsic_print_add_link(state, l);
2874 }
2875
2876 return l;
2877}
2878
2879static struct btrfsic_block *btrfsic_block_lookup_or_add(
2880 struct btrfsic_state *state,
2881 struct btrfsic_block_data_ctx *block_ctx,
2882 const char *additional_string,
2883 int is_metadata,
2884 int is_iodone,
2885 int never_written,
2886 int mirror_num,
2887 int *was_created)
2888{
2889 struct btrfsic_block *block;
2890
2891 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2892 block_ctx->dev_bytenr,
2893 &state->block_hashtable);
2894 if (NULL == block) {
2895 struct btrfsic_dev_state *dev_state;
2896
2897 block = btrfsic_block_alloc();
2898 if (NULL == block) {
2899 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2900 return NULL;
2901 }
2902 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2903 if (NULL == dev_state) {
2904 printk(KERN_INFO
2905 "btrfsic: error, lookup dev_state failed!\n");
2906 btrfsic_block_free(block);
2907 return NULL;
2908 }
2909 block->dev_state = dev_state;
2910 block->dev_bytenr = block_ctx->dev_bytenr;
2911 block->logical_bytenr = block_ctx->start;
2912 block->is_metadata = is_metadata;
2913 block->is_iodone = is_iodone;
2914 block->never_written = never_written;
2915 block->mirror_num = mirror_num;
2916 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2917 printk(KERN_INFO
2918 "New %s%c-block @%llu (%s/%llu/%d)\n",
2919 additional_string,
2920 btrfsic_get_block_type(state, block),
2921 (unsigned long long)block->logical_bytenr,
2922 dev_state->name,
2923 (unsigned long long)block->dev_bytenr,
2924 mirror_num);
2925 list_add(&block->all_blocks_node, &state->all_blocks_list);
2926 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2927 if (NULL != was_created)
2928 *was_created = 1;
2929 } else {
2930 if (NULL != was_created)
2931 *was_created = 0;
2932 }
2933
2934 return block;
2935}
2936
2937static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2938 u64 bytenr,
2939 struct btrfsic_dev_state *dev_state,
e06baab4 2940 u64 dev_bytenr)
5db02760
SB
2941{
2942 int num_copies;
2943 int mirror_num;
2944 int ret;
2945 struct btrfsic_block_data_ctx block_ctx;
2946 int match = 0;
2947
2948 num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
e06baab4 2949 bytenr, state->metablock_size);
5db02760
SB
2950
2951 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
e06baab4 2952 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
5db02760
SB
2953 &block_ctx, mirror_num);
2954 if (ret) {
2955 printk(KERN_INFO "btrfsic:"
2956 " btrfsic_map_block(logical @%llu,"
2957 " mirror %d) failed!\n",
2958 (unsigned long long)bytenr, mirror_num);
2959 continue;
2960 }
2961
2962 if (dev_state->bdev == block_ctx.dev->bdev &&
2963 dev_bytenr == block_ctx.dev_bytenr) {
2964 match++;
2965 btrfsic_release_block_ctx(&block_ctx);
2966 break;
2967 }
2968 btrfsic_release_block_ctx(&block_ctx);
2969 }
2970
2971 if (!match) {
2972 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2973 " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2974 " phys_bytenr=%llu)!\n",
2975 (unsigned long long)bytenr, dev_state->name,
2976 (unsigned long long)dev_bytenr);
2977 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
e06baab4
SB
2978 ret = btrfsic_map_block(state, bytenr,
2979 state->metablock_size,
5db02760
SB
2980 &block_ctx, mirror_num);
2981 if (ret)
2982 continue;
2983
2984 printk(KERN_INFO "Read logical bytenr @%llu maps to"
2985 " (%s/%llu/%d)\n",
2986 (unsigned long long)bytenr,
2987 block_ctx.dev->name,
2988 (unsigned long long)block_ctx.dev_bytenr,
2989 mirror_num);
2990 }
2991 WARN_ON(1);
2992 }
2993}
2994
2995static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2996 struct block_device *bdev)
2997{
2998 struct btrfsic_dev_state *ds;
2999
3000 ds = btrfsic_dev_state_hashtable_lookup(bdev,
3001 &btrfsic_dev_state_hashtable);
3002 return ds;
3003}
3004
3005int btrfsic_submit_bh(int rw, struct buffer_head *bh)
3006{
3007 struct btrfsic_dev_state *dev_state;
3008
3009 if (!btrfsic_is_initialized)
3010 return submit_bh(rw, bh);
3011
3012 mutex_lock(&btrfsic_mutex);
3013 /* since btrfsic_submit_bh() might also be called before
3014 * btrfsic_mount(), this might return NULL */
3015 dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
3016
3017 /* Only called to write the superblock (incl. FLUSH/FUA) */
3018 if (NULL != dev_state &&
3019 (rw & WRITE) && bh->b_size > 0) {
3020 u64 dev_bytenr;
3021
3022 dev_bytenr = 4096 * bh->b_blocknr;
3023 if (dev_state->state->print_mask &
3024 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3025 printk(KERN_INFO
3026 "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
3027 " size=%lu, data=%p, bdev=%p)\n",
0b485143
SB
3028 rw, (unsigned long)bh->b_blocknr,
3029 (unsigned long long)dev_bytenr,
3030 (unsigned long)bh->b_size, bh->b_data,
3031 bh->b_bdev);
5db02760 3032 btrfsic_process_written_block(dev_state, dev_bytenr,
e06baab4 3033 &bh->b_data, 1, NULL,
5db02760
SB
3034 NULL, bh, rw);
3035 } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3036 if (dev_state->state->print_mask &
3037 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3038 printk(KERN_INFO
e06baab4 3039 "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
5db02760
SB
3040 rw, bh->b_bdev);
3041 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3042 if ((dev_state->state->print_mask &
3043 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3044 BTRFSIC_PRINT_MASK_VERBOSE)))
3045 printk(KERN_INFO
3046 "btrfsic_submit_bh(%s) with FLUSH"
3047 " but dummy block already in use"
3048 " (ignored)!\n",
3049 dev_state->name);
3050 } else {
3051 struct btrfsic_block *const block =
3052 &dev_state->dummy_block_for_bio_bh_flush;
3053
3054 block->is_iodone = 0;
3055 block->never_written = 0;
3056 block->iodone_w_error = 0;
3057 block->flush_gen = dev_state->last_flush_gen + 1;
3058 block->submit_bio_bh_rw = rw;
3059 block->orig_bio_bh_private = bh->b_private;
3060 block->orig_bio_bh_end_io.bh = bh->b_end_io;
3061 block->next_in_same_bio = NULL;
3062 bh->b_private = block;
3063 bh->b_end_io = btrfsic_bh_end_io;
3064 }
3065 }
3066 mutex_unlock(&btrfsic_mutex);
3067 return submit_bh(rw, bh);
3068}
3069
3070void btrfsic_submit_bio(int rw, struct bio *bio)
3071{
3072 struct btrfsic_dev_state *dev_state;
3073
3074 if (!btrfsic_is_initialized) {
3075 submit_bio(rw, bio);
3076 return;
3077 }
3078
3079 mutex_lock(&btrfsic_mutex);
3080 /* since btrfsic_submit_bio() is also called before
3081 * btrfsic_mount(), this might return NULL */
3082 dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
3083 if (NULL != dev_state &&
3084 (rw & WRITE) && NULL != bio->bi_io_vec) {
3085 unsigned int i;
3086 u64 dev_bytenr;
3087 int bio_is_patched;
e06baab4 3088 char **mapped_datav;
5db02760
SB
3089
3090 dev_bytenr = 512 * bio->bi_sector;
3091 bio_is_patched = 0;
3092 if (dev_state->state->print_mask &
3093 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3094 printk(KERN_INFO
3095 "submit_bio(rw=0x%x, bi_vcnt=%u,"
3096 " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
0b485143 3097 rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
5db02760
SB
3098 (unsigned long long)dev_bytenr,
3099 bio->bi_bdev);
3100
e06baab4
SB
3101 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3102 GFP_NOFS);
3103 if (!mapped_datav)
3104 goto leave;
5db02760 3105 for (i = 0; i < bio->bi_vcnt; i++) {
e06baab4
SB
3106 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
3107 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
3108 if (!mapped_datav[i]) {
3109 while (i > 0) {
3110 i--;
3111 kunmap(bio->bi_io_vec[i].bv_page);
3112 }
3113 kfree(mapped_datav);
3114 goto leave;
3115 }
5db02760
SB
3116 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3117 BTRFSIC_PRINT_MASK_VERBOSE) ==
3118 (dev_state->state->print_mask &
3119 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3120 BTRFSIC_PRINT_MASK_VERBOSE)))
3121 printk(KERN_INFO
e06baab4 3122 "#%u: page=%p, len=%u, offset=%u\n",
5db02760 3123 i, bio->bi_io_vec[i].bv_page,
5db02760
SB
3124 bio->bi_io_vec[i].bv_len,
3125 bio->bi_io_vec[i].bv_offset);
e06baab4
SB
3126 }
3127 btrfsic_process_written_block(dev_state, dev_bytenr,
3128 mapped_datav, bio->bi_vcnt,
3129 bio, &bio_is_patched,
3130 NULL, rw);
3131 while (i > 0) {
3132 i--;
5db02760 3133 kunmap(bio->bi_io_vec[i].bv_page);
5db02760 3134 }
e06baab4 3135 kfree(mapped_datav);
5db02760
SB
3136 } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3137 if (dev_state->state->print_mask &
3138 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3139 printk(KERN_INFO
e06baab4 3140 "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
5db02760
SB
3141 rw, bio->bi_bdev);
3142 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3143 if ((dev_state->state->print_mask &
3144 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3145 BTRFSIC_PRINT_MASK_VERBOSE)))
3146 printk(KERN_INFO
3147 "btrfsic_submit_bio(%s) with FLUSH"
3148 " but dummy block already in use"
3149 " (ignored)!\n",
3150 dev_state->name);
3151 } else {
3152 struct btrfsic_block *const block =
3153 &dev_state->dummy_block_for_bio_bh_flush;
3154
3155 block->is_iodone = 0;
3156 block->never_written = 0;
3157 block->iodone_w_error = 0;
3158 block->flush_gen = dev_state->last_flush_gen + 1;
3159 block->submit_bio_bh_rw = rw;
3160 block->orig_bio_bh_private = bio->bi_private;
3161 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
3162 block->next_in_same_bio = NULL;
3163 bio->bi_private = block;
3164 bio->bi_end_io = btrfsic_bio_end_io;
3165 }
3166 }
e06baab4 3167leave:
5db02760
SB
3168 mutex_unlock(&btrfsic_mutex);
3169
3170 submit_bio(rw, bio);
3171}
3172
3173int btrfsic_mount(struct btrfs_root *root,
3174 struct btrfs_fs_devices *fs_devices,
3175 int including_extent_data, u32 print_mask)
3176{
3177 int ret;
3178 struct btrfsic_state *state;
3179 struct list_head *dev_head = &fs_devices->devices;
3180 struct btrfs_device *device;
3181
e06baab4
SB
3182 if (root->nodesize != root->leafsize) {
3183 printk(KERN_INFO
3184 "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3185 root->nodesize, root->leafsize);
3186 return -1;
3187 }
3188 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3189 printk(KERN_INFO
3190 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3191 root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3192 return -1;
3193 }
3194 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3195 printk(KERN_INFO
3196 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3197 root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3198 return -1;
3199 }
3200 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3201 printk(KERN_INFO
3202 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3203 root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3204 return -1;
3205 }
5db02760
SB
3206 state = kzalloc(sizeof(*state), GFP_NOFS);
3207 if (NULL == state) {
3208 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
3209 return -1;
3210 }
3211
3212 if (!btrfsic_is_initialized) {
3213 mutex_init(&btrfsic_mutex);
3214 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3215 btrfsic_is_initialized = 1;
3216 }
3217 mutex_lock(&btrfsic_mutex);
3218 state->root = root;
3219 state->print_mask = print_mask;
3220 state->include_extent_data = including_extent_data;
3221 state->csum_size = 0;
e06baab4
SB
3222 state->metablock_size = root->nodesize;
3223 state->datablock_size = root->sectorsize;
5db02760
SB
3224 INIT_LIST_HEAD(&state->all_blocks_list);
3225 btrfsic_block_hashtable_init(&state->block_hashtable);
3226 btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3227 state->max_superblock_generation = 0;
3228 state->latest_superblock = NULL;
3229
3230 list_for_each_entry(device, dev_head, dev_list) {
3231 struct btrfsic_dev_state *ds;
3232 char *p;
3233
3234 if (!device->bdev || !device->name)
3235 continue;
3236
3237 ds = btrfsic_dev_state_alloc();
3238 if (NULL == ds) {
3239 printk(KERN_INFO
3240 "btrfs check-integrity: kmalloc() failed!\n");
3241 mutex_unlock(&btrfsic_mutex);
3242 return -1;
3243 }
3244 ds->bdev = device->bdev;
3245 ds->state = state;
3246 bdevname(ds->bdev, ds->name);
3247 ds->name[BDEVNAME_SIZE - 1] = '\0';
3248 for (p = ds->name; *p != '\0'; p++);
3249 while (p > ds->name && *p != '/')
3250 p--;
3251 if (*p == '/')
3252 p++;
3253 strlcpy(ds->name, p, sizeof(ds->name));
3254 btrfsic_dev_state_hashtable_add(ds,
3255 &btrfsic_dev_state_hashtable);
3256 }
3257
3258 ret = btrfsic_process_superblock(state, fs_devices);
3259 if (0 != ret) {
3260 mutex_unlock(&btrfsic_mutex);
3261 btrfsic_unmount(root, fs_devices);
3262 return ret;
3263 }
3264
3265 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3266 btrfsic_dump_database(state);
3267 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3268 btrfsic_dump_tree(state);
3269
3270 mutex_unlock(&btrfsic_mutex);
3271 return 0;
3272}
3273
/*
 * Tear down the check-integrity state for a filesystem: unregister and
 * free the btrfsic_dev_state of every device, then free all dynamically
 * allocated blocks and block links, and finally the state itself.
 * Safe to call when btrfsic was never initialized (returns early).
 */
void btrfsic_unmount(struct btrfs_root *root,
		     struct btrfs_fs_devices *fs_devices)
{
	struct list_head *elem_all;
	struct list_head *tmp_all;
	struct btrfsic_state *state;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;

	if (!btrfsic_is_initialized)
		return;

	mutex_lock(&btrfsic_mutex);

	/* find this fs's state via any of its registered dev_states and
	 * drop all of them from the global hashtable */
	state = NULL;
	list_for_each_entry(device, dev_head, dev_list) {
		struct btrfsic_dev_state *ds;

		if (!device->bdev || !device->name)
			continue;

		ds = btrfsic_dev_state_hashtable_lookup(
				device->bdev,
				&btrfsic_dev_state_hashtable);
		if (NULL != ds) {
			state = ds->state;
			btrfsic_dev_state_hashtable_remove(ds);
			btrfsic_dev_state_free(ds);
		}
	}

	if (NULL == state) {
		printk(KERN_INFO
		       "btrfsic: error, cannot find state information"
		       " on umount!\n");
		mutex_unlock(&btrfsic_mutex);
		return;
	}

	/*
	 * Don't care about keeping the lists' state up to date,
	 * just free all memory that was allocated dynamically.
	 * Free the blocks and the block_links.
	 */
	list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
		struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *tmp_ref_to;

		/* drop every outgoing link of this block; a link is freed
		 * once its last reference is gone */
		list_for_each_safe(elem_ref_to, tmp_ref_to,
				   &b_all->ref_to_list) {
			struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_rem_link(state, l);

			l->ref_cnt--;
			if (0 == l->ref_cnt)
				btrfsic_block_link_free(l);
		}

		/* a block with I/O still in flight must not be freed; its
		 * completion callback could run after the free */
		if (b_all->is_iodone)
			btrfsic_block_free(b_all);
		else
			printk(KERN_INFO "btrfs: attempt to free %c-block"
			       " @%llu (%s/%llu/%d) on umount which is"
			       " not yet iodone!\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num);
	}

	mutex_unlock(&btrfsic_mutex);

	kfree(state);
}