Btrfs: Add an extent buffer LRU to reduce radix tree hits
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / btrfs / ctree.c
CommitLineData
6cbd5570
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
5f39d397 19#include <linux/highmem.h>
eb60ceac
CM
20#include "ctree.h"
21#include "disk-io.h"
7f5c1516 22#include "transaction.h"
5f39d397 23#include "print-tree.h"
9a8dd150 24
/*
 * Prototypes for static helpers that the functions below call.
 */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_key *ins_key,
		      struct btrfs_path *path, int data_size);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  struct extent_buffer *dst,
			  struct extent_buffer *src);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans,
		   struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);
d97e63b6 39
df24a2b9 40inline void btrfs_init_path(struct btrfs_path *p)
2c90e5d6 41{
df24a2b9 42 memset(p, 0, sizeof(*p));
2c90e5d6
CM
43}
44
df24a2b9 45struct btrfs_path *btrfs_alloc_path(void)
2c90e5d6 46{
df24a2b9
CM
47 struct btrfs_path *path;
48 path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
2cc58cf2 49 if (path) {
df24a2b9 50 btrfs_init_path(path);
2cc58cf2
CM
51 path->reada = 1;
52 }
df24a2b9 53 return path;
2c90e5d6
CM
54}
55
df24a2b9 56void btrfs_free_path(struct btrfs_path *p)
be0e5c09 57{
df24a2b9
CM
58 btrfs_release_path(NULL, p);
59 kmem_cache_free(btrfs_path_cachep, p);
be0e5c09
CM
60}
61
234b63a0 62void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
eb60ceac
CM
63{
64 int i;
234b63a0 65 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
eb60ceac
CM
66 if (!p->nodes[i])
67 break;
5f39d397 68 free_extent_buffer(p->nodes[i]);
eb60ceac 69 }
aa5d6bed 70 memset(p, 0, sizeof(*p));
eb60ceac
CM
71}
72
5f39d397
CM
73static int __btrfs_cow_block(struct btrfs_trans_handle *trans,
74 struct btrfs_root *root,
75 struct extent_buffer *buf,
76 struct extent_buffer *parent, int parent_slot,
77 struct extent_buffer **cow_ret,
78 u64 search_start, u64 empty_size)
02217ed2 79{
5f39d397 80 struct extent_buffer *cow;
6702ed49
CM
81 int ret = 0;
82 int different_trans = 0;
02217ed2 83
6702ed49 84 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
5f39d397 85
db94535d
CM
86 cow = btrfs_alloc_free_block(trans, root, buf->len,
87 search_start, empty_size);
54aa1f4d
CM
88 if (IS_ERR(cow))
89 return PTR_ERR(cow);
6702ed49 90
5f39d397 91 copy_extent_buffer(cow, buf, 0, 0, cow->len);
db94535d 92 btrfs_set_header_bytenr(cow, cow->start);
5f39d397
CM
93 btrfs_set_header_generation(cow, trans->transid);
94 btrfs_set_header_owner(cow, root->root_key.objectid);
6702ed49 95
5f39d397
CM
96 WARN_ON(btrfs_header_generation(buf) > trans->transid);
97 if (btrfs_header_generation(buf) != trans->transid) {
6702ed49
CM
98 different_trans = 1;
99 ret = btrfs_inc_ref(trans, root, buf);
100 if (ret)
101 return ret;
102 } else {
6702ed49
CM
103 clean_tree_block(trans, root, buf);
104 }
105
02217ed2
CM
106 if (buf == root->node) {
107 root->node = cow;
5f39d397 108 extent_buffer_get(cow);
2c90e5d6 109 if (buf != root->commit_root) {
db94535d
CM
110 btrfs_free_extent(trans, root, buf->start,
111 buf->len, 1);
2c90e5d6 112 }
5f39d397 113 free_extent_buffer(buf);
02217ed2 114 } else {
5f39d397 115 btrfs_set_node_blockptr(parent, parent_slot,
db94535d 116 cow->start);
d6025579 117 btrfs_mark_buffer_dirty(parent);
5f39d397 118 WARN_ON(btrfs_header_generation(parent) != trans->transid);
db94535d 119 btrfs_free_extent(trans, root, buf->start, buf->len, 1);
02217ed2 120 }
5f39d397 121 free_extent_buffer(buf);
ccd467d6 122 btrfs_mark_buffer_dirty(cow);
2c90e5d6 123 *cow_ret = cow;
02217ed2
CM
124 return 0;
125}
126
5f39d397
CM
127int btrfs_cow_block(struct btrfs_trans_handle *trans,
128 struct btrfs_root *root, struct extent_buffer *buf,
129 struct extent_buffer *parent, int parent_slot,
130 struct extent_buffer **cow_ret)
6702ed49
CM
131{
132 u64 search_start;
f510cfec 133 int ret;
6702ed49
CM
134 if (trans->transaction != root->fs_info->running_transaction) {
135 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
136 root->fs_info->running_transaction->transid);
137 WARN_ON(1);
138 }
139 if (trans->transid != root->fs_info->generation) {
140 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
141 root->fs_info->generation);
142 WARN_ON(1);
143 }
5f39d397 144 if (btrfs_header_generation(buf) == trans->transid) {
6702ed49
CM
145 *cow_ret = buf;
146 return 0;
147 }
148
db94535d 149 search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
f510cfec 150 ret = __btrfs_cow_block(trans, root, buf, parent,
6702ed49 151 parent_slot, cow_ret, search_start, 0);
f510cfec 152 return ret;
6702ed49
CM
153}
154
6b80053d 155static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
6702ed49 156{
6b80053d 157 if (blocknr < other && other - (blocknr + blocksize) < 32768)
6702ed49 158 return 1;
6b80053d 159 if (blocknr > other && blocknr - (other + blocksize) < 32768)
6702ed49
CM
160 return 1;
161 return 0;
162}
163
6b80053d 164static int should_defrag_leaf(struct extent_buffer *leaf)
2cc58cf2 165{
6b80053d 166 struct btrfs_key key;
2cc58cf2
CM
167 u32 nritems;
168
6b80053d 169 if (btrfs_buffer_defrag(leaf))
2cc58cf2
CM
170 return 1;
171
6b80053d 172 nritems = btrfs_header_nritems(leaf);
2cc58cf2
CM
173 if (nritems == 0)
174 return 0;
175
6b80053d
CM
176 btrfs_item_key_to_cpu(leaf, &key, 0);
177 if (key.type == BTRFS_DIR_ITEM_KEY)
2cc58cf2
CM
178 return 1;
179
6b80053d
CM
180
181 btrfs_item_key_to_cpu(leaf, &key, nritems - 1);
182 if (key.type == BTRFS_DIR_ITEM_KEY)
2cc58cf2
CM
183 return 1;
184 if (nritems > 4) {
6b80053d
CM
185 btrfs_item_key_to_cpu(leaf, &key, nritems / 2);
186 if (key.type == BTRFS_DIR_ITEM_KEY)
2cc58cf2
CM
187 return 1;
188 }
189 return 0;
190}
191
6702ed49 192int btrfs_realloc_node(struct btrfs_trans_handle *trans,
5f39d397 193 struct btrfs_root *root, struct extent_buffer *parent,
e9d0b13b 194 int cache_only, u64 *last_ret)
6702ed49 195{
6b80053d
CM
196 struct extent_buffer *cur;
197 struct extent_buffer *tmp;
6702ed49 198 u64 blocknr;
e9d0b13b
CM
199 u64 search_start = *last_ret;
200 u64 last_block = 0;
6702ed49
CM
201 u64 other;
202 u32 parent_nritems;
203 int start_slot;
204 int end_slot;
205 int i;
206 int err = 0;
f2183bde 207 int parent_level;
6b80053d
CM
208 int uptodate;
209 u32 blocksize;
6702ed49
CM
210
211 if (trans->transaction != root->fs_info->running_transaction) {
212 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
213 root->fs_info->running_transaction->transid);
214 WARN_ON(1);
215 }
216 if (trans->transid != root->fs_info->generation) {
217 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
218 root->fs_info->generation);
219 WARN_ON(1);
220 }
6b80053d 221 if (btrfs_buffer_defrag_done(parent))
86479a04
CM
222 return 0;
223
6b80053d
CM
224 parent_nritems = btrfs_header_nritems(parent);
225 parent_level = btrfs_header_level(parent);
226 blocksize = btrfs_level_size(root, parent_level - 1);
6702ed49
CM
227
228 start_slot = 0;
229 end_slot = parent_nritems;
230
231 if (parent_nritems == 1)
232 return 0;
233
234 for (i = start_slot; i < end_slot; i++) {
235 int close = 1;
6b80053d 236 blocknr = btrfs_node_blockptr(parent, i);
e9d0b13b
CM
237 if (last_block == 0)
238 last_block = blocknr;
6702ed49 239 if (i > 0) {
6b80053d
CM
240 other = btrfs_node_blockptr(parent, i - 1);
241 close = close_blocks(blocknr, other, blocksize);
6702ed49
CM
242 }
243 if (close && i < end_slot - 1) {
6b80053d
CM
244 other = btrfs_node_blockptr(parent, i + 1);
245 close = close_blocks(blocknr, other, blocksize);
6702ed49 246 }
e9d0b13b
CM
247 if (close) {
248 last_block = blocknr;
6702ed49 249 continue;
e9d0b13b 250 }
6702ed49 251
6b80053d
CM
252 cur = btrfs_find_tree_block(root, blocknr, blocksize);
253 if (cur)
254 uptodate = btrfs_buffer_uptodate(cur);
255 else
256 uptodate = 0;
257 if (!cur || !uptodate ||
258 (parent_level != 1 && !btrfs_buffer_defrag(cur)) ||
259 (parent_level == 1 && !should_defrag_leaf(cur))) {
6702ed49 260 if (cache_only) {
6b80053d 261 free_extent_buffer(cur);
6702ed49
CM
262 continue;
263 }
6b80053d
CM
264 if (!cur) {
265 cur = read_tree_block(root, blocknr,
266 blocksize);
267 } else if (!uptodate) {
268 btrfs_read_buffer(cur);
f2183bde 269 }
6702ed49 270 }
e9d0b13b 271 if (search_start == 0)
6b80053d 272 search_start = last_block;
e9d0b13b 273
6b80053d
CM
274 err = __btrfs_cow_block(trans, root, cur, parent, i,
275 &tmp, search_start,
276 min(16 * blocksize,
277 (end_slot - i) * blocksize));
252c38f0 278 if (err) {
6b80053d 279 free_extent_buffer(cur);
6702ed49 280 break;
252c38f0 281 }
6b80053d 282 search_start = tmp->start;
f2183bde
CM
283 *last_ret = search_start;
284 if (parent_level == 1)
6b80053d
CM
285 btrfs_clear_buffer_defrag(tmp);
286 btrfs_set_buffer_defrag_done(tmp);
287 free_extent_buffer(tmp);
6702ed49
CM
288 }
289 return err;
290}
291
74123bd7
CM
292/*
293 * The leaf data grows from end-to-front in the node.
294 * this returns the address of the start of the last item,
295 * which is the stop of the leaf data stack
296 */
123abc88 297static inline unsigned int leaf_data_end(struct btrfs_root *root,
5f39d397 298 struct extent_buffer *leaf)
be0e5c09 299{
5f39d397 300 u32 nr = btrfs_header_nritems(leaf);
be0e5c09 301 if (nr == 0)
123abc88 302 return BTRFS_LEAF_DATA_SIZE(root);
5f39d397 303 return btrfs_item_offset_nr(leaf, nr - 1);
be0e5c09
CM
304}
305
74123bd7
CM
306/*
307 * compare two keys in a memcmp fashion
308 */
9aca1d51 309static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
be0e5c09 310{
e2fa7227
CM
311 struct btrfs_key k1;
312
313 btrfs_disk_key_to_cpu(&k1, disk);
314
315 if (k1.objectid > k2->objectid)
be0e5c09 316 return 1;
e2fa7227 317 if (k1.objectid < k2->objectid)
be0e5c09 318 return -1;
5f39d397 319 if (k1.type > k2->type)
f254e52c 320 return 1;
5f39d397 321 if (k1.type < k2->type)
f254e52c 322 return -1;
70b2befd
CM
323 if (k1.offset > k2->offset)
324 return 1;
325 if (k1.offset < k2->offset)
326 return -1;
be0e5c09
CM
327 return 0;
328}
74123bd7 329
123abc88
CM
330static int check_node(struct btrfs_root *root, struct btrfs_path *path,
331 int level)
aa5d6bed 332{
5f39d397
CM
333 struct extent_buffer *parent = NULL;
334 struct extent_buffer *node = path->nodes[level];
335 struct btrfs_disk_key parent_key;
336 struct btrfs_disk_key node_key;
aa5d6bed 337 int parent_slot;
8d7be552
CM
338 int slot;
339 struct btrfs_key cpukey;
5f39d397 340 u32 nritems = btrfs_header_nritems(node);
aa5d6bed
CM
341
342 if (path->nodes[level + 1])
5f39d397 343 parent = path->nodes[level + 1];
a1f39630 344
8d7be552 345 slot = path->slots[level];
7518a238
CM
346 BUG_ON(nritems == 0);
347 if (parent) {
a1f39630 348 parent_slot = path->slots[level + 1];
5f39d397
CM
349 btrfs_node_key(parent, &parent_key, parent_slot);
350 btrfs_node_key(node, &node_key, 0);
351 BUG_ON(memcmp(&parent_key, &node_key,
e2fa7227 352 sizeof(struct btrfs_disk_key)));
1d4f8a0c 353 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
db94535d 354 btrfs_header_bytenr(node));
aa5d6bed 355 }
123abc88 356 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
8d7be552 357 if (slot != 0) {
5f39d397
CM
358 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
359 btrfs_node_key(node, &node_key, slot);
360 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
8d7be552
CM
361 }
362 if (slot < nritems - 1) {
5f39d397
CM
363 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
364 btrfs_node_key(node, &node_key, slot);
365 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
aa5d6bed
CM
366 }
367 return 0;
368}
369
123abc88
CM
370static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
371 int level)
aa5d6bed 372{
5f39d397
CM
373 struct extent_buffer *leaf = path->nodes[level];
374 struct extent_buffer *parent = NULL;
aa5d6bed 375 int parent_slot;
8d7be552 376 struct btrfs_key cpukey;
5f39d397
CM
377 struct btrfs_disk_key parent_key;
378 struct btrfs_disk_key leaf_key;
379 int slot = path->slots[0];
8d7be552 380
5f39d397 381 u32 nritems = btrfs_header_nritems(leaf);
aa5d6bed
CM
382
383 if (path->nodes[level + 1])
5f39d397 384 parent = path->nodes[level + 1];
7518a238
CM
385
386 if (nritems == 0)
387 return 0;
388
389 if (parent) {
a1f39630 390 parent_slot = path->slots[level + 1];
5f39d397
CM
391 btrfs_node_key(parent, &parent_key, parent_slot);
392 btrfs_item_key(leaf, &leaf_key, 0);
6702ed49 393
5f39d397 394 BUG_ON(memcmp(&parent_key, &leaf_key,
e2fa7227 395 sizeof(struct btrfs_disk_key)));
1d4f8a0c 396 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
db94535d 397 btrfs_header_bytenr(leaf));
5f39d397
CM
398 }
399#if 0
400 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
401 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
402 btrfs_item_key(leaf, &leaf_key, i);
403 if (comp_keys(&leaf_key, &cpukey) >= 0) {
404 btrfs_print_leaf(root, leaf);
405 printk("slot %d offset bad key\n", i);
406 BUG_ON(1);
407 }
408 if (btrfs_item_offset_nr(leaf, i) !=
409 btrfs_item_end_nr(leaf, i + 1)) {
410 btrfs_print_leaf(root, leaf);
411 printk("slot %d offset bad\n", i);
412 BUG_ON(1);
413 }
414 if (i == 0) {
415 if (btrfs_item_offset_nr(leaf, i) +
416 btrfs_item_size_nr(leaf, i) !=
417 BTRFS_LEAF_DATA_SIZE(root)) {
418 btrfs_print_leaf(root, leaf);
419 printk("slot %d first offset bad\n", i);
420 BUG_ON(1);
421 }
422 }
aa5d6bed 423 }
5f39d397
CM
424 if (nritems > 0) {
425 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
426 btrfs_print_leaf(root, leaf);
427 printk("slot %d bad size \n", nritems - 1);
428 BUG_ON(1);
429 }
430 }
431#endif
432 if (slot != 0 && slot < nritems - 1) {
433 btrfs_item_key(leaf, &leaf_key, slot);
434 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
435 if (comp_keys(&leaf_key, &cpukey) <= 0) {
436 btrfs_print_leaf(root, leaf);
437 printk("slot %d offset bad key\n", slot);
438 BUG_ON(1);
439 }
440 if (btrfs_item_offset_nr(leaf, slot - 1) !=
441 btrfs_item_end_nr(leaf, slot)) {
442 btrfs_print_leaf(root, leaf);
443 printk("slot %d offset bad\n", slot);
444 BUG_ON(1);
445 }
8d7be552
CM
446 }
447 if (slot < nritems - 1) {
5f39d397
CM
448 btrfs_item_key(leaf, &leaf_key, slot);
449 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
450 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
451 if (btrfs_item_offset_nr(leaf, slot) !=
452 btrfs_item_end_nr(leaf, slot + 1)) {
453 btrfs_print_leaf(root, leaf);
454 printk("slot %d offset bad\n", slot);
455 BUG_ON(1);
456 }
aa5d6bed 457 }
5f39d397
CM
458 BUG_ON(btrfs_item_offset_nr(leaf, 0) +
459 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
aa5d6bed
CM
460 return 0;
461}
462
/*
 * Run the consistency check that fits the level: check_leaf() for
 * level 0, check_node() for everything above.  Returns whatever the
 * selected checker returns.  The fsid comparison is compiled out.
 */
static int check_block(struct btrfs_root *root, struct btrfs_path *path,
		       int level)
{
#if 0
	struct extent_buffer *buf = path->nodes[level];

	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
				 (unsigned long)btrfs_header_fsid(buf),
				 BTRFS_FSID_SIZE)) {
		printk("warning bad block %Lu\n", buf->start);
		return 1;
	}
#endif
	return level == 0 ? check_leaf(root, path, level) :
			    check_node(root, path, level);
}
480
74123bd7 481/*
5f39d397
CM
482 * search for key in the extent_buffer. The items start at offset p,
483 * and they are item_size apart. There are 'max' items in p.
484 *
74123bd7
CM
485 * the slot in the array is returned via slot, and it points to
486 * the place where you would insert key if it is not found in
487 * the array.
488 *
489 * slot may point to max if the key is bigger than all of the keys
490 */
5f39d397
CM
491static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
492 int item_size, struct btrfs_key *key,
493 int max, int *slot)
be0e5c09
CM
494{
495 int low = 0;
496 int high = max;
497 int mid;
498 int ret;
479965d6 499 struct btrfs_disk_key *tmp = NULL;
5f39d397
CM
500 struct btrfs_disk_key unaligned;
501 unsigned long offset;
502 char *map_token = NULL;
503 char *kaddr = NULL;
504 unsigned long map_start = 0;
505 unsigned long map_len = 0;
479965d6 506 int err;
be0e5c09
CM
507
508 while(low < high) {
509 mid = (low + high) / 2;
5f39d397
CM
510 offset = p + mid * item_size;
511
512 if (!map_token || offset < map_start ||
513 (offset + sizeof(struct btrfs_disk_key)) >
514 map_start + map_len) {
479965d6 515 if (map_token) {
5f39d397 516 unmap_extent_buffer(eb, map_token, KM_USER0);
479965d6
CM
517 map_token = NULL;
518 }
519 err = map_extent_buffer(eb, offset,
520 sizeof(struct btrfs_disk_key),
521 &map_token, &kaddr,
522 &map_start, &map_len, KM_USER0);
523
524 if (!err) {
525 tmp = (struct btrfs_disk_key *)(kaddr + offset -
526 map_start);
527 } else {
528 read_extent_buffer(eb, &unaligned,
529 offset, sizeof(unaligned));
530 tmp = &unaligned;
531 }
5f39d397 532
5f39d397
CM
533 } else {
534 tmp = (struct btrfs_disk_key *)(kaddr + offset -
535 map_start);
536 }
be0e5c09
CM
537 ret = comp_keys(tmp, key);
538
539 if (ret < 0)
540 low = mid + 1;
541 else if (ret > 0)
542 high = mid;
543 else {
544 *slot = mid;
479965d6
CM
545 if (map_token)
546 unmap_extent_buffer(eb, map_token, KM_USER0);
be0e5c09
CM
547 return 0;
548 }
549 }
550 *slot = low;
5f39d397
CM
551 if (map_token)
552 unmap_extent_buffer(eb, map_token, KM_USER0);
be0e5c09
CM
553 return 1;
554}
555
97571fd0
CM
556/*
557 * simple bin_search frontend that does the right thing for
558 * leaves vs nodes
559 */
5f39d397
CM
560static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
561 int level, int *slot)
be0e5c09 562{
5f39d397
CM
563 if (level == 0) {
564 return generic_bin_search(eb,
565 offsetof(struct btrfs_leaf, items),
0783fcfc 566 sizeof(struct btrfs_item),
5f39d397 567 key, btrfs_header_nritems(eb),
7518a238 568 slot);
be0e5c09 569 } else {
5f39d397
CM
570 return generic_bin_search(eb,
571 offsetof(struct btrfs_node, ptrs),
123abc88 572 sizeof(struct btrfs_key_ptr),
5f39d397 573 key, btrfs_header_nritems(eb),
7518a238 574 slot);
be0e5c09
CM
575 }
576 return -1;
577}
578
5f39d397
CM
579static struct extent_buffer *read_node_slot(struct btrfs_root *root,
580 struct extent_buffer *parent, int slot)
bb803951 581{
bb803951
CM
582 if (slot < 0)
583 return NULL;
5f39d397 584 if (slot >= btrfs_header_nritems(parent))
bb803951 585 return NULL;
db94535d
CM
586 return read_tree_block(root, btrfs_node_blockptr(parent, slot),
587 btrfs_level_size(root, btrfs_header_level(parent) - 1));
bb803951
CM
588}
589
e089f05c
CM
590static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
591 *root, struct btrfs_path *path, int level)
bb803951 592{
5f39d397
CM
593 struct extent_buffer *right = NULL;
594 struct extent_buffer *mid;
595 struct extent_buffer *left = NULL;
596 struct extent_buffer *parent = NULL;
bb803951
CM
597 int ret = 0;
598 int wret;
599 int pslot;
bb803951 600 int orig_slot = path->slots[level];
54aa1f4d 601 int err_on_enospc = 0;
79f95c82 602 u64 orig_ptr;
bb803951
CM
603
604 if (level == 0)
605 return 0;
606
5f39d397 607 mid = path->nodes[level];
1d4f8a0c 608 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
79f95c82 609
234b63a0 610 if (level < BTRFS_MAX_LEVEL - 1)
5f39d397 611 parent = path->nodes[level + 1];
bb803951
CM
612 pslot = path->slots[level + 1];
613
40689478
CM
614 /*
615 * deal with the case where there is only one pointer in the root
616 * by promoting the node below to a root
617 */
5f39d397
CM
618 if (!parent) {
619 struct extent_buffer *child;
bb803951 620
5f39d397 621 if (btrfs_header_nritems(mid) != 1)
bb803951
CM
622 return 0;
623
624 /* promote the child to a root */
5f39d397 625 child = read_node_slot(root, mid, 0);
bb803951
CM
626 BUG_ON(!child);
627 root->node = child;
628 path->nodes[level] = NULL;
5f39d397
CM
629 clean_tree_block(trans, root, mid);
630 wait_on_tree_block_writeback(root, mid);
bb803951 631 /* once for the path */
5f39d397 632 free_extent_buffer(mid);
db94535d 633 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1);
bb803951 634 /* once for the root ptr */
5f39d397 635 free_extent_buffer(mid);
db94535d 636 return ret;
bb803951 637 }
5f39d397 638 if (btrfs_header_nritems(mid) >
123abc88 639 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
bb803951
CM
640 return 0;
641
5f39d397 642 if (btrfs_header_nritems(mid) < 2)
54aa1f4d
CM
643 err_on_enospc = 1;
644
5f39d397
CM
645 left = read_node_slot(root, parent, pslot - 1);
646 if (left) {
647 wret = btrfs_cow_block(trans, root, left,
648 parent, pslot - 1, &left);
54aa1f4d
CM
649 if (wret) {
650 ret = wret;
651 goto enospc;
652 }
2cc58cf2 653 }
5f39d397
CM
654 right = read_node_slot(root, parent, pslot + 1);
655 if (right) {
656 wret = btrfs_cow_block(trans, root, right,
657 parent, pslot + 1, &right);
2cc58cf2
CM
658 if (wret) {
659 ret = wret;
660 goto enospc;
661 }
662 }
663
664 /* first, try to make some room in the middle buffer */
5f39d397
CM
665 if (left) {
666 orig_slot += btrfs_header_nritems(left);
667 wret = push_node_left(trans, root, left, mid);
79f95c82
CM
668 if (wret < 0)
669 ret = wret;
5f39d397 670 if (btrfs_header_nritems(mid) < 2)
54aa1f4d 671 err_on_enospc = 1;
bb803951 672 }
79f95c82
CM
673
674 /*
675 * then try to empty the right most buffer into the middle
676 */
5f39d397
CM
677 if (right) {
678 wret = push_node_left(trans, root, mid, right);
54aa1f4d 679 if (wret < 0 && wret != -ENOSPC)
79f95c82 680 ret = wret;
5f39d397 681 if (btrfs_header_nritems(right) == 0) {
db94535d
CM
682 u64 bytenr = right->start;
683 u32 blocksize = right->len;
684
5f39d397
CM
685 clean_tree_block(trans, root, right);
686 wait_on_tree_block_writeback(root, right);
687 free_extent_buffer(right);
bb803951 688 right = NULL;
e089f05c
CM
689 wret = del_ptr(trans, root, path, level + 1, pslot +
690 1);
bb803951
CM
691 if (wret)
692 ret = wret;
db94535d
CM
693 wret = btrfs_free_extent(trans, root, bytenr,
694 blocksize, 1);
bb803951
CM
695 if (wret)
696 ret = wret;
697 } else {
5f39d397
CM
698 struct btrfs_disk_key right_key;
699 btrfs_node_key(right, &right_key, 0);
700 btrfs_set_node_key(parent, &right_key, pslot + 1);
701 btrfs_mark_buffer_dirty(parent);
bb803951
CM
702 }
703 }
5f39d397 704 if (btrfs_header_nritems(mid) == 1) {
79f95c82
CM
705 /*
706 * we're not allowed to leave a node with one item in the
707 * tree during a delete. A deletion from lower in the tree
708 * could try to delete the only pointer in this node.
709 * So, pull some keys from the left.
710 * There has to be a left pointer at this point because
711 * otherwise we would have pulled some pointers from the
712 * right
713 */
5f39d397
CM
714 BUG_ON(!left);
715 wret = balance_node_right(trans, root, mid, left);
54aa1f4d 716 if (wret < 0) {
79f95c82 717 ret = wret;
54aa1f4d
CM
718 goto enospc;
719 }
79f95c82
CM
720 BUG_ON(wret == 1);
721 }
5f39d397 722 if (btrfs_header_nritems(mid) == 0) {
79f95c82 723 /* we've managed to empty the middle node, drop it */
db94535d
CM
724 u64 bytenr = mid->start;
725 u32 blocksize = mid->len;
5f39d397
CM
726 clean_tree_block(trans, root, mid);
727 wait_on_tree_block_writeback(root, mid);
728 free_extent_buffer(mid);
bb803951 729 mid = NULL;
e089f05c 730 wret = del_ptr(trans, root, path, level + 1, pslot);
bb803951
CM
731 if (wret)
732 ret = wret;
db94535d 733 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1);
bb803951
CM
734 if (wret)
735 ret = wret;
79f95c82
CM
736 } else {
737 /* update the parent key to reflect our changes */
5f39d397
CM
738 struct btrfs_disk_key mid_key;
739 btrfs_node_key(mid, &mid_key, 0);
740 btrfs_set_node_key(parent, &mid_key, pslot);
741 btrfs_mark_buffer_dirty(parent);
79f95c82 742 }
bb803951 743
79f95c82 744 /* update the path */
5f39d397
CM
745 if (left) {
746 if (btrfs_header_nritems(left) > orig_slot) {
747 extent_buffer_get(left);
748 path->nodes[level] = left;
bb803951
CM
749 path->slots[level + 1] -= 1;
750 path->slots[level] = orig_slot;
5f39d397
CM
751 if (mid)
752 free_extent_buffer(mid);
bb803951 753 } else {
5f39d397 754 orig_slot -= btrfs_header_nritems(left);
bb803951
CM
755 path->slots[level] = orig_slot;
756 }
757 }
79f95c82 758 /* double check we haven't messed things up */
123abc88 759 check_block(root, path, level);
e20d96d6 760 if (orig_ptr !=
5f39d397 761 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
79f95c82 762 BUG();
54aa1f4d 763enospc:
5f39d397
CM
764 if (right)
765 free_extent_buffer(right);
766 if (left)
767 free_extent_buffer(left);
bb803951
CM
768 return ret;
769}
770
e66f709b
CM
771/* returns zero if the push worked, non-zero otherwise */
772static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
773 struct btrfs_root *root,
774 struct btrfs_path *path, int level)
775{
5f39d397
CM
776 struct extent_buffer *right = NULL;
777 struct extent_buffer *mid;
778 struct extent_buffer *left = NULL;
779 struct extent_buffer *parent = NULL;
e66f709b
CM
780 int ret = 0;
781 int wret;
782 int pslot;
783 int orig_slot = path->slots[level];
784 u64 orig_ptr;
785
786 if (level == 0)
787 return 1;
788
5f39d397 789 mid = path->nodes[level];
e66f709b
CM
790 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
791
792 if (level < BTRFS_MAX_LEVEL - 1)
5f39d397 793 parent = path->nodes[level + 1];
e66f709b
CM
794 pslot = path->slots[level + 1];
795
5f39d397 796 if (!parent)
e66f709b 797 return 1;
e66f709b 798
5f39d397 799 left = read_node_slot(root, parent, pslot - 1);
e66f709b
CM
800
801 /* first, try to make some room in the middle buffer */
5f39d397 802 if (left) {
e66f709b 803 u32 left_nr;
5f39d397 804 left_nr = btrfs_header_nritems(left);
33ade1f8
CM
805 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
806 wret = 1;
807 } else {
5f39d397
CM
808 ret = btrfs_cow_block(trans, root, left, parent,
809 pslot - 1, &left);
54aa1f4d
CM
810 if (ret)
811 wret = 1;
812 else {
54aa1f4d 813 wret = push_node_left(trans, root,
5f39d397 814 left, mid);
54aa1f4d 815 }
33ade1f8 816 }
e66f709b
CM
817 if (wret < 0)
818 ret = wret;
819 if (wret == 0) {
5f39d397 820 struct btrfs_disk_key disk_key;
e66f709b 821 orig_slot += left_nr;
5f39d397
CM
822 btrfs_node_key(mid, &disk_key, 0);
823 btrfs_set_node_key(parent, &disk_key, pslot);
824 btrfs_mark_buffer_dirty(parent);
825 if (btrfs_header_nritems(left) > orig_slot) {
826 path->nodes[level] = left;
e66f709b
CM
827 path->slots[level + 1] -= 1;
828 path->slots[level] = orig_slot;
5f39d397 829 free_extent_buffer(mid);
e66f709b
CM
830 } else {
831 orig_slot -=
5f39d397 832 btrfs_header_nritems(left);
e66f709b 833 path->slots[level] = orig_slot;
5f39d397 834 free_extent_buffer(left);
e66f709b 835 }
e66f709b
CM
836 return 0;
837 }
5f39d397 838 free_extent_buffer(left);
e66f709b 839 }
5f39d397 840 right= read_node_slot(root, parent, pslot + 1);
e66f709b
CM
841
842 /*
843 * then try to empty the right most buffer into the middle
844 */
5f39d397 845 if (right) {
33ade1f8 846 u32 right_nr;
5f39d397 847 right_nr = btrfs_header_nritems(right);
33ade1f8
CM
848 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
849 wret = 1;
850 } else {
5f39d397
CM
851 ret = btrfs_cow_block(trans, root, right,
852 parent, pslot + 1,
853 &right);
54aa1f4d
CM
854 if (ret)
855 wret = 1;
856 else {
54aa1f4d 857 wret = balance_node_right(trans, root,
5f39d397 858 right, mid);
54aa1f4d 859 }
33ade1f8 860 }
e66f709b
CM
861 if (wret < 0)
862 ret = wret;
863 if (wret == 0) {
5f39d397
CM
864 struct btrfs_disk_key disk_key;
865
866 btrfs_node_key(right, &disk_key, 0);
867 btrfs_set_node_key(parent, &disk_key, pslot + 1);
868 btrfs_mark_buffer_dirty(parent);
869
870 if (btrfs_header_nritems(mid) <= orig_slot) {
871 path->nodes[level] = right;
e66f709b
CM
872 path->slots[level + 1] += 1;
873 path->slots[level] = orig_slot -
5f39d397
CM
874 btrfs_header_nritems(mid);
875 free_extent_buffer(mid);
e66f709b 876 } else {
5f39d397 877 free_extent_buffer(right);
e66f709b 878 }
e66f709b
CM
879 return 0;
880 }
5f39d397 881 free_extent_buffer(right);
e66f709b 882 }
e66f709b
CM
883 return 1;
884}
885
3c69faec
CM
886/*
887 * readahead one full node of leaves
888 */
889static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
6702ed49 890 int level, int slot)
3c69faec 891{
5f39d397 892 struct extent_buffer *node;
3c69faec 893 u32 nritems;
3c69faec 894 u64 search;
6b80053d
CM
895 u64 lowest_read;
896 u64 highest_read;
897 u64 nread = 0;
3c69faec 898 int direction = path->reada;
5f39d397 899 struct extent_buffer *eb;
6b80053d
CM
900 u32 nr;
901 u32 blocksize;
902 u32 nscan = 0;
db94535d 903
6702ed49
CM
904 if (level == 0)
905 return;
906
907 if (!path->nodes[level])
3c69faec
CM
908 return;
909
5f39d397 910 node = path->nodes[level];
3c69faec 911 search = btrfs_node_blockptr(node, slot);
6b80053d
CM
912 blocksize = btrfs_level_size(root, level - 1);
913 eb = btrfs_find_tree_block(root, search, blocksize);
5f39d397
CM
914 if (eb) {
915 free_extent_buffer(eb);
3c69faec
CM
916 return;
917 }
918
6b80053d
CM
919 highest_read = search;
920 lowest_read = search;
921
5f39d397 922 nritems = btrfs_header_nritems(node);
6b80053d 923 nr = slot;
3c69faec 924 while(1) {
6b80053d
CM
925 if (direction < 0) {
926 if (nr == 0)
927 break;
928 nr--;
929 } else if (direction > 0) {
930 nr++;
931 if (nr >= nritems)
932 break;
3c69faec 933 }
6b80053d
CM
934 search = btrfs_node_blockptr(node, nr);
935 if ((search >= lowest_read && search <= highest_read) ||
936 (search < lowest_read && lowest_read - search <= 32768) ||
937 (search > highest_read && search - highest_read <= 32768)) {
938 readahead_tree_block(root, search, blocksize);
939 nread += blocksize;
940 }
941 nscan++;
942 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
943 break;
944 if(nread > (1024 * 1024) || nscan > 128)
945 break;
946
947 if (search < lowest_read)
948 lowest_read = search;
949 if (search > highest_read)
950 highest_read = search;
3c69faec
CM
951 }
952}
74123bd7
CM
953/*
954 * look for key in the tree. path is filled in with nodes along the way
955 * if key is found, we return zero and you can find the item in the leaf
956 * level of the path (level 0)
957 *
958 * If the key isn't found, the path points to the slot where it should
aa5d6bed
CM
959 * be inserted, and 1 is returned. If there are other errors during the
960 * search a negative error number is returned.
97571fd0
CM
961 *
962 * if ins_len > 0, nodes and leaves will be split as we walk down the
963 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
964 * possible)
74123bd7 965 */
e089f05c
CM
966int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
967 *root, struct btrfs_key *key, struct btrfs_path *p, int
968 ins_len, int cow)
be0e5c09 969{
5f39d397 970 struct extent_buffer *b;
db94535d 971 u64 bytenr;
be0e5c09
CM
972 int slot;
973 int ret;
974 int level;
3c69faec 975 int should_reada = p->reada;
9f3a7427
CM
976 u8 lowest_level = 0;
977
6702ed49
CM
978 lowest_level = p->lowest_level;
979 WARN_ON(lowest_level && ins_len);
22b0ebda
CM
980 WARN_ON(p->nodes[0] != NULL);
981 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
bb803951
CM
982again:
983 b = root->node;
5f39d397 984 extent_buffer_get(b);
eb60ceac 985 while (b) {
5f39d397 986 level = btrfs_header_level(b);
02217ed2
CM
987 if (cow) {
988 int wret;
e20d96d6
CM
989 wret = btrfs_cow_block(trans, root, b,
990 p->nodes[level + 1],
991 p->slots[level + 1],
252c38f0 992 &b);
54aa1f4d 993 if (wret) {
5f39d397 994 free_extent_buffer(b);
54aa1f4d
CM
995 return wret;
996 }
02217ed2
CM
997 }
998 BUG_ON(!cow && ins_len);
5f39d397 999 if (level != btrfs_header_level(b))
2c90e5d6 1000 WARN_ON(1);
5f39d397 1001 level = btrfs_header_level(b);
eb60ceac 1002 p->nodes[level] = b;
123abc88 1003 ret = check_block(root, p, level);
aa5d6bed
CM
1004 if (ret)
1005 return -1;
5f39d397
CM
1006 ret = bin_search(b, key, level, &slot);
1007 if (level != 0) {
be0e5c09
CM
1008 if (ret && slot > 0)
1009 slot -= 1;
1010 p->slots[level] = slot;
5f39d397 1011 if (ins_len > 0 && btrfs_header_nritems(b) >=
d4dbff95 1012 BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
e089f05c 1013 int sret = split_node(trans, root, p, level);
5c680ed6
CM
1014 BUG_ON(sret > 0);
1015 if (sret)
1016 return sret;
1017 b = p->nodes[level];
5c680ed6 1018 slot = p->slots[level];
bb803951 1019 } else if (ins_len < 0) {
e089f05c
CM
1020 int sret = balance_level(trans, root, p,
1021 level);
bb803951
CM
1022 if (sret)
1023 return sret;
1024 b = p->nodes[level];
f510cfec
CM
1025 if (!b) {
1026 btrfs_release_path(NULL, p);
bb803951 1027 goto again;
f510cfec 1028 }
bb803951 1029 slot = p->slots[level];
5f39d397 1030 BUG_ON(btrfs_header_nritems(b) == 1);
5c680ed6 1031 }
9f3a7427
CM
1032 /* this is only true while dropping a snapshot */
1033 if (level == lowest_level)
1034 break;
db94535d 1035 bytenr = btrfs_node_blockptr(b, slot);
6702ed49
CM
1036 if (should_reada)
1037 reada_for_search(root, p, level, slot);
db94535d
CM
1038 b = read_tree_block(root, bytenr,
1039 btrfs_level_size(root, level - 1));
be0e5c09
CM
1040 } else {
1041 p->slots[level] = slot;
5f39d397 1042 if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
0783fcfc 1043 sizeof(struct btrfs_item) + ins_len) {
d4dbff95
CM
1044 int sret = split_leaf(trans, root, key,
1045 p, ins_len);
5c680ed6
CM
1046 BUG_ON(sret > 0);
1047 if (sret)
1048 return sret;
1049 }
be0e5c09
CM
1050 return ret;
1051 }
1052 }
aa5d6bed 1053 return 1;
be0e5c09
CM
1054}
1055
74123bd7
CM
1056/*
1057 * adjust the pointers going up the tree, starting at level
1058 * making sure the right key of each node is points to 'key'.
1059 * This is used after shifting pointers to the left, so it stops
1060 * fixing up pointers when a given leaf/node is not in slot 0 of the
1061 * higher levels
aa5d6bed
CM
1062 *
1063 * If this fails to write a tree block, it returns -1, but continues
1064 * fixing up the blocks in ram so the tree is consistent.
74123bd7 1065 */
5f39d397
CM
1066static int fixup_low_keys(struct btrfs_trans_handle *trans,
1067 struct btrfs_root *root, struct btrfs_path *path,
1068 struct btrfs_disk_key *key, int level)
be0e5c09
CM
1069{
1070 int i;
aa5d6bed 1071 int ret = 0;
5f39d397
CM
1072 struct extent_buffer *t;
1073
234b63a0 1074 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
be0e5c09 1075 int tslot = path->slots[i];
eb60ceac 1076 if (!path->nodes[i])
be0e5c09 1077 break;
5f39d397
CM
1078 t = path->nodes[i];
1079 btrfs_set_node_key(t, key, tslot);
d6025579 1080 btrfs_mark_buffer_dirty(path->nodes[i]);
be0e5c09
CM
1081 if (tslot != 0)
1082 break;
1083 }
aa5d6bed 1084 return ret;
be0e5c09
CM
1085}
1086
74123bd7
CM
1087/*
1088 * try to push data from one node into the next node left in the
79f95c82 1089 * tree.
aa5d6bed
CM
1090 *
1091 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1092 * error, and > 0 if there was no room in the left hand block.
74123bd7 1093 */
e089f05c 1094static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
5f39d397
CM
1095 *root, struct extent_buffer *dst,
1096 struct extent_buffer *src)
be0e5c09 1097{
be0e5c09 1098 int push_items = 0;
bb803951
CM
1099 int src_nritems;
1100 int dst_nritems;
aa5d6bed 1101 int ret = 0;
be0e5c09 1102
5f39d397
CM
1103 src_nritems = btrfs_header_nritems(src);
1104 dst_nritems = btrfs_header_nritems(dst);
123abc88 1105 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
54aa1f4d 1106
eb60ceac 1107 if (push_items <= 0) {
be0e5c09 1108 return 1;
eb60ceac 1109 }
be0e5c09 1110
bb803951 1111 if (src_nritems < push_items)
79f95c82
CM
1112 push_items = src_nritems;
1113
5f39d397
CM
1114 copy_extent_buffer(dst, src,
1115 btrfs_node_key_ptr_offset(dst_nritems),
1116 btrfs_node_key_ptr_offset(0),
1117 push_items * sizeof(struct btrfs_key_ptr));
1118
bb803951 1119 if (push_items < src_nritems) {
5f39d397
CM
1120 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1121 btrfs_node_key_ptr_offset(push_items),
1122 (src_nritems - push_items) *
1123 sizeof(struct btrfs_key_ptr));
1124 }
1125 btrfs_set_header_nritems(src, src_nritems - push_items);
1126 btrfs_set_header_nritems(dst, dst_nritems + push_items);
1127 btrfs_mark_buffer_dirty(src);
1128 btrfs_mark_buffer_dirty(dst);
79f95c82
CM
1129 return ret;
1130}
1131
1132/*
1133 * try to push data from one node into the next node right in the
1134 * tree.
1135 *
1136 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1137 * error, and > 0 if there was no room in the right hand block.
1138 *
1139 * this will only push up to 1/2 the contents of the left node over
1140 */
5f39d397
CM
1141static int balance_node_right(struct btrfs_trans_handle *trans,
1142 struct btrfs_root *root,
1143 struct extent_buffer *dst,
1144 struct extent_buffer *src)
79f95c82 1145{
79f95c82
CM
1146 int push_items = 0;
1147 int max_push;
1148 int src_nritems;
1149 int dst_nritems;
1150 int ret = 0;
79f95c82 1151
5f39d397
CM
1152 src_nritems = btrfs_header_nritems(src);
1153 dst_nritems = btrfs_header_nritems(dst);
123abc88 1154 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
5f39d397 1155 if (push_items <= 0)
79f95c82 1156 return 1;
79f95c82
CM
1157
1158 max_push = src_nritems / 2 + 1;
1159 /* don't try to empty the node */
252c38f0 1160 if (max_push >= src_nritems)
79f95c82 1161 return 1;
252c38f0 1162
79f95c82
CM
1163 if (max_push < push_items)
1164 push_items = max_push;
1165
5f39d397
CM
1166 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1167 btrfs_node_key_ptr_offset(0),
1168 (dst_nritems) *
1169 sizeof(struct btrfs_key_ptr));
d6025579 1170
5f39d397
CM
1171 copy_extent_buffer(dst, src,
1172 btrfs_node_key_ptr_offset(0),
1173 btrfs_node_key_ptr_offset(src_nritems - push_items),
1174 push_items * sizeof(struct btrfs_key_ptr));
79f95c82 1175
5f39d397
CM
1176 btrfs_set_header_nritems(src, src_nritems - push_items);
1177 btrfs_set_header_nritems(dst, dst_nritems + push_items);
79f95c82 1178
5f39d397
CM
1179 btrfs_mark_buffer_dirty(src);
1180 btrfs_mark_buffer_dirty(dst);
aa5d6bed 1181 return ret;
be0e5c09
CM
1182}
1183
97571fd0
CM
1184/*
1185 * helper function to insert a new root level in the tree.
1186 * A new node is allocated, and a single item is inserted to
1187 * point to the existing root
aa5d6bed
CM
1188 *
1189 * returns zero on success or < 0 on failure.
97571fd0 1190 */
5f39d397
CM
1191static int insert_new_root(struct btrfs_trans_handle *trans,
1192 struct btrfs_root *root,
1193 struct btrfs_path *path, int level)
5c680ed6 1194{
5f39d397
CM
1195 struct extent_buffer *lower;
1196 struct extent_buffer *c;
1197 struct btrfs_disk_key lower_key;
5c680ed6
CM
1198
1199 BUG_ON(path->nodes[level]);
1200 BUG_ON(path->nodes[level-1] != root->node);
1201
db94535d
CM
1202 c = btrfs_alloc_free_block(trans, root, root->nodesize,
1203 root->node->start, 0);
5f39d397
CM
1204 if (IS_ERR(c))
1205 return PTR_ERR(c);
1206 memset_extent_buffer(c, 0, 0, root->nodesize);
1207 btrfs_set_header_nritems(c, 1);
1208 btrfs_set_header_level(c, level);
db94535d 1209 btrfs_set_header_bytenr(c, c->start);
5f39d397
CM
1210 btrfs_set_header_generation(c, trans->transid);
1211 btrfs_set_header_owner(c, root->root_key.objectid);
1212 lower = path->nodes[level-1];
1213
1214 write_extent_buffer(c, root->fs_info->fsid,
1215 (unsigned long)btrfs_header_fsid(c),
1216 BTRFS_FSID_SIZE);
1217 if (level == 1)
1218 btrfs_item_key(lower, &lower_key, 0);
5c680ed6 1219 else
5f39d397
CM
1220 btrfs_node_key(lower, &lower_key, 0);
1221 btrfs_set_node_key(c, &lower_key, 0);
db94535d 1222 btrfs_set_node_blockptr(c, 0, lower->start);
d5719762 1223
5f39d397 1224 btrfs_mark_buffer_dirty(c);
d5719762 1225
5c680ed6 1226 /* the super has an extra ref to root->node */
5f39d397
CM
1227 free_extent_buffer(root->node);
1228 root->node = c;
1229 extent_buffer_get(c);
1230 path->nodes[level] = c;
5c680ed6
CM
1231 path->slots[level] = 0;
1232 return 0;
1233}
1234
74123bd7
CM
1235/*
1236 * worker function to insert a single pointer in a node.
1237 * the node should have enough room for the pointer already
97571fd0 1238 *
74123bd7
CM
1239 * slot and level indicate where you want the key to go, and
1240 * blocknr is the block the key points to.
aa5d6bed
CM
1241 *
1242 * returns zero on success and < 0 on any error
74123bd7 1243 */
e089f05c
CM
1244static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1245 *root, struct btrfs_path *path, struct btrfs_disk_key
db94535d 1246 *key, u64 bytenr, int slot, int level)
74123bd7 1247{
5f39d397 1248 struct extent_buffer *lower;
74123bd7 1249 int nritems;
5c680ed6
CM
1250
1251 BUG_ON(!path->nodes[level]);
5f39d397
CM
1252 lower = path->nodes[level];
1253 nritems = btrfs_header_nritems(lower);
74123bd7
CM
1254 if (slot > nritems)
1255 BUG();
123abc88 1256 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
74123bd7
CM
1257 BUG();
1258 if (slot != nritems) {
5f39d397
CM
1259 memmove_extent_buffer(lower,
1260 btrfs_node_key_ptr_offset(slot + 1),
1261 btrfs_node_key_ptr_offset(slot),
d6025579 1262 (nritems - slot) * sizeof(struct btrfs_key_ptr));
74123bd7 1263 }
5f39d397 1264 btrfs_set_node_key(lower, key, slot);
db94535d 1265 btrfs_set_node_blockptr(lower, slot, bytenr);
5f39d397
CM
1266 btrfs_set_header_nritems(lower, nritems + 1);
1267 btrfs_mark_buffer_dirty(lower);
74123bd7
CM
1268 return 0;
1269}
1270
97571fd0
CM
1271/*
1272 * split the node at the specified level in path in two.
1273 * The path is corrected to point to the appropriate node after the split
1274 *
1275 * Before splitting this tries to make some room in the node by pushing
1276 * left and right, if either one works, it returns right away.
aa5d6bed
CM
1277 *
1278 * returns 0 on success and < 0 on failure
97571fd0 1279 */
e089f05c
CM
1280static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1281 *root, struct btrfs_path *path, int level)
be0e5c09 1282{
5f39d397
CM
1283 struct extent_buffer *c;
1284 struct extent_buffer *split;
1285 struct btrfs_disk_key disk_key;
be0e5c09 1286 int mid;
5c680ed6 1287 int ret;
aa5d6bed 1288 int wret;
7518a238 1289 u32 c_nritems;
eb60ceac 1290
5f39d397
CM
1291 c = path->nodes[level];
1292 if (c == root->node) {
5c680ed6 1293 /* trying to split the root, lets make a new one */
e089f05c 1294 ret = insert_new_root(trans, root, path, level + 1);
5c680ed6
CM
1295 if (ret)
1296 return ret;
e66f709b
CM
1297 } else {
1298 ret = push_nodes_for_insert(trans, root, path, level);
5f39d397
CM
1299 c = path->nodes[level];
1300 if (!ret && btrfs_header_nritems(c) <
e66f709b
CM
1301 BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1302 return 0;
54aa1f4d
CM
1303 if (ret < 0)
1304 return ret;
be0e5c09 1305 }
e66f709b 1306
5f39d397 1307 c_nritems = btrfs_header_nritems(c);
db94535d
CM
1308 split = btrfs_alloc_free_block(trans, root, root->nodesize,
1309 c->start, 0);
5f39d397
CM
1310 if (IS_ERR(split))
1311 return PTR_ERR(split);
1312
1313 btrfs_set_header_flags(split, btrfs_header_flags(c));
1314 btrfs_set_header_level(split, btrfs_header_level(c));
db94535d 1315 btrfs_set_header_bytenr(split, split->start);
5f39d397
CM
1316 btrfs_set_header_generation(split, trans->transid);
1317 btrfs_set_header_owner(split, root->root_key.objectid);
1318 write_extent_buffer(split, root->fs_info->fsid,
1319 (unsigned long)btrfs_header_fsid(split),
1320 BTRFS_FSID_SIZE);
54aa1f4d 1321
7518a238 1322 mid = (c_nritems + 1) / 2;
5f39d397
CM
1323
1324 copy_extent_buffer(split, c,
1325 btrfs_node_key_ptr_offset(0),
1326 btrfs_node_key_ptr_offset(mid),
1327 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1328 btrfs_set_header_nritems(split, c_nritems - mid);
1329 btrfs_set_header_nritems(c, mid);
aa5d6bed
CM
1330 ret = 0;
1331
5f39d397
CM
1332 btrfs_mark_buffer_dirty(c);
1333 btrfs_mark_buffer_dirty(split);
1334
1335 btrfs_node_key(split, &disk_key, 0);
db94535d 1336 wret = insert_ptr(trans, root, path, &disk_key, split->start,
5f39d397 1337 path->slots[level + 1] + 1,
123abc88 1338 level + 1);
aa5d6bed
CM
1339 if (wret)
1340 ret = wret;
1341
5de08d7d 1342 if (path->slots[level] >= mid) {
5c680ed6 1343 path->slots[level] -= mid;
5f39d397
CM
1344 free_extent_buffer(c);
1345 path->nodes[level] = split;
5c680ed6
CM
1346 path->slots[level + 1] += 1;
1347 } else {
5f39d397 1348 free_extent_buffer(split);
be0e5c09 1349 }
aa5d6bed 1350 return ret;
be0e5c09
CM
1351}
1352
74123bd7
CM
1353/*
1354 * how many bytes are required to store the items in a leaf. start
1355 * and nr indicate which items in the leaf to check. This totals up the
1356 * space used both by the item structs and the item data
1357 */
5f39d397 1358static int leaf_space_used(struct extent_buffer *l, int start, int nr)
be0e5c09
CM
1359{
1360 int data_len;
5f39d397 1361 int nritems = btrfs_header_nritems(l);
d4dbff95 1362 int end = min(nritems, start + nr) - 1;
be0e5c09
CM
1363
1364 if (!nr)
1365 return 0;
5f39d397
CM
1366 data_len = btrfs_item_end_nr(l, start);
1367 data_len = data_len - btrfs_item_offset_nr(l, end);
0783fcfc 1368 data_len += sizeof(struct btrfs_item) * nr;
d4dbff95 1369 WARN_ON(data_len < 0);
be0e5c09
CM
1370 return data_len;
1371}
1372
d4dbff95
CM
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data.
 *
 * The result can be negative; that case is logged with printk and the
 * negative value is still returned to the caller.
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
	int item_count = btrfs_header_nritems(leaf);
	int free_bytes = BTRFS_LEAF_DATA_SIZE(root) -
			 leaf_space_used(leaf, 0, item_count);

	if (free_bytes >= 0)
		return free_bytes;

	printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
	       free_bytes, BTRFS_LEAF_DATA_SIZE(root),
	       leaf_space_used(leaf, 0, item_count), item_count);
	return free_bytes;
}
1390
00ec4c51
CM
1391/*
1392 * push some data in the path leaf to the right, trying to free up at
1393 * least data_size bytes. returns zero if the push worked, nonzero otherwise
aa5d6bed
CM
1394 *
1395 * returns 1 if the push failed because the other node didn't have enough
1396 * room, 0 if everything worked out and < 0 if there were major errors.
00ec4c51 1397 */
e089f05c
CM
1398static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1399 *root, struct btrfs_path *path, int data_size)
00ec4c51 1400{
5f39d397
CM
1401 struct extent_buffer *left = path->nodes[0];
1402 struct extent_buffer *right;
1403 struct extent_buffer *upper;
1404 struct btrfs_disk_key disk_key;
00ec4c51
CM
1405 int slot;
1406 int i;
1407 int free_space;
1408 int push_space = 0;
1409 int push_items = 0;
0783fcfc 1410 struct btrfs_item *item;
7518a238
CM
1411 u32 left_nritems;
1412 u32 right_nritems;
5f39d397 1413 u32 data_end;
db94535d 1414 u32 this_item_size;
54aa1f4d 1415 int ret;
00ec4c51
CM
1416
1417 slot = path->slots[1];
1418 if (!path->nodes[1]) {
1419 return 1;
1420 }
1421 upper = path->nodes[1];
5f39d397 1422 if (slot >= btrfs_header_nritems(upper) - 1)
00ec4c51 1423 return 1;
5f39d397 1424
db94535d
CM
1425 right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1426 root->leafsize);
123abc88 1427 free_space = btrfs_leaf_free_space(root, right);
0783fcfc 1428 if (free_space < data_size + sizeof(struct btrfs_item)) {
5f39d397 1429 free_extent_buffer(right);
00ec4c51
CM
1430 return 1;
1431 }
5f39d397 1432
02217ed2 1433 /* cow and double check */
5f39d397
CM
1434 ret = btrfs_cow_block(trans, root, right, upper,
1435 slot + 1, &right);
54aa1f4d 1436 if (ret) {
5f39d397 1437 free_extent_buffer(right);
54aa1f4d
CM
1438 return 1;
1439 }
123abc88 1440 free_space = btrfs_leaf_free_space(root, right);
0783fcfc 1441 if (free_space < data_size + sizeof(struct btrfs_item)) {
5f39d397 1442 free_extent_buffer(right);
02217ed2
CM
1443 return 1;
1444 }
1445
5f39d397 1446 left_nritems = btrfs_header_nritems(left);
a429e513 1447 if (left_nritems == 0) {
5f39d397 1448 free_extent_buffer(right);
a429e513
CM
1449 return 1;
1450 }
5f39d397 1451
a429e513 1452 for (i = left_nritems - 1; i >= 1; i--) {
5f39d397 1453 item = btrfs_item_nr(left, i);
db94535d 1454
00ec4c51
CM
1455 if (path->slots[0] == i)
1456 push_space += data_size + sizeof(*item);
db94535d
CM
1457
1458 if (!left->map_token) {
1459 map_extent_buffer(left, (unsigned long)item,
1460 sizeof(struct btrfs_item),
1461 &left->map_token, &left->kaddr,
1462 &left->map_start, &left->map_len,
1463 KM_USER1);
1464 }
1465
1466 this_item_size = btrfs_item_size(left, item);
1467 if (this_item_size + sizeof(*item) + push_space > free_space)
00ec4c51
CM
1468 break;
1469 push_items++;
db94535d
CM
1470 push_space += this_item_size + sizeof(*item);
1471 }
1472 if (left->map_token) {
1473 unmap_extent_buffer(left, left->map_token, KM_USER1);
1474 left->map_token = NULL;
00ec4c51 1475 }
5f39d397 1476
00ec4c51 1477 if (push_items == 0) {
5f39d397 1478 free_extent_buffer(right);
00ec4c51
CM
1479 return 1;
1480 }
5f39d397 1481
a429e513
CM
1482 if (push_items == left_nritems)
1483 WARN_ON(1);
5f39d397 1484
00ec4c51 1485 /* push left to right */
5f39d397
CM
1486 right_nritems = btrfs_header_nritems(right);
1487 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
123abc88 1488 push_space -= leaf_data_end(root, left);
5f39d397 1489
00ec4c51 1490 /* make room in the right data area */
5f39d397
CM
1491 data_end = leaf_data_end(root, right);
1492 memmove_extent_buffer(right,
1493 btrfs_leaf_data(right) + data_end - push_space,
1494 btrfs_leaf_data(right) + data_end,
1495 BTRFS_LEAF_DATA_SIZE(root) - data_end);
1496
00ec4c51 1497 /* copy from the left data area */
5f39d397 1498 copy_extent_buffer(right, left, btrfs_leaf_data(right) +
d6025579
CM
1499 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1500 btrfs_leaf_data(left) + leaf_data_end(root, left),
1501 push_space);
5f39d397
CM
1502
1503 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1504 btrfs_item_nr_offset(0),
1505 right_nritems * sizeof(struct btrfs_item));
1506
00ec4c51 1507 /* copy the items from left to right */
5f39d397
CM
1508 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1509 btrfs_item_nr_offset(left_nritems - push_items),
1510 push_items * sizeof(struct btrfs_item));
00ec4c51
CM
1511
1512 /* update the item pointers */
7518a238 1513 right_nritems += push_items;
5f39d397 1514 btrfs_set_header_nritems(right, right_nritems);
123abc88 1515 push_space = BTRFS_LEAF_DATA_SIZE(root);
db94535d 1516
7518a238 1517 for (i = 0; i < right_nritems; i++) {
5f39d397 1518 item = btrfs_item_nr(right, i);
db94535d
CM
1519 if (!right->map_token) {
1520 map_extent_buffer(right, (unsigned long)item,
1521 sizeof(struct btrfs_item),
1522 &right->map_token, &right->kaddr,
1523 &right->map_start, &right->map_len,
1524 KM_USER1);
1525 }
1526 push_space -= btrfs_item_size(right, item);
1527 btrfs_set_item_offset(right, item, push_space);
1528 }
1529
1530 if (right->map_token) {
1531 unmap_extent_buffer(right, right->map_token, KM_USER1);
1532 right->map_token = NULL;
00ec4c51 1533 }
7518a238 1534 left_nritems -= push_items;
5f39d397 1535 btrfs_set_header_nritems(left, left_nritems);
00ec4c51 1536
5f39d397
CM
1537 btrfs_mark_buffer_dirty(left);
1538 btrfs_mark_buffer_dirty(right);
a429e513 1539
5f39d397
CM
1540 btrfs_item_key(right, &disk_key, 0);
1541 btrfs_set_node_key(upper, &disk_key, slot + 1);
d6025579 1542 btrfs_mark_buffer_dirty(upper);
02217ed2 1543
00ec4c51 1544 /* then fixup the leaf pointer in the path */
7518a238
CM
1545 if (path->slots[0] >= left_nritems) {
1546 path->slots[0] -= left_nritems;
5f39d397
CM
1547 free_extent_buffer(path->nodes[0]);
1548 path->nodes[0] = right;
00ec4c51
CM
1549 path->slots[1] += 1;
1550 } else {
5f39d397 1551 free_extent_buffer(right);
00ec4c51
CM
1552 }
1553 return 0;
1554}
74123bd7
CM
1555/*
1556 * push some data in the path leaf to the left, trying to free up at
1557 * least data_size bytes. returns zero if the push worked, nonzero otherwise
1558 */
e089f05c
CM
1559static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1560 *root, struct btrfs_path *path, int data_size)
be0e5c09 1561{
5f39d397
CM
1562 struct btrfs_disk_key disk_key;
1563 struct extent_buffer *right = path->nodes[0];
1564 struct extent_buffer *left;
be0e5c09
CM
1565 int slot;
1566 int i;
1567 int free_space;
1568 int push_space = 0;
1569 int push_items = 0;
0783fcfc 1570 struct btrfs_item *item;
7518a238 1571 u32 old_left_nritems;
5f39d397 1572 u32 right_nritems;
aa5d6bed
CM
1573 int ret = 0;
1574 int wret;
db94535d
CM
1575 u32 this_item_size;
1576 u32 old_left_item_size;
be0e5c09
CM
1577
1578 slot = path->slots[1];
5f39d397 1579 if (slot == 0)
be0e5c09 1580 return 1;
5f39d397 1581 if (!path->nodes[1])
be0e5c09 1582 return 1;
5f39d397
CM
1583
1584 left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
db94535d 1585 slot - 1), root->leafsize);
123abc88 1586 free_space = btrfs_leaf_free_space(root, left);
0783fcfc 1587 if (free_space < data_size + sizeof(struct btrfs_item)) {
5f39d397 1588 free_extent_buffer(left);
be0e5c09
CM
1589 return 1;
1590 }
02217ed2
CM
1591
1592 /* cow and double check */
5f39d397
CM
1593 ret = btrfs_cow_block(trans, root, left,
1594 path->nodes[1], slot - 1, &left);
54aa1f4d
CM
1595 if (ret) {
1596 /* we hit -ENOSPC, but it isn't fatal here */
5f39d397 1597 free_extent_buffer(left);
54aa1f4d
CM
1598 return 1;
1599 }
123abc88 1600 free_space = btrfs_leaf_free_space(root, left);
0783fcfc 1601 if (free_space < data_size + sizeof(struct btrfs_item)) {
5f39d397 1602 free_extent_buffer(left);
02217ed2
CM
1603 return 1;
1604 }
1605
5f39d397
CM
1606 right_nritems = btrfs_header_nritems(right);
1607 if (right_nritems == 0) {
1608 free_extent_buffer(left);
a429e513
CM
1609 return 1;
1610 }
1611
5f39d397
CM
1612 for (i = 0; i < right_nritems - 1; i++) {
1613 item = btrfs_item_nr(right, i);
db94535d
CM
1614 if (!right->map_token) {
1615 map_extent_buffer(right, (unsigned long)item,
1616 sizeof(struct btrfs_item),
1617 &right->map_token, &right->kaddr,
1618 &right->map_start, &right->map_len,
1619 KM_USER1);
1620 }
1621
be0e5c09
CM
1622 if (path->slots[0] == i)
1623 push_space += data_size + sizeof(*item);
db94535d
CM
1624
1625 this_item_size = btrfs_item_size(right, item);
1626 if (this_item_size + sizeof(*item) + push_space > free_space)
be0e5c09 1627 break;
db94535d 1628
be0e5c09 1629 push_items++;
db94535d
CM
1630 push_space += this_item_size + sizeof(*item);
1631 }
1632
1633 if (right->map_token) {
1634 unmap_extent_buffer(right, right->map_token, KM_USER1);
1635 right->map_token = NULL;
be0e5c09 1636 }
db94535d 1637
be0e5c09 1638 if (push_items == 0) {
5f39d397 1639 free_extent_buffer(left);
be0e5c09
CM
1640 return 1;
1641 }
5f39d397 1642 if (push_items == btrfs_header_nritems(right))
a429e513 1643 WARN_ON(1);
5f39d397 1644
be0e5c09 1645 /* push data from right to left */
5f39d397
CM
1646 copy_extent_buffer(left, right,
1647 btrfs_item_nr_offset(btrfs_header_nritems(left)),
1648 btrfs_item_nr_offset(0),
1649 push_items * sizeof(struct btrfs_item));
1650
123abc88 1651 push_space = BTRFS_LEAF_DATA_SIZE(root) -
5f39d397
CM
1652 btrfs_item_offset_nr(right, push_items -1);
1653
1654 copy_extent_buffer(left, right, btrfs_leaf_data(left) +
d6025579
CM
1655 leaf_data_end(root, left) - push_space,
1656 btrfs_leaf_data(right) +
5f39d397 1657 btrfs_item_offset_nr(right, push_items - 1),
d6025579 1658 push_space);
5f39d397 1659 old_left_nritems = btrfs_header_nritems(left);
eb60ceac
CM
1660 BUG_ON(old_left_nritems < 0);
1661
db94535d 1662 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
0783fcfc 1663 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
5f39d397 1664 u32 ioff;
db94535d 1665
5f39d397 1666 item = btrfs_item_nr(left, i);
db94535d
CM
1667 if (!left->map_token) {
1668 map_extent_buffer(left, (unsigned long)item,
1669 sizeof(struct btrfs_item),
1670 &left->map_token, &left->kaddr,
1671 &left->map_start, &left->map_len,
1672 KM_USER1);
1673 }
1674
5f39d397
CM
1675 ioff = btrfs_item_offset(left, item);
1676 btrfs_set_item_offset(left, item,
db94535d 1677 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
be0e5c09 1678 }
5f39d397 1679 btrfs_set_header_nritems(left, old_left_nritems + push_items);
db94535d
CM
1680 if (left->map_token) {
1681 unmap_extent_buffer(left, left->map_token, KM_USER1);
1682 left->map_token = NULL;
1683 }
be0e5c09
CM
1684
1685 /* fixup right node */
5f39d397
CM
1686 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1687 leaf_data_end(root, right);
1688 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1689 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1690 btrfs_leaf_data(right) +
1691 leaf_data_end(root, right), push_space);
1692
1693 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1694 btrfs_item_nr_offset(push_items),
1695 (btrfs_header_nritems(right) - push_items) *
1696 sizeof(struct btrfs_item));
1697
1698 right_nritems = btrfs_header_nritems(right) - push_items;
1699 btrfs_set_header_nritems(right, right_nritems);
123abc88 1700 push_space = BTRFS_LEAF_DATA_SIZE(root);
eb60ceac 1701
5f39d397
CM
1702 for (i = 0; i < right_nritems; i++) {
1703 item = btrfs_item_nr(right, i);
db94535d
CM
1704
1705 if (!right->map_token) {
1706 map_extent_buffer(right, (unsigned long)item,
1707 sizeof(struct btrfs_item),
1708 &right->map_token, &right->kaddr,
1709 &right->map_start, &right->map_len,
1710 KM_USER1);
1711 }
1712
1713 push_space = push_space - btrfs_item_size(right, item);
1714 btrfs_set_item_offset(right, item, push_space);
1715 }
1716 if (right->map_token) {
1717 unmap_extent_buffer(right, right->map_token, KM_USER1);
1718 right->map_token = NULL;
be0e5c09 1719 }
eb60ceac 1720
5f39d397
CM
1721 btrfs_mark_buffer_dirty(left);
1722 btrfs_mark_buffer_dirty(right);
098f59c2 1723
5f39d397
CM
1724 btrfs_item_key(right, &disk_key, 0);
1725 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
aa5d6bed
CM
1726 if (wret)
1727 ret = wret;
be0e5c09
CM
1728
1729 /* then fixup the leaf pointer in the path */
1730 if (path->slots[0] < push_items) {
1731 path->slots[0] += old_left_nritems;
5f39d397
CM
1732 free_extent_buffer(path->nodes[0]);
1733 path->nodes[0] = left;
be0e5c09
CM
1734 path->slots[1] -= 1;
1735 } else {
5f39d397 1736 free_extent_buffer(left);
be0e5c09
CM
1737 path->slots[0] -= push_items;
1738 }
eb60ceac 1739 BUG_ON(path->slots[0] < 0);
aa5d6bed 1740 return ret;
be0e5c09
CM
1741}
1742
74123bd7
CM
1743/*
1744 * split the path's leaf in two, making sure there is at least data_size
1745 * available for the resulting leaf level of the path.
aa5d6bed
CM
1746 *
1747 * returns 0 if all went well and < 0 on failure.
74123bd7 1748 */
e089f05c 1749static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
d4dbff95
CM
1750 *root, struct btrfs_key *ins_key,
1751 struct btrfs_path *path, int data_size)
be0e5c09 1752{
5f39d397 1753 struct extent_buffer *l;
7518a238 1754 u32 nritems;
eb60ceac
CM
1755 int mid;
1756 int slot;
5f39d397 1757 struct extent_buffer *right;
0783fcfc 1758 int space_needed = data_size + sizeof(struct btrfs_item);
be0e5c09
CM
1759 int data_copy_size;
1760 int rt_data_off;
1761 int i;
d4dbff95 1762 int ret = 0;
aa5d6bed 1763 int wret;
d4dbff95
CM
1764 int double_split = 0;
1765 struct btrfs_disk_key disk_key;
aa5d6bed 1766
40689478 1767 /* first try to make some room by pushing left and right */
e089f05c 1768 wret = push_leaf_left(trans, root, path, data_size);
eaee50e8
CM
1769 if (wret < 0)
1770 return wret;
1771 if (wret) {
e089f05c 1772 wret = push_leaf_right(trans, root, path, data_size);
eaee50e8
CM
1773 if (wret < 0)
1774 return wret;
1775 }
5f39d397 1776 l = path->nodes[0];
aa5d6bed
CM
1777
1778 /* did the pushes work? */
123abc88
CM
1779 if (btrfs_leaf_free_space(root, l) >=
1780 sizeof(struct btrfs_item) + data_size)
aa5d6bed
CM
1781 return 0;
1782
5c680ed6 1783 if (!path->nodes[1]) {
e089f05c 1784 ret = insert_new_root(trans, root, path, 1);
5c680ed6
CM
1785 if (ret)
1786 return ret;
1787 }
eb60ceac 1788 slot = path->slots[0];
5f39d397 1789 nritems = btrfs_header_nritems(l);
eb60ceac 1790 mid = (nritems + 1)/ 2;
54aa1f4d 1791
db94535d
CM
1792 right = btrfs_alloc_free_block(trans, root, root->leafsize,
1793 l->start, 0);
5f39d397
CM
1794 if (IS_ERR(right))
1795 return PTR_ERR(right);
1796
1797 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
db94535d 1798 btrfs_set_header_bytenr(right, right->start);
5f39d397
CM
1799 btrfs_set_header_generation(right, trans->transid);
1800 btrfs_set_header_owner(right, root->root_key.objectid);
1801 btrfs_set_header_level(right, 0);
1802 write_extent_buffer(right, root->fs_info->fsid,
1803 (unsigned long)btrfs_header_fsid(right),
1804 BTRFS_FSID_SIZE);
1805
d4dbff95
CM
1806 if (mid <= slot) {
1807 if (nritems == 1 ||
1808 leaf_space_used(l, mid, nritems - mid) + space_needed >
1809 BTRFS_LEAF_DATA_SIZE(root)) {
1810 if (slot >= nritems) {
1811 btrfs_cpu_key_to_disk(&disk_key, ins_key);
5f39d397 1812 btrfs_set_header_nritems(right, 0);
d4dbff95 1813 wret = insert_ptr(trans, root, path,
db94535d 1814 &disk_key, right->start,
d4dbff95
CM
1815 path->slots[1] + 1, 1);
1816 if (wret)
1817 ret = wret;
5f39d397
CM
1818 free_extent_buffer(path->nodes[0]);
1819 path->nodes[0] = right;
d4dbff95
CM
1820 path->slots[0] = 0;
1821 path->slots[1] += 1;
1822 return ret;
1823 }
1824 mid = slot;
1825 double_split = 1;
1826 }
1827 } else {
1828 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1829 BTRFS_LEAF_DATA_SIZE(root)) {
1830 if (slot == 0) {
1831 btrfs_cpu_key_to_disk(&disk_key, ins_key);
5f39d397 1832 btrfs_set_header_nritems(right, 0);
d4dbff95
CM
1833 wret = insert_ptr(trans, root, path,
1834 &disk_key,
db94535d 1835 right->start,
098f59c2 1836 path->slots[1], 1);
d4dbff95
CM
1837 if (wret)
1838 ret = wret;
5f39d397
CM
1839 free_extent_buffer(path->nodes[0]);
1840 path->nodes[0] = right;
d4dbff95 1841 path->slots[0] = 0;
a429e513
CM
1842 if (path->slots[1] == 0) {
1843 wret = fixup_low_keys(trans, root,
1844 path, &disk_key, 1);
1845 if (wret)
1846 ret = wret;
1847 }
d4dbff95
CM
1848 return ret;
1849 }
1850 mid = slot;
1851 double_split = 1;
1852 }
1853 }
5f39d397
CM
1854 nritems = nritems - mid;
1855 btrfs_set_header_nritems(right, nritems);
1856 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
1857
1858 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1859 btrfs_item_nr_offset(mid),
1860 nritems * sizeof(struct btrfs_item));
1861
1862 copy_extent_buffer(right, l,
d6025579
CM
1863 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1864 data_copy_size, btrfs_leaf_data(l) +
1865 leaf_data_end(root, l), data_copy_size);
5f39d397 1866
123abc88 1867 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
5f39d397 1868 btrfs_item_end_nr(l, mid);
74123bd7 1869
5f39d397
CM
1870 for (i = 0; i < nritems; i++) {
1871 struct btrfs_item *item = btrfs_item_nr(right, i);
db94535d
CM
1872 u32 ioff;
1873
1874 if (!right->map_token) {
1875 map_extent_buffer(right, (unsigned long)item,
1876 sizeof(struct btrfs_item),
1877 &right->map_token, &right->kaddr,
1878 &right->map_start, &right->map_len,
1879 KM_USER1);
1880 }
1881
1882 ioff = btrfs_item_offset(right, item);
5f39d397 1883 btrfs_set_item_offset(right, item, ioff + rt_data_off);
0783fcfc 1884 }
74123bd7 1885
db94535d
CM
1886 if (right->map_token) {
1887 unmap_extent_buffer(right, right->map_token, KM_USER1);
1888 right->map_token = NULL;
1889 }
1890
5f39d397 1891 btrfs_set_header_nritems(l, mid);
aa5d6bed 1892 ret = 0;
5f39d397 1893 btrfs_item_key(right, &disk_key, 0);
db94535d
CM
1894 wret = insert_ptr(trans, root, path, &disk_key, right->start,
1895 path->slots[1] + 1, 1);
aa5d6bed
CM
1896 if (wret)
1897 ret = wret;
5f39d397
CM
1898
1899 btrfs_mark_buffer_dirty(right);
1900 btrfs_mark_buffer_dirty(l);
eb60ceac 1901 BUG_ON(path->slots[0] != slot);
5f39d397 1902
be0e5c09 1903 if (mid <= slot) {
5f39d397
CM
1904 free_extent_buffer(path->nodes[0]);
1905 path->nodes[0] = right;
be0e5c09
CM
1906 path->slots[0] -= mid;
1907 path->slots[1] += 1;
eb60ceac 1908 } else
5f39d397
CM
1909 free_extent_buffer(right);
1910
eb60ceac 1911 BUG_ON(path->slots[0] < 0);
d4dbff95
CM
1912
1913 if (!double_split)
1914 return ret;
5f39d397 1915
db94535d
CM
1916 right = btrfs_alloc_free_block(trans, root, root->leafsize,
1917 l->start, 0);
5f39d397
CM
1918 if (IS_ERR(right))
1919 return PTR_ERR(right);
1920
1921 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
db94535d 1922 btrfs_set_header_bytenr(right, right->start);
5f39d397
CM
1923 btrfs_set_header_generation(right, trans->transid);
1924 btrfs_set_header_owner(right, root->root_key.objectid);
1925 btrfs_set_header_level(right, 0);
1926 write_extent_buffer(right, root->fs_info->fsid,
1927 (unsigned long)btrfs_header_fsid(right),
1928 BTRFS_FSID_SIZE);
1929
d4dbff95 1930 btrfs_cpu_key_to_disk(&disk_key, ins_key);
5f39d397 1931 btrfs_set_header_nritems(right, 0);
d4dbff95 1932 wret = insert_ptr(trans, root, path,
db94535d 1933 &disk_key, right->start,
d4dbff95
CM
1934 path->slots[1], 1);
1935 if (wret)
1936 ret = wret;
a429e513
CM
1937 if (path->slots[1] == 0) {
1938 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1939 if (wret)
1940 ret = wret;
1941 }
5f39d397
CM
1942 free_extent_buffer(path->nodes[0]);
1943 path->nodes[0] = right;
d4dbff95 1944 path->slots[0] = 0;
be0e5c09
CM
1945 return ret;
1946}
1947
b18c6685
CM
1948int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1949 struct btrfs_root *root,
1950 struct btrfs_path *path,
1951 u32 new_size)
1952{
1953 int ret = 0;
1954 int slot;
1955 int slot_orig;
5f39d397
CM
1956 struct extent_buffer *leaf;
1957 struct btrfs_item *item;
b18c6685
CM
1958 u32 nritems;
1959 unsigned int data_end;
1960 unsigned int old_data_start;
1961 unsigned int old_size;
1962 unsigned int size_diff;
1963 int i;
1964
1965 slot_orig = path->slots[0];
5f39d397 1966 leaf = path->nodes[0];
b18c6685 1967
5f39d397 1968 nritems = btrfs_header_nritems(leaf);
b18c6685
CM
1969 data_end = leaf_data_end(root, leaf);
1970
1971 slot = path->slots[0];
5f39d397
CM
1972 old_data_start = btrfs_item_offset_nr(leaf, slot);
1973 old_size = btrfs_item_size_nr(leaf, slot);
b18c6685
CM
1974 BUG_ON(old_size <= new_size);
1975 size_diff = old_size - new_size;
1976
1977 BUG_ON(slot < 0);
1978 BUG_ON(slot >= nritems);
1979
1980 /*
1981 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1982 */
1983 /* first correct the data pointers */
1984 for (i = slot; i < nritems; i++) {
5f39d397
CM
1985 u32 ioff;
1986 item = btrfs_item_nr(leaf, i);
db94535d
CM
1987
1988 if (!leaf->map_token) {
1989 map_extent_buffer(leaf, (unsigned long)item,
1990 sizeof(struct btrfs_item),
1991 &leaf->map_token, &leaf->kaddr,
1992 &leaf->map_start, &leaf->map_len,
1993 KM_USER1);
1994 }
1995
5f39d397
CM
1996 ioff = btrfs_item_offset(leaf, item);
1997 btrfs_set_item_offset(leaf, item, ioff + size_diff);
b18c6685 1998 }
db94535d
CM
1999
2000 if (leaf->map_token) {
2001 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2002 leaf->map_token = NULL;
2003 }
2004
b18c6685 2005 /* shift the data */
5f39d397 2006 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
b18c6685
CM
2007 data_end + size_diff, btrfs_leaf_data(leaf) +
2008 data_end, old_data_start + new_size - data_end);
5f39d397
CM
2009
2010 item = btrfs_item_nr(leaf, slot);
2011 btrfs_set_item_size(leaf, item, new_size);
2012 btrfs_mark_buffer_dirty(leaf);
b18c6685
CM
2013
2014 ret = 0;
5f39d397
CM
2015 if (btrfs_leaf_free_space(root, leaf) < 0) {
2016 btrfs_print_leaf(root, leaf);
b18c6685 2017 BUG();
5f39d397 2018 }
b18c6685
CM
2019 return ret;
2020}
2021
5f39d397
CM
2022int btrfs_extend_item(struct btrfs_trans_handle *trans,
2023 struct btrfs_root *root, struct btrfs_path *path,
2024 u32 data_size)
6567e837
CM
2025{
2026 int ret = 0;
2027 int slot;
2028 int slot_orig;
5f39d397
CM
2029 struct extent_buffer *leaf;
2030 struct btrfs_item *item;
6567e837
CM
2031 u32 nritems;
2032 unsigned int data_end;
2033 unsigned int old_data;
2034 unsigned int old_size;
2035 int i;
2036
2037 slot_orig = path->slots[0];
5f39d397 2038 leaf = path->nodes[0];
6567e837 2039
5f39d397 2040 nritems = btrfs_header_nritems(leaf);
6567e837
CM
2041 data_end = leaf_data_end(root, leaf);
2042
5f39d397
CM
2043 if (btrfs_leaf_free_space(root, leaf) < data_size) {
2044 btrfs_print_leaf(root, leaf);
6567e837 2045 BUG();
5f39d397 2046 }
6567e837 2047 slot = path->slots[0];
5f39d397 2048 old_data = btrfs_item_end_nr(leaf, slot);
6567e837
CM
2049
2050 BUG_ON(slot < 0);
2051 BUG_ON(slot >= nritems);
2052
2053 /*
2054 * item0..itemN ... dataN.offset..dataN.size .. data0.size
2055 */
2056 /* first correct the data pointers */
2057 for (i = slot; i < nritems; i++) {
5f39d397
CM
2058 u32 ioff;
2059 item = btrfs_item_nr(leaf, i);
db94535d
CM
2060
2061 if (!leaf->map_token) {
2062 map_extent_buffer(leaf, (unsigned long)item,
2063 sizeof(struct btrfs_item),
2064 &leaf->map_token, &leaf->kaddr,
2065 &leaf->map_start, &leaf->map_len,
2066 KM_USER1);
2067 }
5f39d397
CM
2068 ioff = btrfs_item_offset(leaf, item);
2069 btrfs_set_item_offset(leaf, item, ioff - data_size);
6567e837 2070 }
5f39d397 2071
db94535d
CM
2072 if (leaf->map_token) {
2073 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2074 leaf->map_token = NULL;
2075 }
2076
6567e837 2077 /* shift the data */
5f39d397 2078 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
6567e837
CM
2079 data_end - data_size, btrfs_leaf_data(leaf) +
2080 data_end, old_data - data_end);
5f39d397 2081
6567e837 2082 data_end = old_data;
5f39d397
CM
2083 old_size = btrfs_item_size_nr(leaf, slot);
2084 item = btrfs_item_nr(leaf, slot);
2085 btrfs_set_item_size(leaf, item, old_size + data_size);
2086 btrfs_mark_buffer_dirty(leaf);
6567e837
CM
2087
2088 ret = 0;
5f39d397
CM
2089 if (btrfs_leaf_free_space(root, leaf) < 0) {
2090 btrfs_print_leaf(root, leaf);
6567e837 2091 BUG();
5f39d397 2092 }
6567e837
CM
2093 return ret;
2094}
2095
74123bd7
CM
2096/*
2097 * Given a key and some data, insert an item into the tree.
2098 * This does all the path init required, making room in the tree if needed.
2099 */
5f39d397
CM
2100int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2101 struct btrfs_root *root,
2102 struct btrfs_path *path,
2103 struct btrfs_key *cpu_key, u32 data_size)
be0e5c09 2104{
5f39d397
CM
2105 struct extent_buffer *leaf;
2106 struct btrfs_item *item;
aa5d6bed 2107 int ret = 0;
be0e5c09 2108 int slot;
eb60ceac 2109 int slot_orig;
7518a238 2110 u32 nritems;
be0e5c09 2111 unsigned int data_end;
e2fa7227
CM
2112 struct btrfs_disk_key disk_key;
2113
2114 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
be0e5c09 2115
74123bd7 2116 /* create a root if there isn't one */
5c680ed6 2117 if (!root->node)
cfaa7295 2118 BUG();
5f39d397 2119
e089f05c 2120 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
eb60ceac 2121 if (ret == 0) {
f0930a37 2122 return -EEXIST;
aa5d6bed 2123 }
ed2ff2cb
CM
2124 if (ret < 0)
2125 goto out;
be0e5c09 2126
62e2749e 2127 slot_orig = path->slots[0];
5f39d397 2128 leaf = path->nodes[0];
74123bd7 2129
5f39d397 2130 nritems = btrfs_header_nritems(leaf);
123abc88 2131 data_end = leaf_data_end(root, leaf);
eb60ceac 2132
123abc88 2133 if (btrfs_leaf_free_space(root, leaf) <
d4dbff95 2134 sizeof(struct btrfs_item) + data_size) {
be0e5c09 2135 BUG();
d4dbff95 2136 }
5f39d397 2137
62e2749e 2138 slot = path->slots[0];
eb60ceac 2139 BUG_ON(slot < 0);
5f39d397 2140
be0e5c09
CM
2141 if (slot != nritems) {
2142 int i;
5f39d397 2143 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
be0e5c09 2144
5f39d397
CM
2145 if (old_data < data_end) {
2146 btrfs_print_leaf(root, leaf);
2147 printk("slot %d old_data %d data_end %d\n",
2148 slot, old_data, data_end);
2149 BUG_ON(1);
2150 }
be0e5c09
CM
2151 /*
2152 * item0..itemN ... dataN.offset..dataN.size .. data0.size
2153 */
2154 /* first correct the data pointers */
db94535d 2155 WARN_ON(leaf->map_token);
0783fcfc 2156 for (i = slot; i < nritems; i++) {
5f39d397 2157 u32 ioff;
db94535d 2158
5f39d397 2159 item = btrfs_item_nr(leaf, i);
db94535d
CM
2160 if (!leaf->map_token) {
2161 map_extent_buffer(leaf, (unsigned long)item,
2162 sizeof(struct btrfs_item),
2163 &leaf->map_token, &leaf->kaddr,
2164 &leaf->map_start, &leaf->map_len,
2165 KM_USER1);
2166 }
2167
5f39d397
CM
2168 ioff = btrfs_item_offset(leaf, item);
2169 btrfs_set_item_offset(leaf, item, ioff - data_size);
0783fcfc 2170 }
db94535d
CM
2171 if (leaf->map_token) {
2172 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2173 leaf->map_token = NULL;
2174 }
be0e5c09
CM
2175
2176 /* shift the items */
5f39d397
CM
2177 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2178 btrfs_item_nr_offset(slot),
d6025579 2179 (nritems - slot) * sizeof(struct btrfs_item));
be0e5c09
CM
2180
2181 /* shift the data */
5f39d397 2182 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
d6025579
CM
2183 data_end - data_size, btrfs_leaf_data(leaf) +
2184 data_end, old_data - data_end);
be0e5c09
CM
2185 data_end = old_data;
2186 }
5f39d397 2187
62e2749e 2188 /* setup the item for the new data */
5f39d397
CM
2189 btrfs_set_item_key(leaf, &disk_key, slot);
2190 item = btrfs_item_nr(leaf, slot);
2191 btrfs_set_item_offset(leaf, item, data_end - data_size);
2192 btrfs_set_item_size(leaf, item, data_size);
2193 btrfs_set_header_nritems(leaf, nritems + 1);
2194 btrfs_mark_buffer_dirty(leaf);
aa5d6bed
CM
2195
2196 ret = 0;
8e19f2cd 2197 if (slot == 0)
e089f05c 2198 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
aa5d6bed 2199
5f39d397
CM
2200 if (btrfs_leaf_free_space(root, leaf) < 0) {
2201 btrfs_print_leaf(root, leaf);
be0e5c09 2202 BUG();
5f39d397 2203 }
ed2ff2cb 2204out:
62e2749e
CM
2205 return ret;
2206}
2207
2208/*
2209 * Given a key and some data, insert an item into the tree.
2210 * This does all the path init required, making room in the tree if needed.
2211 */
e089f05c
CM
2212int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2213 *root, struct btrfs_key *cpu_key, void *data, u32
2214 data_size)
62e2749e
CM
2215{
2216 int ret = 0;
2c90e5d6 2217 struct btrfs_path *path;
5f39d397
CM
2218 struct extent_buffer *leaf;
2219 unsigned long ptr;
62e2749e 2220
2c90e5d6
CM
2221 path = btrfs_alloc_path();
2222 BUG_ON(!path);
2c90e5d6 2223 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
62e2749e 2224 if (!ret) {
5f39d397
CM
2225 leaf = path->nodes[0];
2226 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2227 write_extent_buffer(leaf, data, ptr, data_size);
2228 btrfs_mark_buffer_dirty(leaf);
62e2749e 2229 }
2c90e5d6 2230 btrfs_free_path(path);
aa5d6bed 2231 return ret;
be0e5c09
CM
2232}
2233
74123bd7 2234/*
5de08d7d 2235 * delete the pointer from a given node.
74123bd7
CM
2236 *
2237 * If the delete empties a node, the node is removed from the tree,
2238 * continuing all the way the root if required. The root is converted into
2239 * a leaf if all the nodes are emptied.
2240 */
e089f05c
CM
2241static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2242 struct btrfs_path *path, int level, int slot)
be0e5c09 2243{
5f39d397 2244 struct extent_buffer *parent = path->nodes[level];
7518a238 2245 u32 nritems;
aa5d6bed 2246 int ret = 0;
bb803951 2247 int wret;
be0e5c09 2248
5f39d397 2249 nritems = btrfs_header_nritems(parent);
bb803951 2250 if (slot != nritems -1) {
5f39d397
CM
2251 memmove_extent_buffer(parent,
2252 btrfs_node_key_ptr_offset(slot),
2253 btrfs_node_key_ptr_offset(slot + 1),
d6025579
CM
2254 sizeof(struct btrfs_key_ptr) *
2255 (nritems - slot - 1));
bb803951 2256 }
7518a238 2257 nritems--;
5f39d397 2258 btrfs_set_header_nritems(parent, nritems);
7518a238 2259 if (nritems == 0 && parent == root->node) {
5f39d397 2260 BUG_ON(btrfs_header_level(root->node) != 1);
bb803951 2261 /* just turn the root into a leaf and break */
5f39d397 2262 btrfs_set_header_level(root->node, 0);
bb803951 2263 } else if (slot == 0) {
5f39d397
CM
2264 struct btrfs_disk_key disk_key;
2265
2266 btrfs_node_key(parent, &disk_key, 0);
2267 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
0f70abe2
CM
2268 if (wret)
2269 ret = wret;
be0e5c09 2270 }
d6025579 2271 btrfs_mark_buffer_dirty(parent);
aa5d6bed 2272 return ret;
be0e5c09
CM
2273}
2274
74123bd7
CM
2275/*
2276 * delete the item at the leaf level in path. If that empties
2277 * the leaf, remove it from the tree
2278 */
e089f05c
CM
2279int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2280 struct btrfs_path *path)
be0e5c09 2281{
be0e5c09 2282 int slot;
5f39d397
CM
2283 struct extent_buffer *leaf;
2284 struct btrfs_item *item;
be0e5c09
CM
2285 int doff;
2286 int dsize;
aa5d6bed
CM
2287 int ret = 0;
2288 int wret;
7518a238 2289 u32 nritems;
be0e5c09 2290
5f39d397 2291 leaf = path->nodes[0];
4920c9ac 2292 slot = path->slots[0];
5f39d397
CM
2293 doff = btrfs_item_offset_nr(leaf, slot);
2294 dsize = btrfs_item_size_nr(leaf, slot);
2295 nritems = btrfs_header_nritems(leaf);
be0e5c09 2296
7518a238 2297 if (slot != nritems - 1) {
be0e5c09 2298 int i;
123abc88 2299 int data_end = leaf_data_end(root, leaf);
5f39d397
CM
2300
2301 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
d6025579
CM
2302 data_end + dsize,
2303 btrfs_leaf_data(leaf) + data_end,
2304 doff - data_end);
5f39d397 2305
0783fcfc 2306 for (i = slot + 1; i < nritems; i++) {
5f39d397 2307 u32 ioff;
db94535d 2308
5f39d397 2309 item = btrfs_item_nr(leaf, i);
db94535d
CM
2310 if (!leaf->map_token) {
2311 map_extent_buffer(leaf, (unsigned long)item,
2312 sizeof(struct btrfs_item),
2313 &leaf->map_token, &leaf->kaddr,
2314 &leaf->map_start, &leaf->map_len,
2315 KM_USER1);
2316 }
5f39d397
CM
2317 ioff = btrfs_item_offset(leaf, item);
2318 btrfs_set_item_offset(leaf, item, ioff + dsize);
0783fcfc 2319 }
db94535d
CM
2320
2321 if (leaf->map_token) {
2322 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2323 leaf->map_token = NULL;
2324 }
2325
5f39d397
CM
2326 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2327 btrfs_item_nr_offset(slot + 1),
d6025579
CM
2328 sizeof(struct btrfs_item) *
2329 (nritems - slot - 1));
be0e5c09 2330 }
5f39d397 2331 btrfs_set_header_nritems(leaf, nritems - 1);
7518a238 2332 nritems--;
5f39d397 2333
74123bd7 2334 /* delete the leaf if we've emptied it */
7518a238 2335 if (nritems == 0) {
5f39d397
CM
2336 if (leaf == root->node) {
2337 btrfs_set_header_level(leaf, 0);
9a8dd150 2338 } else {
5f39d397
CM
2339 clean_tree_block(trans, root, leaf);
2340 wait_on_tree_block_writeback(root, leaf);
e089f05c 2341 wret = del_ptr(trans, root, path, 1, path->slots[1]);
aa5d6bed
CM
2342 if (wret)
2343 ret = wret;
e089f05c 2344 wret = btrfs_free_extent(trans, root,
db94535d 2345 leaf->start, leaf->len, 1);
0f70abe2
CM
2346 if (wret)
2347 ret = wret;
9a8dd150 2348 }
be0e5c09 2349 } else {
7518a238 2350 int used = leaf_space_used(leaf, 0, nritems);
aa5d6bed 2351 if (slot == 0) {
5f39d397
CM
2352 struct btrfs_disk_key disk_key;
2353
2354 btrfs_item_key(leaf, &disk_key, 0);
e089f05c 2355 wret = fixup_low_keys(trans, root, path,
5f39d397 2356 &disk_key, 1);
aa5d6bed
CM
2357 if (wret)
2358 ret = wret;
2359 }
aa5d6bed 2360
74123bd7 2361 /* delete the leaf if it is mostly empty */
123abc88 2362 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
be0e5c09
CM
2363 /* push_leaf_left fixes the path.
2364 * make sure the path still points to our leaf
2365 * for possible call to del_ptr below
2366 */
4920c9ac 2367 slot = path->slots[1];
5f39d397
CM
2368 extent_buffer_get(leaf);
2369
e089f05c 2370 wret = push_leaf_left(trans, root, path, 1);
54aa1f4d 2371 if (wret < 0 && wret != -ENOSPC)
aa5d6bed 2372 ret = wret;
5f39d397
CM
2373
2374 if (path->nodes[0] == leaf &&
2375 btrfs_header_nritems(leaf)) {
e089f05c 2376 wret = push_leaf_right(trans, root, path, 1);
54aa1f4d 2377 if (wret < 0 && wret != -ENOSPC)
aa5d6bed
CM
2378 ret = wret;
2379 }
5f39d397
CM
2380
2381 if (btrfs_header_nritems(leaf) == 0) {
db94535d
CM
2382 u64 bytenr = leaf->start;
2383 u32 blocksize = leaf->len;
5f39d397
CM
2384
2385 clean_tree_block(trans, root, leaf);
2386 wait_on_tree_block_writeback(root, leaf);
2387
e089f05c 2388 wret = del_ptr(trans, root, path, 1, slot);
aa5d6bed
CM
2389 if (wret)
2390 ret = wret;
5f39d397
CM
2391
2392 free_extent_buffer(leaf);
db94535d
CM
2393 wret = btrfs_free_extent(trans, root, bytenr,
2394 blocksize, 1);
0f70abe2
CM
2395 if (wret)
2396 ret = wret;
5de08d7d 2397 } else {
5f39d397
CM
2398 btrfs_mark_buffer_dirty(leaf);
2399 free_extent_buffer(leaf);
be0e5c09 2400 }
d5719762 2401 } else {
5f39d397 2402 btrfs_mark_buffer_dirty(leaf);
be0e5c09
CM
2403 }
2404 }
aa5d6bed 2405 return ret;
be0e5c09
CM
2406}
2407
97571fd0
CM
2408/*
2409 * walk up the tree as far as required to find the next leaf.
0f70abe2
CM
2410 * returns 0 if it found something or 1 if there are no greater leaves.
2411 * returns < 0 on io errors.
97571fd0 2412 */
234b63a0 2413int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
d97e63b6
CM
2414{
2415 int slot;
2416 int level = 1;
db94535d 2417 u64 bytenr;
5f39d397
CM
2418 struct extent_buffer *c;
2419 struct extent_buffer *next = NULL;
d97e63b6 2420
234b63a0 2421 while(level < BTRFS_MAX_LEVEL) {
d97e63b6 2422 if (!path->nodes[level])
0f70abe2 2423 return 1;
5f39d397 2424
d97e63b6
CM
2425 slot = path->slots[level] + 1;
2426 c = path->nodes[level];
5f39d397 2427 if (slot >= btrfs_header_nritems(c)) {
d97e63b6
CM
2428 level++;
2429 continue;
2430 }
5f39d397 2431
db94535d 2432 bytenr = btrfs_node_blockptr(c, slot);
cfaa7295 2433 if (next)
5f39d397
CM
2434 free_extent_buffer(next);
2435
6702ed49
CM
2436 if (path->reada)
2437 reada_for_search(root, path, level, slot);
5f39d397 2438
db94535d
CM
2439 next = read_tree_block(root, bytenr,
2440 btrfs_level_size(root, level -1));
d97e63b6
CM
2441 break;
2442 }
2443 path->slots[level] = slot;
2444 while(1) {
2445 level--;
2446 c = path->nodes[level];
5f39d397 2447 free_extent_buffer(c);
d97e63b6
CM
2448 path->nodes[level] = next;
2449 path->slots[level] = 0;
2450 if (!level)
2451 break;
6702ed49 2452 if (path->reada)
32020611 2453 reada_for_search(root, path, level, 0);
db94535d
CM
2454 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2455 btrfs_level_size(root, level - 1));
d97e63b6
CM
2456 }
2457 return 0;
2458}