Btrfs: write out free space cache

fs/btrfs/free-space-cache.c
/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static void recalculate_thresholds(struct btrfs_block_group_cache
				   *block_group);
static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info);

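/*
 * Look up the free space cache inode for @block_group.  The result is
 * cached in block_group->inode (unless the filesystem is closing) so that
 * repeated lookups only pay for an igrab().  Returns an ERR_PTR on failure.
 */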
struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(root, path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(root, path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	spin_lock(&block_group->lock);
	if (!root->fs_info->closing) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

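/*
 * Create the inode that backs the free space cache for @block_group: an
 * unlinked regular file (nlink 1, no directory entry) flagged NOCOMPRESS,
 * PREALLOC and NODATASUM, plus the free space header item that points to it.
 */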
int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 objectid;
	int ret;

	ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
	if (ret < 0)
		return ret;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		return ret;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
			      BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item,
				    block_group->key.objectid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(root, path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	return 0;
}

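/*
 * Throw away the current contents of the free space cache file: reserve
 * metadata space from the orphan block reserve, truncate the inode to zero
 * and write back the updated inode item.
 */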
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	loff_t oldsize;
	int ret = 0;

	trans->block_rsv = root->orphan_block_rsv;
	ret = btrfs_block_rsv_check(trans, root,
				    root->orphan_block_rsv,
				    0, 5);
	if (ret)
		return ret;

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret) {
		WARN_ON(1);
		return ret;
	}

	return btrfs_update_inode(trans, root, inode);
}

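/*
 * Write the in-memory free space for @block_group out to its cache inode.
 *
 * On-disk layout: page 0 carries one crc32 per cache page, then the
 * generation (transid), then the first extent/bitmap entries; later pages
 * carry more entries, followed by one full page per bitmap.  Returns 1 if
 * the cache was written, 0 if it wasn't (or couldn't be).
 */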
int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct page *page;
	struct extent_state *cached_state = NULL;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 bytes = 0;
	u32 *crc, *checksums;
	pgoff_t index = 0, last_index = 0;
	unsigned long first_page_offset;
	int num_checksums;
	int entries = 0;
	int bitmaps = 0;
	int ret = 0;

	root = root->fs_info->tree_root;

	INIT_LIST_HEAD(&bitmap_list);

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	if (!i_size_read(inode)) {
		iput(inode);
		return 0;
	}

	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
	filemap_write_and_wait(inode->i_mapping);
	btrfs_wait_ordered_range(inode, inode->i_size &
				 ~(root->sectorsize - 1), (u64)-1);

	/* We need a checksum per page. */
	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
	crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
	if (!crc) {
		iput(inode);
		return 0;
	}

	/*
	 * Since the first page has all of our checksums and our generation we
	 * need to calculate the offset into the page that we can start writing
	 * our entries.
	 */
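	/*
	 * Page 0 thus ends up laid out as
	 *
	 *   [ u32 crc[num_checksums] | u64 generation | entries ... ]
	 *
	 * with first_page_offset marking where the entries begin.
	 */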
	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);

	node = rb_first(&block_group->free_space_offset);
	if (!node)
		goto out_free;

	/*
	 * Lock all pages first so we can lock the extent safely.
	 *
	 * NOTE: Because we hold the ref the entire time we're going to write to
	 * the page find_get_page should never fail, so we don't do a check
	 * after find_get_page at this point.  Just putting this here so people
	 * know and don't freak out.
	 */
	while (index <= last_index) {
		page = grab_cache_page(inode->i_mapping, index);
		if (!page) {
			pgoff_t i = 0;

			while (i < index) {
				page = find_get_page(inode->i_mapping, i);
				unlock_page(page);
				page_cache_release(page);
				page_cache_release(page);
				i++;
			}
			goto out_free;
		}
		index++;
	}

	index = 0;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

	/* Write out the extent entries */
	do {
		struct btrfs_free_space_entry *entry;
		void *addr;
		unsigned long offset = 0;
		unsigned long start_offset = 0;

		if (index == 0) {
			start_offset = first_page_offset;
			offset = start_offset;
		}

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		entry = addr + start_offset;

		memset(addr, 0, PAGE_CACHE_SIZE);
		while (1) {
			struct btrfs_free_space *e;

			e = rb_entry(node, struct btrfs_free_space, offset_index);
			entries++;

			entry->offset = cpu_to_le64(e->offset);
			entry->bytes = cpu_to_le64(e->bytes);
			if (e->bitmap) {
				entry->type = BTRFS_FREE_SPACE_BITMAP;
				list_add_tail(&e->list, &bitmap_list);
				bitmaps++;
			} else {
				entry->type = BTRFS_FREE_SPACE_EXTENT;
			}
			node = rb_next(node);
			if (!node)
				break;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				break;
			entry++;
		}
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
				       PAGE_CACHE_SIZE - start_offset);
		kunmap(page);

		btrfs_csum_final(*crc, (char *)crc);
		crc++;

		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);

		/*
		 * We need to release the reference we got for grab_cache_page,
		 * except for the first page which will hold our checksums, we
		 * do that below.
		 */
		if (index != 0) {
			unlock_page(page);
			page_cache_release(page);
		}

		page_cache_release(page);

		index++;
	} while (node);

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		void *addr;
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
		kunmap(page);
		btrfs_csum_final(*crc, (char *)crc);
		crc++;
		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
		list_del_init(&entry->list);
		index++;
	}

	/* Zero out the rest of the pages just to make sure */
	while (index <= last_index) {
		void *addr;

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		memset(addr, 0, PAGE_CACHE_SIZE);
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
		bytes += PAGE_CACHE_SIZE;
		index++;
	}

	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);

	/* Write the checksums and trans id to the first page */
	{
		void *addr;
		u64 *gen;

		page = find_get_page(inode->i_mapping, 0);

		addr = kmap(page);
		memcpy(addr, checksums, sizeof(u32) * num_checksums);
		gen = addr + (sizeof(u32) * num_checksums);
		*gen = trans->transid;
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
	}
	BTRFS_I(inode)->generation = trans->transid;

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	filemap_write_and_wait(inode->i_mapping);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
	if (ret < 0) {
		ret = 0;
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
		goto out_free;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != block_group->key.objectid) {
			ret = 0;
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
					 GFP_NOFS);
			btrfs_release_path(root, path);
			goto out_free;
		}
	}
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	ret = 1;

out_free:
	if (ret == 0) {
		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		BTRFS_I(inode)->generation = 0;
	}
	kfree(checksums);
	btrfs_update_inode(trans, root, inode);
	iput(inode);
	return ret;
}

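/*
 * Bitmap helpers.  Each bitmap entry tracks BITS_PER_BITMAP sectors, one
 * bit per sector, starting at a bitmap-aligned offset.  With 4K pages and a
 * 4K sectorsize that is 32768 bits, i.e. 128MB of space per bitmap.
 */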
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

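/*
 * Link/unlink an entry into the offset-sorted rbtree while keeping the
 * block group's free_space and free_extents counters in sync.
 */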
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

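/*
 * Clear/set a range of bits in a bitmap entry and adjust the cached byte
 * counts.  The range must fall entirely within the bitmap.
 */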
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}

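/*
 * Scan a bitmap for a free run of at least *bytes starting at or after
 * *offset.  On success returns 0 and updates *offset and *bytes to describe
 * the run that was found; returns -1 if no run is large enough.
 */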
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}

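/*
 * Find the first entry, extent or bitmap, that can satisfy a request for
 * *bytes at or after *offset, updating *offset and *bytes to the usable
 * range on success.  (The @debug parameter is currently unused.)
 */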
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

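/*
 * Turn @info into a bitmap entry anchored at the bitmap-aligned offset
 * covering @offset.  The caller must already have allocated info->bitmap.
 */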
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}

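/*
 * Remove *bytes at *offset from a bitmap entry.  Returns 0 when everything
 * was removed, -EAGAIN when the caller should retry via the extent path,
 * and -EINVAL when the space wasn't actually tracked here.
 */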
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}

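/*
 * Try to store @info in a bitmap, creating a new bitmap if the extent
 * threshold has been hit.  Returns 1 if the space was added (and @info
 * consumed), 0 if the caller should link it as a plain extent entry, or
 * -ENOMEM.
 */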
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}

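/*
 * Add [offset, offset + bytes) back into the free space cache, merging with
 * adjacent extent entries where possible and falling back to a bitmap once
 * the extent threshold has been reached.
 */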
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

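/*
 * Remove [offset, offset + bytes) from the free space cache, splitting an
 * extent entry or clearing bitmap bits as needed.  Tree-log replay is the
 * only user today, hence the paranoid checks in remove_from_bitmap().
 */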
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			       " trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}

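/*
 * Debugging helpers: dump every entry in the free space cache and total up
 * the cached free bytes of a block group.
 */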
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
	       count, (unsigned long long)bytes);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

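/*
 * Drop everything a block group tracks in memory: return all clusters to
 * the free space cache, then free every entry in the offset tree, yielding
 * tree_lock periodically along the way.
 */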
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}

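/*
 * Allocate @bytes (plus @empty_size of slack to search for) from the block
 * group's free space.  Returns the start offset, or 0 if nothing large
 * enough was found.
 */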
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it, don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

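/*
 * Cluster variant of the bitmap allocator: carve @bytes out of the bitmap
 * the cluster points to, returning the start offset or 0 on failure.
 */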
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);

	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}

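/*
 * Try to build a cluster out of a bitmap entry: look for a window of at
 * least min_bytes of contiguous free bits totalling @bytes, resetting the
 * window whenever the gaps in it grow too large.  On success the cluster is
 * left pointing at the bitmap via points_to_bitmap.
 */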
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocates with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}