/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}

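/*
 * Insert @node into the rbtree of free space entries, keyed by offset.
 * @bitmap says whether the entry being inserted is a bitmap, which decides
 * how a tie with an existing entry at the same offset is broken.
 */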
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap. So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically. If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share the same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

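/*
 * link_free_space/unlink_free_space add and remove entries from the
 * offset-sorted tree while keeping block_group->free_space and
 * ->free_extents consistent with what the tree contains.
 */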
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

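/*
 * Recompute extents_thresh, the number of extent entries we allow before
 * new free space gets pushed into bitmaps instead. Called whenever the
 * number of bitmaps in the block group changes.
 */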
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

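/*
 * bitmap_clear_bits/bitmap_set_bits update the run of bits covering
 * [offset, offset + bytes) in a bitmap entry and keep the byte counts in
 * the entry and the block group in sync.
 */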
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}

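/*
 * Scan a bitmap entry for a run of set bits at least *bytes long, starting
 * at or after *offset. On success, *offset and *bytes are updated to
 * describe the run that was found and 0 is returned; otherwise -1.
 */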
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}

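/*
 * Walk the offset tree forward from the bitmap window that would cover
 * *offset and return the first entry (extent or bitmap) with at least
 * *bytes of free space, updating *offset and *bytes to what was found.
 */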
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

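/*
 * Turn a pre-allocated info into a new bitmap entry covering the bitmap
 * window that contains @offset, and account for the extra bitmap.
 */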
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}

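/*
 * Clear [*offset, *offset + *bytes) out of one or more bitmap entries,
 * moving on to the next bitmap when the range crosses a bitmap boundary.
 * Returns -EAGAIN when the caller needs to retry via the extent path.
 */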
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working. Search the bitmap for the space we are trying to use to
	 * make sure it's actually there. If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}

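/*
 * Try to store the free space described by @info in a bitmap rather than
 * as an extent entry. Returns 0 if the caller should fall back to adding
 * an extent entry, 1 if the space was absorbed into a bitmap, or a
 * negative errno on allocation failure.
 */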
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}

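/*
 * Add free space back to the in-memory cache, merging it with any extent
 * entries immediately adjacent to the range, or spilling it into a bitmap
 * when the extent entry count is over the threshold.
 */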
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

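/*
 * Remove [offset, offset + bytes) from the free space cache. The range may
 * be covered by an extent entry, one or more bitmaps, or a mix of both, so
 * this may loop through remove_from_bitmap() and re-add leftover pieces.
 */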
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops, didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			       " trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is\n",
	       count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache. If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already. In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

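/*
 * Tear down the entire in-memory cache for a block group: return any
 * clusters to the free space tree, then free every remaining entry.
 * The tree_lock is dropped periodically so we don't hog the CPU.
 */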
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}

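/*
 * Find at least bytes + empty_size of contiguous free space, carve bytes
 * out of it, and return the start offset of the allocation (0 when nothing
 * large enough was found).
 */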
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache. If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it, don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

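/*
 * Allocate bytes from the bitmap a cluster points to. Returns the start
 * offset of the allocation, or 0 if the cluster no longer matches or the
 * bitmap has no run large enough.
 */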
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);

	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}

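/*
 * Try to form a cluster inside a single bitmap entry: find runs of at
 * least min_bytes and keep extending the window until we've seen bytes
 * worth of set bits, resetting whenever the window gets too sparse.
 */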
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group. The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large. reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = next;
			window_free += next->bytes;
			/* track the largest extent added to the window */
			if (next->bytes > max_extent)
				max_extent = next->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}