Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 9 May 2013 20:07:40 +0000 (13:07 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 9 May 2013 20:07:40 +0000 (13:07 -0700)
Pull btrfs update from Chris Mason:
 "These are mostly fixes.  The biggest exceptions are Josef's skinny
  extents and Jan Schmidt's code to rebuild our quota indexes if they
  get out of sync (or you enable quotas on an existing filesystem).

  The skinny extents are off by default because they are a new variation
  on the extent allocation tree format.  btrfstune -x enables them, and
  the new format makes the extent allocation tree about 30% smaller.

  I rebased this a few days ago to rework Dave Sterba's crc checks on
  the super block, but almost all of these go back to rc6, since I
  thought 3.9 was due any minute.

  The biggest missing fix is the tracepoint bug that was hit late in
  3.9.  I ran into problems with that in overnight testing and I'm still
  tracking it down.  I'll definitely have that fixed for rc2."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (101 commits)
  Btrfs: allow superblock mismatch from older mkfs
  btrfs: enhance superblock checks
  btrfs: fix misleading variable name for flags
  btrfs: use unsigned long type for extent state bits
  Btrfs: improve the loop of scrub_stripe
  btrfs: read entire device info under lock
  btrfs: remove unused gfp mask parameter from release_extent_buffer callchain
  btrfs: handle errors returned from get_tree_block_key
  btrfs: make static code static & remove dead code
  Btrfs: deal with errors in write_dev_supers
  Btrfs: remove almost all of the BUG()'s from tree-log.c
  Btrfs: deal with free space cache errors while replaying log
  Btrfs: automatic rescan after "quota enable" command
  Btrfs: rescan for qgroups
  Btrfs: split btrfs_qgroup_account_ref into four functions
  Btrfs: allocate new chunks if the space is not enough for global rsv
  Btrfs: separate sequence numbers for delayed ref tracking and tree mod log
  btrfs: move leak debug code to functions
  Btrfs: return free space in cow error path
  Btrfs: set UUID in root_item for created trees
  ...

1  2 
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/volumes.c

diff --combined fs/btrfs/extent_io.c
index 73f2bfe3ac9302091608beae85b4aecf28622240,d2ac518f90e41229aa6fdbca5c2f3a5dc26466c5..32d67a822e93c0cf53f5c0409bd73ff863bf90bc
  static struct kmem_cache *extent_state_cache;
  static struct kmem_cache *extent_buffer_cache;
  
+ #ifdef CONFIG_BTRFS_DEBUG
  static LIST_HEAD(buffers);
  static LIST_HEAD(states);
  
- #define LEAK_DEBUG 0
- #if LEAK_DEBUG
  static DEFINE_SPINLOCK(leak_lock);
+ static inline
+ void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
+ {
+       unsigned long flags;
+       spin_lock_irqsave(&leak_lock, flags);
+       list_add(new, head);
+       spin_unlock_irqrestore(&leak_lock, flags);
+ }
+ static inline
+ void btrfs_leak_debug_del(struct list_head *entry)
+ {
+       unsigned long flags;
+       spin_lock_irqsave(&leak_lock, flags);
+       list_del(entry);
+       spin_unlock_irqrestore(&leak_lock, flags);
+ }
+ static inline
+ void btrfs_leak_debug_check(void)
+ {
+       struct extent_state *state;
+       struct extent_buffer *eb;
+       while (!list_empty(&states)) {
+               state = list_entry(states.next, struct extent_state, leak_list);
+               printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+                      "state %lu in tree %p refs %d\n",
+                      (unsigned long long)state->start,
+                      (unsigned long long)state->end,
+                      state->state, state->tree, atomic_read(&state->refs));
+               list_del(&state->leak_list);
+               kmem_cache_free(extent_state_cache, state);
+       }
+       while (!list_empty(&buffers)) {
+               eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+               printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+                      "refs %d\n", (unsigned long long)eb->start,
+                      eb->len, atomic_read(&eb->refs));
+               list_del(&eb->leak_list);
+               kmem_cache_free(extent_buffer_cache, eb);
+       }
+ }
+ #else
+ #define btrfs_leak_debug_add(new, head)       do {} while (0)
+ #define btrfs_leak_debug_del(entry)   do {} while (0)
+ #define btrfs_leak_debug_check()      do {} while (0)
  #endif
  
  #define BUFFER_LRU_MAX 64
@@@ -84,29 -134,7 +134,7 @@@ free_state_cache
  
  void extent_io_exit(void)
  {
-       struct extent_state *state;
-       struct extent_buffer *eb;
-       while (!list_empty(&states)) {
-               state = list_entry(states.next, struct extent_state, leak_list);
-               printk(KERN_ERR "btrfs state leak: start %llu end %llu "
-                      "state %lu in tree %p refs %d\n",
-                      (unsigned long long)state->start,
-                      (unsigned long long)state->end,
-                      state->state, state->tree, atomic_read(&state->refs));
-               list_del(&state->leak_list);
-               kmem_cache_free(extent_state_cache, state);
-       }
-       while (!list_empty(&buffers)) {
-               eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-               printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
-                      "refs %d\n", (unsigned long long)eb->start,
-                      eb->len, atomic_read(&eb->refs));
-               list_del(&eb->leak_list);
-               kmem_cache_free(extent_buffer_cache, eb);
-       }
+       btrfs_leak_debug_check();
  
        /*
         * Make sure all delayed rcu free are flushed before we
@@@ -134,9 -162,6 +162,6 @@@ void extent_io_tree_init(struct extent_
  static struct extent_state *alloc_extent_state(gfp_t mask)
  {
        struct extent_state *state;
- #if LEAK_DEBUG
-       unsigned long flags;
- #endif
  
        state = kmem_cache_alloc(extent_state_cache, mask);
        if (!state)
        state->state = 0;
        state->private = 0;
        state->tree = NULL;
- #if LEAK_DEBUG
-       spin_lock_irqsave(&leak_lock, flags);
-       list_add(&state->leak_list, &states);
-       spin_unlock_irqrestore(&leak_lock, flags);
- #endif
+       btrfs_leak_debug_add(&state->leak_list, &states);
        atomic_set(&state->refs, 1);
        init_waitqueue_head(&state->wq);
        trace_alloc_extent_state(state, mask, _RET_IP_);
@@@ -160,15 -181,8 +181,8 @@@ void free_extent_state(struct extent_st
        if (!state)
                return;
        if (atomic_dec_and_test(&state->refs)) {
- #if LEAK_DEBUG
-               unsigned long flags;
- #endif
                WARN_ON(state->tree);
- #if LEAK_DEBUG
-               spin_lock_irqsave(&leak_lock, flags);
-               list_del(&state->leak_list);
-               spin_unlock_irqrestore(&leak_lock, flags);
- #endif
+               btrfs_leak_debug_del(&state->leak_list);
                trace_free_extent_state(state, _RET_IP_);
                kmem_cache_free(extent_state_cache, state);
        }
@@@ -308,21 -322,21 +322,21 @@@ static void merge_state(struct extent_i
  }
  
  static void set_state_cb(struct extent_io_tree *tree,
-                        struct extent_state *state, int *bits)
+                        struct extent_state *state, unsigned long *bits)
  {
        if (tree->ops && tree->ops->set_bit_hook)
                tree->ops->set_bit_hook(tree->mapping->host, state, bits);
  }
  
  static void clear_state_cb(struct extent_io_tree *tree,
-                          struct extent_state *state, int *bits)
+                          struct extent_state *state, unsigned long *bits)
  {
        if (tree->ops && tree->ops->clear_bit_hook)
                tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
  }
  
  static void set_state_bits(struct extent_io_tree *tree,
-                          struct extent_state *state, int *bits);
+                          struct extent_state *state, unsigned long *bits);
  
  /*
   * insert an extent_state struct into the tree.  'bits' are set on the
   */
  static int insert_state(struct extent_io_tree *tree,
                        struct extent_state *state, u64 start, u64 end,
-                       int *bits)
+                       unsigned long *bits)
  {
        struct rb_node *node;
  
@@@ -424,10 -438,10 +438,10 @@@ static struct extent_state *next_state(
   */
  static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
                                            struct extent_state *state,
-                                           int *bits, int wake)
+                                           unsigned long *bits, int wake)
  {
        struct extent_state *next;
-       int bits_to_clear = *bits & ~EXTENT_CTLBITS;
+       unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
  
        if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
@@@ -463,7 -477,7 +477,7 @@@ alloc_extent_state_atomic(struct extent
        return prealloc;
  }
  
- void extent_io_tree_panic(struct extent_io_tree *tree, int err)
static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
  {
        btrfs_panic(tree_fs_info(tree), err, "Locking error: "
                    "Extent tree was modified by another "
   * This takes the tree lock, and returns 0 on success and < 0 on error.
   */
  int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                    int bits, int wake, int delete,
+                    unsigned long bits, int wake, int delete,
                     struct extent_state **cached_state,
                     gfp_t mask)
  {
@@@ -644,7 -658,8 +658,8 @@@ static void wait_on_state(struct extent
   * The range [start, end] is inclusive.
   * The tree lock is taken by this function
   */
- void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                           unsigned long bits)
  {
        struct extent_state *state;
        struct rb_node *node;
@@@ -685,9 -700,9 +700,9 @@@ out
  
  static void set_state_bits(struct extent_io_tree *tree,
                           struct extent_state *state,
-                          int *bits)
+                          unsigned long *bits)
  {
-       int bits_to_set = *bits & ~EXTENT_CTLBITS;
+       unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
  
        set_state_cb(tree, state, bits);
        if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@@@ -730,8 -745,9 +745,9 @@@ static void uncache_state(struct extent
  
  static int __must_check
  __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                int bits, int exclusive_bits, u64 *failed_start,
-                struct extent_state **cached_state, gfp_t mask)
+                unsigned long bits, unsigned long exclusive_bits,
+                u64 *failed_start, struct extent_state **cached_state,
+                gfp_t mask)
  {
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
@@@ -923,9 -939,9 +939,9 @@@ search_again
        goto again;
  }
  
- int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
-                  u64 *failed_start, struct extent_state **cached_state,
-                  gfp_t mask)
+ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                  unsigned long bits, u64 * failed_start,
+                  struct extent_state **cached_state, gfp_t mask)
  {
        return __set_extent_bit(tree, start, end, bits, 0, failed_start,
                                cached_state, mask);
   * boundary bits like LOCK.
   */
  int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                      int bits, int clear_bits,
+                      unsigned long bits, unsigned long clear_bits,
                       struct extent_state **cached_state, gfp_t mask)
  {
        struct extent_state *state;
@@@ -1143,14 -1159,14 +1159,14 @@@ int set_extent_dirty(struct extent_io_t
  }
  
  int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                   int bits, gfp_t mask)
+                   unsigned long bits, gfp_t mask)
  {
        return set_extent_bit(tree, start, end, bits, NULL,
                              NULL, mask);
  }
  
  int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                     int bits, gfp_t mask)
+                     unsigned long bits, gfp_t mask)
  {
        return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
  }
@@@ -1189,7 -1205,7 +1205,7 @@@ int set_extent_new(struct extent_io_tre
  int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
                        struct extent_state **cached_state, gfp_t mask)
  {
-       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
                              cached_state, mask);
  }
  
@@@ -1205,7 -1221,7 +1221,7 @@@ int clear_extent_uptodate(struct extent
   * us if waiting is desired.
   */
  int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                    int bits, struct extent_state **cached_state)
+                    unsigned long bits, struct extent_state **cached_state)
  {
        int err;
        u64 failed_start;
@@@ -1313,8 -1329,9 +1329,9 @@@ static int set_range_writeback(struct e
   * return it.  tree->lock must be held.  NULL will returned if
   * nothing was found after 'start'
   */
- struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
-                                                u64 start, int bits)
+ static struct extent_state *
+ find_first_extent_bit_state(struct extent_io_tree *tree,
+                           u64 start, unsigned long bits)
  {
        struct rb_node *node;
        struct extent_state *state;
@@@ -1348,7 -1365,7 +1365,7 @@@ out
   * If nothing was found, 1 is returned. If found something, return 0.
   */
  int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-                         u64 *start_ret, u64 *end_ret, int bits,
+                         u64 *start_ret, u64 *end_ret, unsigned long bits,
                          struct extent_state **cached_state)
  {
        struct extent_state *state;
@@@ -1638,7 -1655,7 +1655,7 @@@ int extent_clear_unlock_delalloc(struc
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
        unsigned long nr_pages = end_index - index + 1;
        int i;
-       int clear_bits = 0;
+       unsigned long clear_bits = 0;
  
        if (op & EXTENT_CLEAR_UNLOCK)
                clear_bits |= EXTENT_LOCKED;
        return ret;
  }
  
+ void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
+                           int count)
+ {
+       struct rb_node *node;
+       struct extent_state *state;
+       spin_lock(&tree->lock);
+       /*
+        * this search will find all the extents that end after
+        * our range starts.
+        */
+       node = tree_search(tree, start);
+       BUG_ON(!node);
+       state = rb_entry(node, struct extent_state, rb_node);
+       BUG_ON(state->start != start);
+       while (count) {
+               state->private = *csums++;
+               count--;
+               state = next_state(state);
+       }
+       spin_unlock(&tree->lock);
+ }
+ static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
+ {
+       struct bio_vec *bvec = bio->bi_io_vec + bio_index;
+       return page_offset(bvec->bv_page) + bvec->bv_offset;
+ }
+ void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
+                       u32 csums[], int count)
+ {
+       struct rb_node *node;
+       struct extent_state *state = NULL;
+       u64 start;
+       spin_lock(&tree->lock);
+       do {
+               start = __btrfs_get_bio_offset(bio, bio_index);
+               if (state == NULL || state->start != start) {
+                       node = tree_search(tree, start);
+                       BUG_ON(!node);
+                       state = rb_entry(node, struct extent_state, rb_node);
+                       BUG_ON(state->start != start);
+               }
+               state->private = *csums++;
+               count--;
+               bio_index++;
+               state = next_state(state);
+       } while (count);
+       spin_unlock(&tree->lock);
+ }
  int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
  {
        struct rb_node *node;
@@@ -1811,7 -1886,7 +1886,7 @@@ out
   * range is found set.
   */
  int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                  int bits, int filled, struct extent_state *cached)
+                  unsigned long bits, int filled, struct extent_state *cached)
  {
        struct extent_state *state = NULL;
        struct rb_node *node;
@@@ -2560,7 -2635,8 +2635,7 @@@ static int submit_extent_page(int rw, s
                if (old_compressed)
                        contig = bio->bi_sector == sector;
                else
 -                      contig = bio->bi_sector + (bio->bi_size >> 9) ==
 -                              sector;
 +                      contig = bio_end_sector(bio) == sector;
  
                if (prev_bio_flags != bio_flags || !contig ||
                    merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
        return ret;
  }
  
- void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
+ static void attach_extent_buffer_page(struct extent_buffer *eb,
+                                     struct page *page)
  {
        if (!PagePrivate(page)) {
                SetPagePrivate(page);
@@@ -2625,7 -2702,7 +2701,7 @@@ static int __extent_read_full_page(stru
                                   struct page *page,
                                   get_extent_t *get_extent,
                                   struct bio **bio, int mirror_num,
-                                  unsigned long *bio_flags)
+                                  unsigned long *bio_flags, int rw)
  {
        struct inode *inode = page->mapping->host;
        u64 start = page_offset(page);
                }
  
                pnr -= page->index;
-               ret = submit_extent_page(READ, tree, page,
+               ret = submit_extent_page(rw, tree, page,
                                         sector, disk_io_size, pg_offset,
                                         bdev, bio, pnr,
                                         end_bio_extent_readpage, mirror_num,
@@@ -2804,7 -2881,7 +2880,7 @@@ int extent_read_full_page(struct extent
        int ret;
  
        ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
-                                     &bio_flags);
+                                     &bio_flags, READ);
        if (bio)
                ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
        return ret;
@@@ -3103,7 -3180,7 +3179,7 @@@ static int eb_wait(void *word
        return 0;
  }
  
static void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
+ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
  {
        wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
                    TASK_UNINTERRUPTIBLE);
@@@ -3228,7 -3305,7 +3304,7 @@@ static int write_one_eb(struct extent_b
        u64 offset = eb->start;
        unsigned long i, num_pages;
        unsigned long bio_flags = 0;
-       int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
+       int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
        int ret = 0;
  
        clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
@@@ -3665,14 -3742,14 +3741,14 @@@ int extent_readpages(struct extent_io_t
                        continue;
                for (i = 0; i < nr; i++) {
                        __extent_read_full_page(tree, pagepool[i], get_extent,
-                                       &bio, 0, &bio_flags);
+                                       &bio, 0, &bio_flags, READ);
                        page_cache_release(pagepool[i]);
                }
                nr = 0;
        }
        for (i = 0; i < nr; i++) {
                __extent_read_full_page(tree, pagepool[i], get_extent,
-                                       &bio, 0, &bio_flags);
+                                       &bio, 0, &bio_flags, READ);
                page_cache_release(pagepool[i]);
        }
  
@@@ -3713,9 -3790,9 +3789,9 @@@ int extent_invalidatepage(struct extent
   * are locked or under IO and drops the related state bits if it is safe
   * to drop the page.
   */
- int try_release_extent_state(struct extent_map_tree *map,
-                            struct extent_io_tree *tree, struct page *page,
-                            gfp_t mask)
static int try_release_extent_state(struct extent_map_tree *map,
+                                   struct extent_io_tree *tree,
+                                   struct page *page, gfp_t mask)
  {
        u64 start = page_offset(page);
        u64 end = start + PAGE_CACHE_SIZE - 1;
@@@ -4006,12 -4083,7 +4082,7 @@@ out
  
  static void __free_extent_buffer(struct extent_buffer *eb)
  {
- #if LEAK_DEBUG
-       unsigned long flags;
-       spin_lock_irqsave(&leak_lock, flags);
-       list_del(&eb->leak_list);
-       spin_unlock_irqrestore(&leak_lock, flags);
- #endif
+       btrfs_leak_debug_del(&eb->leak_list);
        kmem_cache_free(extent_buffer_cache, eb);
  }
  
@@@ -4021,9 -4093,6 +4092,6 @@@ static struct extent_buffer *__alloc_ex
                                                   gfp_t mask)
  {
        struct extent_buffer *eb = NULL;
- #if LEAK_DEBUG
-       unsigned long flags;
- #endif
  
        eb = kmem_cache_zalloc(extent_buffer_cache, mask);
        if (eb == NULL)
        init_waitqueue_head(&eb->write_lock_wq);
        init_waitqueue_head(&eb->read_lock_wq);
  
- #if LEAK_DEBUG
-       spin_lock_irqsave(&leak_lock, flags);
-       list_add(&eb->leak_list, &buffers);
-       spin_unlock_irqrestore(&leak_lock, flags);
- #endif
+       btrfs_leak_debug_add(&eb->leak_list, &buffers);
        spin_lock_init(&eb->refs_lock);
        atomic_set(&eb->refs, 1);
        atomic_set(&eb->io_pages, 0);
@@@ -4385,7 -4451,7 +4450,7 @@@ static inline void btrfs_release_extent
  }
  
  /* Expects to have eb->eb_lock already held */
- static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+ static int release_extent_buffer(struct extent_buffer *eb)
  {
        WARN_ON(atomic_read(&eb->refs) == 0);
        if (atomic_dec_and_test(&eb->refs)) {
@@@ -4443,7 -4509,7 +4508,7 @@@ void free_extent_buffer(struct extent_b
         * I know this is terrible, but it's temporary until we stop tracking
         * the uptodate bits and such for the extent buffers.
         */
-       release_extent_buffer(eb, GFP_ATOMIC);
+       release_extent_buffer(eb);
  }
  
  void free_extent_buffer_stale(struct extent_buffer *eb)
        if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
            test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
                atomic_dec(&eb->refs);
-       release_extent_buffer(eb, GFP_NOFS);
+       release_extent_buffer(eb);
  }
  
  void clear_extent_buffer_dirty(struct extent_buffer *eb)
@@@ -4509,17 -4575,6 +4574,6 @@@ int set_extent_buffer_dirty(struct exte
        return was_dirty;
  }
  
- static int range_straddles_pages(u64 start, u64 len)
- {
-       if (len < PAGE_CACHE_SIZE)
-               return 1;
-       if (start & (PAGE_CACHE_SIZE - 1))
-               return 1;
-       if ((start + len) & (PAGE_CACHE_SIZE - 1))
-               return 1;
-       return 0;
- }
  int clear_extent_buffer_uptodate(struct extent_buffer *eb)
  {
        unsigned long i;
@@@ -4551,37 -4606,6 +4605,6 @@@ int set_extent_buffer_uptodate(struct e
        return 0;
  }
  
- int extent_range_uptodate(struct extent_io_tree *tree,
-                         u64 start, u64 end)
- {
-       struct page *page;
-       int ret;
-       int pg_uptodate = 1;
-       int uptodate;
-       unsigned long index;
-       if (range_straddles_pages(start, end - start + 1)) {
-               ret = test_range_bit(tree, start, end,
-                                    EXTENT_UPTODATE, 1, NULL);
-               if (ret)
-                       return 1;
-       }
-       while (start <= end) {
-               index = start >> PAGE_CACHE_SHIFT;
-               page = find_get_page(tree->mapping, index);
-               if (!page)
-                       return 1;
-               uptodate = PageUptodate(page);
-               page_cache_release(page);
-               if (!uptodate) {
-                       pg_uptodate = 0;
-                       break;
-               }
-               start += PAGE_CACHE_SIZE;
-       }
-       return pg_uptodate;
- }
  int extent_buffer_uptodate(struct extent_buffer *eb)
  {
        return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@@ -4644,7 -4668,8 +4667,8 @@@ int read_extent_buffer_pages(struct ext
                        ClearPageError(page);
                        err = __extent_read_full_page(tree, page,
                                                      get_extent, &bio,
-                                                     mirror_num, &bio_flags);
+                                                     mirror_num, &bio_flags,
+                                                     READ | REQ_META);
                        if (err)
                                ret = err;
                } else {
        }
  
        if (bio) {
-               err = submit_one_bio(READ, bio, mirror_num, bio_flags);
+               err = submit_one_bio(READ | REQ_META, bio, mirror_num,
+                                    bio_flags);
                if (err)
                        return err;
        }
@@@ -5017,7 -5043,7 +5042,7 @@@ void memmove_extent_buffer(struct exten
        }
  }
  
- int try_release_extent_buffer(struct page *page, gfp_t mask)
+ int try_release_extent_buffer(struct page *page)
  {
        struct extent_buffer *eb;
  
        }
        spin_unlock(&page->mapping->private_lock);
  
-       if ((mask & GFP_NOFS) == GFP_NOFS)
-               mask = GFP_NOFS;
        /*
         * If tree ref isn't set then we know the ref on this eb is a real ref,
         * so just return, this page will likely be freed soon anyway.
                return 0;
        }
  
-       return release_extent_buffer(eb, mask);
+       return release_extent_buffer(eb);
  }
diff --combined fs/btrfs/file.c
index bc4d54c465a04dde6fe40b8ad4afc2275e82b587,b3e359bc8e68641c80205d4ef8fc8f534e32e734..4205ba752d40ccbadfb0fa0b1f053b1022c553c0
@@@ -24,7 -24,6 +24,7 @@@
  #include <linux/string.h>
  #include <linux/backing-dev.h>
  #include <linux/mpage.h>
 +#include <linux/aio.h>
  #include <linux/falloc.h>
  #include <linux/swap.h>
  #include <linux/writeback.h>
@@@ -193,8 -192,8 +193,8 @@@ int btrfs_add_inode_defrag(struct btrfs
   * the same inode in the tree, we will merge them together (by
   * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
   */
- void btrfs_requeue_inode_defrag(struct inode *inode,
-                               struct inode_defrag *defrag)
static void btrfs_requeue_inode_defrag(struct inode *inode,
+                                      struct inode_defrag *defrag)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
@@@ -474,7 -473,7 +474,7 @@@ static noinline int btrfs_copy_from_use
  /*
   * unlocks pages after btrfs_file_write is done with them
   */
- void btrfs_drop_pages(struct page **pages, size_t num_pages)
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
  {
        size_t i;
        for (i = 0; i < num_pages; i++) {
   * doing real data extents, marking pages dirty and delalloc as required.
   */
  int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
-                     struct page **pages, size_t num_pages,
-                     loff_t pos, size_t write_bytes,
-                     struct extent_state **cached)
+                            struct page **pages, size_t num_pages,
+                            loff_t pos, size_t write_bytes,
+                            struct extent_state **cached)
  {
        int err = 0;
        int i;
@@@ -553,6 -552,7 +553,7 @@@ void btrfs_drop_extent_cache(struct ino
        int testend = 1;
        unsigned long flags;
        int compressed = 0;
+       bool modified;
  
        WARN_ON(end < start);
        if (end == (u64)-1) {
        while (1) {
                int no_splits = 0;
  
+               modified = false;
                if (!split)
                        split = alloc_extent_map();
                if (!split2)
                compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                clear_bit(EXTENT_FLAG_PINNED, &em->flags);
                clear_bit(EXTENT_FLAG_LOGGING, &flags);
+               modified = !list_empty(&em->list);
                remove_extent_mapping(em_tree, em);
                if (no_splits)
                        goto next;
                                split->block_len = em->block_len;
                        else
                                split->block_len = split->len;
+                       split->ram_bytes = em->ram_bytes;
                        split->orig_block_len = max(split->block_len,
                                                    em->orig_block_len);
                        split->generation = gen;
                        split->bdev = em->bdev;
                        split->flags = flags;
                        split->compress_type = em->compress_type;
-                       ret = add_extent_mapping(em_tree, split);
+                       ret = add_extent_mapping(em_tree, split, modified);
                        BUG_ON(ret); /* Logic error */
-                       list_move(&split->list, &em_tree->modified_extents);
                        free_extent_map(split);
                        split = split2;
                        split2 = NULL;
                        split->generation = gen;
                        split->orig_block_len = max(em->block_len,
                                                    em->orig_block_len);
+                       split->ram_bytes = em->ram_bytes;
  
                        if (compressed) {
                                split->block_len = em->block_len;
                                split->orig_start = em->orig_start;
                        }
  
-                       ret = add_extent_mapping(em_tree, split);
+                       ret = add_extent_mapping(em_tree, split, modified);
                        BUG_ON(ret); /* Logic error */
-                       list_move(&split->list, &em_tree->modified_extents);
                        free_extent_map(split);
                        split = NULL;
                }
@@@ -822,7 -824,7 +825,7 @@@ next_slot
  
                        memcpy(&new_key, &key, sizeof(new_key));
                        new_key.offset = end;
-                       btrfs_set_item_key_safe(trans, root, path, &new_key);
+                       btrfs_set_item_key_safe(root, path, &new_key);
  
                        extent_offset += end - key.offset;
                        btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@@ -1038,7 -1040,7 +1041,7 @@@ again
                                     ino, bytenr, orig_offset,
                                     &other_start, &other_end)) {
                        new_key.offset = end;
-                       btrfs_set_item_key_safe(trans, root, path, &new_key);
+                       btrfs_set_item_key_safe(root, path, &new_key);
                        fi = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_file_extent_item);
                        btrfs_set_file_extent_generation(leaf, fi,
                                                         trans->transid);
                        path->slots[0]++;
                        new_key.offset = start;
-                       btrfs_set_item_key_safe(trans, root, path, &new_key);
+                       btrfs_set_item_key_safe(root, path, &new_key);
  
                        fi = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_file_extent_item);
@@@ -1515,6 -1517,8 +1518,6 @@@ static ssize_t btrfs_file_aio_write(str
        size_t count, ocount;
        bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
  
 -      sb_start_write(inode->i_sb);
 -
        mutex_lock(&inode->i_mutex);
  
        err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
        if (sync)
                atomic_dec(&BTRFS_I(inode)->sync_writers);
  out:
 -      sb_end_write(inode->i_sb);
        current->backing_dev_info = NULL;
        return num_written ? num_written : err;
  }
@@@ -1883,7 -1888,7 +1886,7 @@@ static int fill_holes(struct btrfs_tran
  
                path->slots[0]++;
                key.offset = offset;
-               btrfs_set_item_key_safe(trans, root, path, &key);
+               btrfs_set_item_key_safe(root, path, &key);
                fi = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_file_extent_item);
                num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@@ -1913,6 -1918,7 +1916,7 @@@ out
        } else {
                hole_em->start = offset;
                hole_em->len = end - offset;
+               hole_em->ram_bytes = hole_em->len;
                hole_em->orig_start = offset;
  
                hole_em->block_start = EXTENT_MAP_HOLE;
                do {
                        btrfs_drop_extent_cache(inode, offset, end - 1, 0);
                        write_lock(&em_tree->lock);
-                       ret = add_extent_mapping(em_tree, hole_em);
-                       if (!ret)
-                               list_move(&hole_em->list,
-                                         &em_tree->modified_extents);
+                       ret = add_extent_mapping(em_tree, hole_em, 1);
                        write_unlock(&em_tree->lock);
                } while (ret == -EEXIST);
                free_extent_map(hole_em);
diff --combined fs/btrfs/inode.c
index 898da0a01e040b2bb263f34275ec8afa50265d16,1669c3b4be2f4ba1f318d9e677f336df87ea41b2..9b31b3b091fceb6536612b30e64f2fd191d47e89
@@@ -32,7 -32,6 +32,7 @@@
  #include <linux/writeback.h>
  #include <linux/statfs.h>
  #include <linux/compat.h>
 +#include <linux/aio.h>
  #include <linux/bit_spinlock.h>
  #include <linux/xattr.h>
  #include <linux/posix_acl.h>
@@@ -101,7 -100,10 +101,10 @@@ static noinline int cow_file_range(stru
  static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
                                           u64 len, u64 orig_start,
                                           u64 block_start, u64 block_len,
-                                          u64 orig_block_len, int type);
+                                          u64 orig_block_len, u64 ram_bytes,
+                                          int type);
+ static int btrfs_dirty_inode(struct inode *inode);
  
  static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
@@@ -723,6 -725,7 +726,7 @@@ retry
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
                em->orig_block_len = ins.offset;
+               em->ram_bytes = async_extent->ram_size;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                em->compress_type = async_extent->compress_type;
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
  
                while (1) {
                        write_lock(&em_tree->lock);
-                       ret = add_extent_mapping(em_tree, em);
-                       if (!ret)
-                               list_move(&em->list,
-                                         &em_tree->modified_extents);
+                       ret = add_extent_mapping(em_tree, em, 1);
                        write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
@@@ -922,7 -922,8 +923,8 @@@ static noinline int __cow_file_range(st
                }
  
                em = alloc_extent_map();
-               BUG_ON(!em); /* -ENOMEM */
+               if (!em)
+                       goto out_reserve;
                em->start = start;
                em->orig_start = em->start;
                ram_size = ins.offset;
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
                em->orig_block_len = ins.offset;
+               em->ram_bytes = ram_size;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
                em->generation = -1;
  
                while (1) {
                        write_lock(&em_tree->lock);
-                       ret = add_extent_mapping(em_tree, em);
-                       if (!ret)
-                               list_move(&em->list,
-                                         &em_tree->modified_extents);
+                       ret = add_extent_mapping(em_tree, em, 1);
                        write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                        btrfs_drop_extent_cache(inode, start,
                                                start + ram_size - 1, 0);
                }
+               if (ret)
+                       goto out_reserve;
  
                cur_alloc_size = ins.offset;
                ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
                                               ram_size, cur_alloc_size, 0);
-               BUG_ON(ret); /* -ENOMEM */
+               if (ret)
+                       goto out_reserve;
  
                if (root->root_key.objectid ==
                    BTRFS_DATA_RELOC_TREE_OBJECTID) {
                                                      cur_alloc_size);
                        if (ret) {
                                btrfs_abort_transaction(trans, root, ret);
-                               goto out_unlock;
+                               goto out_reserve;
                        }
                }
  
  out:
        return ret;
  
+ out_reserve:
+       btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
  out_unlock:
        extent_clear_unlock_delalloc(inode,
                     &BTRFS_I(inode)->io_tree,
@@@ -1195,6 -1199,7 +1200,7 @@@ static noinline int run_delalloc_nocow(
        u64 disk_bytenr;
        u64 num_bytes;
        u64 disk_num_bytes;
+       u64 ram_bytes;
        int extent_type;
        int ret, err;
        int type;
@@@ -1291,6 -1296,7 +1297,7 @@@ next_slot
                                    struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(leaf, fi);
  
+               ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
                if (extent_type == BTRFS_FILE_EXTENT_REG ||
                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
                        disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
@@@ -1374,6 -1380,7 +1381,7 @@@ out_check
                        em->block_len = num_bytes;
                        em->block_start = disk_bytenr;
                        em->orig_block_len = disk_num_bytes;
+                       em->ram_bytes = ram_bytes;
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
                        em->mod_start = em->start;
                        em->mod_len = em->len;
                        em->generation = -1;
                        while (1) {
                                write_lock(&em_tree->lock);
-                               ret = add_extent_mapping(em_tree, em);
-                               if (!ret)
-                                       list_move(&em->list,
-                                                 &em_tree->modified_extents);
+                               ret = add_extent_mapping(em_tree, em, 1);
                                write_unlock(&em_tree->lock);
                                if (ret != -EEXIST) {
                                        free_extent_map(em);
@@@ -1526,7 -1530,7 +1531,7 @@@ static void btrfs_merge_extent_hook(str
   * have pending delalloc work to be done.
   */
  static void btrfs_set_bit_hook(struct inode *inode,
-                              struct extent_state *state, int *bits)
+                              struct extent_state *state, unsigned long *bits)
  {
  
        /*
   * extent_io.c clear_bit_hook, see set_bit_hook for why
   */
  static void btrfs_clear_bit_hook(struct inode *inode,
-                                struct extent_state *state, int *bits)
+                                struct extent_state *state,
+                                unsigned long *bits)
  {
        /*
         * set_bit and clear bit hooks normally require _irqsave/restore
@@@ -2794,6 -2799,8 +2800,8 @@@ static int btrfs_readpage_end_io_hook(s
        int ret;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        u32 csum = ~(u32)0;
+       static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
+                                     DEFAULT_RATELIMIT_BURST);
  
        if (PageChecked(page)) {
                ClearPageChecked(page);
        if (ret)
                goto zeroit;
  
-       csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
+       csum = btrfs_csum_data(kaddr + offset, csum,  end - start + 1);
        btrfs_csum_final(csum, (char *)&csum);
        if (csum != private)
                goto zeroit;
@@@ -2830,11 -2837,11 +2838,11 @@@ good
        return 0;
  
  zeroit:
-       printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
-                      "private %llu\n",
-                      (unsigned long long)btrfs_ino(page->mapping->host),
-                      (unsigned long long)start, csum,
-                      (unsigned long long)private);
+       if (__ratelimit(&_rs))
+               btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
+                       (unsigned long long)btrfs_ino(page->mapping->host),
+                       (unsigned long long)start, csum,
+                       (unsigned long long)private);
        memset(kaddr + offset, 1, end - start + 1);
        flush_dcache_page(page);
        kunmap_atomic(kaddr);
@@@ -3020,7 -3027,8 +3028,8 @@@ int btrfs_orphan_add(struct btrfs_trans
   * We have done the truncate/delete so we can go ahead and remove the orphan
   * item for this particular inode.
   */
- int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
+ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
+                           struct inode *inode)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int delete_item = 0;
@@@ -3115,8 -3123,8 +3124,8 @@@ int btrfs_orphan_cleanup(struct btrfs_r
                 */
  
                if (found_key.offset == last_objectid) {
-                       printk(KERN_ERR "btrfs: Error removing orphan entry, "
-                              "stopping orphan cleanup\n");
+                       btrfs_err(root->fs_info,
+                               "Error removing orphan entry, stopping orphan cleanup");
                        ret = -EINVAL;
                        goto out;
                }
                                ret = PTR_ERR(trans);
                                goto out;
                        }
-                       printk(KERN_ERR "auto deleting %Lu\n",
-                              found_key.objectid);
+                       btrfs_debug(root->fs_info, "auto deleting %Lu",
+                               found_key.objectid);
                        ret = btrfs_del_orphan_item(trans, root,
                                                    found_key.objectid);
                        BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
        }
  
        if (nr_unlink)
-               printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
+               btrfs_debug(root->fs_info, "unlinked %d orphans", nr_unlink);
        if (nr_truncate)
-               printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
+               btrfs_debug(root->fs_info, "truncated %d orphans", nr_truncate);
  
  out:
        if (ret)
-               printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret);
+               btrfs_crit(root->fs_info,
+                       "could not do orphan cleanup %d", ret);
        btrfs_free_path(path);
        return ret;
  }
@@@ -3592,9 -3601,10 +3602,10 @@@ static int __btrfs_unlink_inode(struct 
        ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
                                  dir_ino, &index);
        if (ret) {
-               printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
-                      "inode %llu parent %llu\n", name_len, name,
-                      (unsigned long long)ino, (unsigned long long)dir_ino);
+               btrfs_info(root->fs_info,
+                       "failed to delete reference to %.*s, inode %llu parent %llu",
+                       name_len, name,
+                       (unsigned long long)ino, (unsigned long long)dir_ino);
                btrfs_abort_transaction(trans, root, ret);
                goto err;
        }
                                           dir, index);
        if (ret == -ENOENT)
                ret = 0;
+       else if (ret)
+               btrfs_abort_transaction(trans, root, ret);
  err:
        btrfs_free_path(path);
        if (ret)
@@@ -3661,7 -3673,7 +3674,7 @@@ static int check_path_shared(struct btr
                eb = path->nodes[level];
                if (!btrfs_block_can_be_shared(root, eb))
                        continue;
-               ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
+               ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
                                               &refs, NULL);
                if (refs > 1)
                        return 1;
@@@ -4176,8 -4188,7 +4189,7 @@@ search_again
                                }
                                size =
                                    btrfs_file_extent_calc_inline_size(size);
-                               btrfs_truncate_item(trans, root, path,
-                                                   size, 1);
+                               btrfs_truncate_item(root, path, size, 1);
                        } else if (root->ref_cows) {
                                inode_sub_bytes(inode, item_end + 1 -
                                                found_key.offset);
@@@ -4451,16 -4462,14 +4463,14 @@@ int btrfs_cont_expand(struct inode *ino
                        hole_em->block_start = EXTENT_MAP_HOLE;
                        hole_em->block_len = 0;
                        hole_em->orig_block_len = 0;
+                       hole_em->ram_bytes = hole_size;
                        hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
                        hole_em->compress_type = BTRFS_COMPRESS_NONE;
                        hole_em->generation = trans->transid;
  
                        while (1) {
                                write_lock(&em_tree->lock);
-                               err = add_extent_mapping(em_tree, hole_em);
-                               if (!err)
-                                       list_move(&hole_em->list,
-                                                 &em_tree->modified_extents);
+                               err = add_extent_mapping(em_tree, hole_em, 1);
                                write_unlock(&em_tree->lock);
                                if (err != -EEXIST)
                                        break;
@@@ -4671,8 -4680,9 +4681,9 @@@ void btrfs_evict_inode(struct inode *in
                        ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
  
                if (ret) {
-                       printk(KERN_WARNING "Could not get space for a "
-                              "delete, will truncate on mount %d\n", ret);
+                       btrfs_warn(root->fs_info,
+                               "Could not get space for a delete, will truncate on mount %d",
+                               ret);
                        btrfs_orphan_del(NULL, inode);
                        btrfs_free_block_rsv(root, rsv);
                        goto no_delete;
@@@ -5336,7 -5346,7 +5347,7 @@@ int btrfs_write_inode(struct inode *ino
   * FIXME, needs more benchmarking...there are no reasons other than performance
   * to keep or drop this code.
   */
- int btrfs_dirty_inode(struct inode *inode)
static int btrfs_dirty_inode(struct inode *inode)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
@@@ -5978,7 -5988,7 +5989,7 @@@ static int merge_extent_mapping(struct 
                em->block_start += start_diff;
                em->block_len -= start_diff;
        }
-       return add_extent_mapping(em_tree, em);
+       return add_extent_mapping(em_tree, em, 0);
  }
  
  static noinline int uncompress_inline(struct btrfs_path *path,
@@@ -6152,6 -6162,7 +6163,7 @@@ again
                goto not_found_em;
        }
  
+       em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
        if (found_type == BTRFS_FILE_EXTENT_REG ||
            found_type == BTRFS_FILE_EXTENT_PREALLOC) {
                em->start = extent_start;
@@@ -6260,18 -6271,18 +6272,18 @@@ not_found_em
  insert:
        btrfs_release_path(path);
        if (em->start > start || extent_map_end(em) <= start) {
-               printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
-                      "[%llu %llu]\n", (unsigned long long)em->start,
-                      (unsigned long long)em->len,
-                      (unsigned long long)start,
-                      (unsigned long long)len);
+               btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
+                       (unsigned long long)em->start,
+                       (unsigned long long)em->len,
+                       (unsigned long long)start,
+                       (unsigned long long)len);
                err = -EIO;
                goto out;
        }
  
        err = 0;
        write_lock(&em_tree->lock);
-       ret = add_extent_mapping(em_tree, em);
+       ret = add_extent_mapping(em_tree, em, 0);
        /* it is possible that someone inserted the extent into the tree
         * while we had the lock dropped.  It is also possible that
         * an overlapping map exists in the tree
@@@ -6483,7 -6494,7 +6495,7 @@@ static struct extent_map *btrfs_new_ext
        }
  
        em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-                             ins.offset, ins.offset, 0);
+                             ins.offset, ins.offset, ins.offset, 0);
        if (IS_ERR(em))
                goto out;
  
@@@ -6503,7 -6514,9 +6515,9 @@@ out
   * block must be cow'd
   */
  static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
-                                     struct inode *inode, u64 offset, u64 len)
+                                     struct inode *inode, u64 offset, u64 *len,
+                                     u64 *orig_start, u64 *orig_block_len,
+                                     u64 *ram_bytes)
  {
        struct btrfs_path *path;
        int ret;
        disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
        backref_offset = btrfs_file_extent_offset(leaf, fi);
  
+       *orig_start = key.offset - backref_offset;
+       *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+       *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
        extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
-       if (extent_end < offset + len) {
+       if (extent_end < offset + *len) {
                /* extent doesn't include our full range, must cow */
                goto out;
        }
         */
        disk_bytenr += backref_offset;
        disk_bytenr += offset - key.offset;
-       num_bytes = min(offset + len, extent_end) - offset;
+       num_bytes = min(offset + *len, extent_end) - offset;
        if (csum_exist_in_range(root, disk_bytenr, num_bytes))
                                goto out;
        /*
         * all of the above have passed, it is safe to overwrite this extent
         * without cow
         */
+       *len = num_bytes;
        ret = 1;
  out:
        btrfs_free_path(path);
@@@ -6662,7 -6680,8 +6681,8 @@@ static int lock_extent_direct(struct in
  static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
                                           u64 len, u64 orig_start,
                                           u64 block_start, u64 block_len,
-                                          u64 orig_block_len, int type)
+                                          u64 orig_block_len, u64 ram_bytes,
+                                          int type)
  {
        struct extent_map_tree *em_tree;
        struct extent_map *em;
        em->block_start = block_start;
        em->bdev = root->fs_info->fs_devices->latest_bdev;
        em->orig_block_len = orig_block_len;
+       em->ram_bytes = ram_bytes;
        em->generation = -1;
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
        if (type == BTRFS_ORDERED_PREALLOC)
                btrfs_drop_extent_cache(inode, em->start,
                                em->start + em->len - 1, 0);
                write_lock(&em_tree->lock);
-               ret = add_extent_mapping(em_tree, em);
-               if (!ret)
-                       list_move(&em->list,
-                                 &em_tree->modified_extents);
+               ret = add_extent_mapping(em_tree, em, 1);
                write_unlock(&em_tree->lock);
        } while (ret == -EEXIST);
  
@@@ -6790,7 -6807,7 +6808,7 @@@ static int btrfs_get_blocks_direct(stru
             em->block_start != EXTENT_MAP_HOLE)) {
                int type;
                int ret;
-               u64 block_start;
+               u64 block_start, orig_start, orig_block_len, ram_bytes;
  
                if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
                        type = BTRFS_ORDERED_PREALLOC;
                if (IS_ERR(trans))
                        goto must_cow;
  
-               if (can_nocow_odirect(trans, inode, start, len) == 1) {
-                       u64 orig_start = em->orig_start;
-                       u64 orig_block_len = em->orig_block_len;
+               if (can_nocow_odirect(trans, inode, start, &len, &orig_start,
+                                     &orig_block_len, &ram_bytes) == 1) {
                        if (type == BTRFS_ORDERED_PREALLOC) {
                                free_extent_map(em);
                                em = create_pinned_em(inode, start, len,
                                                       orig_start,
                                                       block_start, len,
-                                                      orig_block_len, type);
+                                                      orig_block_len,
+                                                      ram_bytes, type);
                                if (IS_ERR(em)) {
                                        btrfs_end_transaction(trans, root);
                                        goto unlock_err;
@@@ -6937,7 -6953,7 +6954,7 @@@ static void btrfs_endio_direct_read(str
                                goto failed;
                        local_irq_save(flags);
                        kaddr = kmap_atomic(page);
-                       csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
+                       csum = btrfs_csum_data(kaddr + bvec->bv_offset,
                                               csum, bvec->bv_len);
                        btrfs_csum_final(csum, (char *)&csum);
                        kunmap_atomic(kaddr);
                        flush_dcache_page(bvec->bv_page);
                        if (csum != private) {
  failed:
-                               printk(KERN_ERR "btrfs csum failed ino %llu off"
-                                     " %llu csum %u private %u\n",
-                                     (unsigned long long)btrfs_ino(inode),
-                                     (unsigned long long)start,
-                                     csum, (unsigned)private);
+                               btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
+                                       (unsigned long long)btrfs_ino(inode),
+                                       (unsigned long long)start,
+                                       csum, (unsigned)private);
                                err = -EIO;
                        }
                }
@@@ -7426,8 -7441,8 +7442,8 @@@ static int btrfs_writepage(struct page 
        return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
  }
  
- int btrfs_writepages(struct address_space *mapping,
-                    struct writeback_control *wbc)
static int btrfs_writepages(struct address_space *mapping,
+                           struct writeback_control *wbc)
  {
        struct extent_io_tree *tree;
  
@@@ -7942,8 -7957,8 +7958,8 @@@ void btrfs_destroy_inode(struct inode *
  
        if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                     &BTRFS_I(inode)->runtime_flags)) {
-               printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
-                      (unsigned long long)btrfs_ino(inode));
+               btrfs_info(root->fs_info, "inode %llu still on the orphan list",
+                       (unsigned long long)btrfs_ino(inode));
                atomic_dec(&root->orphan_inodes);
        }
  
                if (!ordered)
                        break;
                else {
-                       printk(KERN_ERR "btrfs found ordered "
-                              "extent %llu %llu on inode cleanup\n",
-                              (unsigned long long)ordered->file_offset,
-                              (unsigned long long)ordered->len);
+                       btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
+                               (unsigned long long)ordered->file_offset,
+                               (unsigned long long)ordered->len);
                        btrfs_remove_ordered_extent(inode, ordered);
                        btrfs_put_ordered_extent(ordered);
                        btrfs_put_ordered_extent(ordered);
@@@ -8572,16 -8586,14 +8587,14 @@@ static int __btrfs_prealloc_file_range(
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
                em->orig_block_len = ins.offset;
+               em->ram_bytes = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                em->generation = trans->transid;
  
                while (1) {
                        write_lock(&em_tree->lock);
-                       ret = add_extent_mapping(em_tree, em);
-                       if (!ret)
-                               list_move(&em->list,
-                                         &em_tree->modified_extents);
+                       ret = add_extent_mapping(em_tree, em, 1);
                        write_unlock(&em_tree->lock);
                        if (ret != -EEXIST)
                                break;
diff --combined fs/btrfs/volumes.c
index 6789772265707bdd02d97f91dc92e8b34a2fd6bb,a191bac31d853745c69b7a73d8aeec54d473615c..0e925ced971ba87bc0e356c6a2c7e3dbf1b5e91e
@@@ -46,6 -46,7 +46,7 @@@ static int init_first_rw_device(struct 
                                struct btrfs_device *device);
  static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
  static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
+ static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
  static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
  
  static DEFINE_MUTEX(uuid_mutex);
@@@ -717,9 -718,9 +718,9 @@@ static int __btrfs_open_devices(struct 
                if (!device->name)
                        continue;
  
-               ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
-                                           &bdev, &bh);
-               if (ret)
+               /* Just open everything we can; ignore failures here */
+               if (btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
+                                           &bdev, &bh))
                        continue;
  
                disk_super = (struct btrfs_super_block *)bh->b_data;
        return ret;
  }
  
- int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
-                          struct btrfs_device *device,
-                          u64 chunk_tree, u64 chunk_objectid,
-                          u64 chunk_offset, u64 start, u64 num_bytes)
static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+                                 struct btrfs_device *device,
+                                 u64 chunk_tree, u64 chunk_objectid,
+                                 u64 chunk_offset, u64 start, u64 num_bytes)
  {
        int ret;
        struct btrfs_path *path;
@@@ -1329,9 -1330,9 +1330,9 @@@ error
   * the device information is stored in the chunk root
   * the btrfs_device struct should be fully filled in
   */
- int btrfs_add_device(struct btrfs_trans_handle *trans,
-                    struct btrfs_root *root,
-                    struct btrfs_device *device)
static int btrfs_add_device(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_device *device)
  {
        int ret;
        struct btrfs_path *path;
@@@ -1710,8 -1711,8 +1711,8 @@@ void btrfs_destroy_dev_replace_tgtdev(s
        mutex_unlock(&fs_info->fs_devices->device_list_mutex);
  }
  
- int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
-                             struct btrfs_device **device)
static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
+                                    struct btrfs_device **device)
  {
        int ret = 0;
        struct btrfs_super_block *disk_super;
@@@ -3607,7 -3608,7 +3608,7 @@@ static int btrfs_cmp_device_info(const 
        return 0;
  }
  
- struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+ static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID10] = {
                .sub_stripes    = 2,
                .dev_stripes    = 1,
@@@ -3674,18 -3675,10 +3675,10 @@@ static u32 find_raid56_stripe_len(u32 d
  
  static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
  {
-       u64 features;
+       /* Only RAID5/RAID6 block-group types require the incompat flag. */
        if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
                return;
  
-       features = btrfs_super_incompat_flags(info->super_copy);
-       if (features & BTRFS_FEATURE_INCOMPAT_RAID56)
-               return;
-       features |= BTRFS_FEATURE_INCOMPAT_RAID56;
-       btrfs_set_super_incompat_flags(info->super_copy, features);
-       printk(KERN_INFO "btrfs: setting RAID5/6 feature flag\n");
+       /*
+        * Replaces the open-coded read-modify-write of the superblock
+        * incompat flags removed above; presumably btrfs_set_fs_incompat()
+        * is idempotent and handles the already-set case itself -- confirm
+        * against its definition in ctree.h.
+        */
+       btrfs_set_fs_incompat(info, RAID56);
  }
  
  static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
  
        em_tree = &extent_root->fs_info->mapping_tree.map_tree;
        write_lock(&em_tree->lock);
-       ret = add_extent_mapping(em_tree, em);
+       ret = add_extent_mapping(em_tree, em, 0);
        write_unlock(&em_tree->lock);
        if (ret) {
                free_extent_map(em);
@@@ -4240,9 -4233,25 +4233,25 @@@ int btrfs_num_copies(struct btrfs_fs_in
        read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, len);
        read_unlock(&em_tree->lock);
-       BUG_ON(!em);
  
-       BUG_ON(em->start > logical || em->start + em->len < logical);
+       /*
+        * We could return errors for these cases, but that could get ugly and
+        * we'd probably do the same thing which is just not do anything else
+        * and exit, so return 1 so the callers don't try to use other copies.
+        */
+       if (!em) {
+               btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
+                           logical+len);
+               return 1;
+       }
+       if (em->start > logical || em->start + em->len < logical) {
+               btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
+                           "%Lu-%Lu\n", logical, logical+len, em->start,
+                           em->start + em->len);
+               return 1;
+       }
        map = (struct map_lookup *)em->bdev;
        if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
                ret = map->num_stripes;
@@@ -4411,13 -4420,19 +4420,19 @@@ static int __btrfs_map_block(struct btr
        read_unlock(&em_tree->lock);
  
        if (!em) {
-               printk(KERN_CRIT "btrfs: unable to find logical %llu len %llu\n",
-                      (unsigned long long)logical,
-                      (unsigned long long)*length);
-               BUG();
+               btrfs_crit(fs_info, "unable to find logical %llu len %llu",
+                       (unsigned long long)logical,
+                       (unsigned long long)*length);
+               return -EINVAL;
+       }
+       if (em->start > logical || em->start + em->len < logical) {
+               btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
+                          "found %Lu-%Lu\n", logical, em->start,
+                          em->start + em->len);
+               return -EINVAL;
        }
  
-       BUG_ON(em->start > logical || em->start + em->len < logical);
        map = (struct map_lookup *)em->bdev;
        offset = logical - em->start;
  
@@@ -5106,9 -5121,9 +5121,9 @@@ struct async_sched 
   * This will add one bio to the pending list for a device and make sure
   * the work struct is scheduled.
   */
- noinline void btrfs_schedule_bio(struct btrfs_root *root,
-                                struct btrfs_device *device,
-                                int rw, struct bio *bio)
static noinline void btrfs_schedule_bio(struct btrfs_root *root,
+                                       struct btrfs_device *device,
+                                       int rw, struct bio *bio)
  {
        int should_queue = 1;
        struct btrfs_pending_bios *pending_bios;
@@@ -5177,7 -5192,7 +5192,7 @@@ static int bio_size_ok(struct block_dev
        }
  
        prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
 -      if ((bio->bi_size >> 9) > max_sectors)
 +      if (bio_sectors(bio) > max_sectors)
                return 0;
  
        if (!q->merge_bvec_fn)
@@@ -5308,10 -5323,10 +5323,10 @@@ int btrfs_map_bio(struct btrfs_root *ro
        }
  
        if (map_length < length) {
-               printk(KERN_CRIT "btrfs: mapping failed logical %llu bio len %llu "
-                      "len %llu\n", (unsigned long long)logical,
-                      (unsigned long long)length,
-                      (unsigned long long)map_length);
+               btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
+                       (unsigned long long)logical,
+                       (unsigned long long)length,
+                       (unsigned long long)map_length);
                BUG();
        }
  
@@@ -5476,7 -5491,7 +5491,7 @@@ static int read_one_chunk(struct btrfs_
        }
  
        write_lock(&map_tree->map_tree.lock);
-       ret = add_extent_mapping(&map_tree->map_tree, em);
+       ret = add_extent_mapping(&map_tree->map_tree, em, 0);
        write_unlock(&map_tree->map_tree.lock);
        BUG_ON(ret); /* Tree corruption */
        free_extent_map(em);
@@@ -5583,8 -5598,8 +5598,8 @@@ static int read_one_dev(struct btrfs_ro
                        return -EIO;
  
                if (!device) {
-                       printk(KERN_WARNING "warning devid %llu missing\n",
-                              (unsigned long long)devid);
+                       btrfs_warn(root->fs_info, "devid %llu missing",
+                               (unsigned long long)devid);
                        device = add_missing_dev(root, devid, dev_uuid);
                        if (!device)
                                return -ENOMEM;
@@@ -5926,7 -5941,7 +5941,7 @@@ void btrfs_dev_stat_inc_and_print(struc
        btrfs_dev_stat_print_on_error(dev);
  }
  
- void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
  {
        if (!dev->dev_stats_valid)
                return;