ext4: fix deadlock between inline_data and ext4_expand_extra_isize_ea()
authorTheodore Ts'o <tytso@mit.edu>
Thu, 12 Jan 2017 02:50:46 +0000 (21:50 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Thu, 12 Jan 2017 02:50:46 +0000 (21:50 -0500)
The xattr_sem deadlock problems fixed in commit 2e81a4eeedca: "ext4:
avoid deadlock when expanding inode size" didn't include the use of
xattr_sem in fs/ext4/inline.c.  With the addition of project quota
which added a new extra inode field, this exposed deadlocks in the
inline_data code similar to the ones fixed by 2e81a4eeedca.

The deadlock can be reproduced via:

   dmesg -n 7
   mke2fs -t ext4 -O inline_data -Fq -I 256 /dev/vdc 32768
   mount -t ext4 -o debug_want_extra_isize=24 /dev/vdc /vdc
   mkdir /vdc/a
   umount /vdc
   mount -t ext4 /dev/vdc /vdc
   echo foo > /vdc/a/foo

and looks like this:

[   11.158815]
[   11.160276] =============================================
[   11.161960] [ INFO: possible recursive locking detected ]
[   11.161960] 4.10.0-rc3-00015-g011b30a8a3cf #160 Tainted: G        W
[   11.161960] ---------------------------------------------
[   11.161960] bash/2519 is trying to acquire lock:
[   11.161960]  (&ei->xattr_sem){++++..}, at: [<c1225a4b>] ext4_expand_extra_isize_ea+0x3d/0x4cd
[   11.161960]
[   11.161960] but task is already holding lock:
[   11.161960]  (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152
[   11.161960]
[   11.161960] other info that might help us debug this:
[   11.161960]  Possible unsafe locking scenario:
[   11.161960]
[   11.161960]        CPU0
[   11.161960]        ----
[   11.161960]   lock(&ei->xattr_sem);
[   11.161960]   lock(&ei->xattr_sem);
[   11.161960]
[   11.161960]  *** DEADLOCK ***
[   11.161960]
[   11.161960]  May be due to missing lock nesting notation
[   11.161960]
[   11.161960] 4 locks held by bash/2519:
[   11.161960]  #0:  (sb_writers#3){.+.+.+}, at: [<c11a2414>] mnt_want_write+0x1e/0x3e
[   11.161960]  #1:  (&type->i_mutex_dir_key){++++++}, at: [<c119508b>] path_openat+0x338/0x67a
[   11.161960]  #2:  (jbd2_handle){++++..}, at: [<c123314a>] start_this_handle+0x582/0x622
[   11.161960]  #3:  (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152
[   11.161960]
[   11.161960] stack backtrace:
[   11.161960] CPU: 0 PID: 2519 Comm: bash Tainted: G        W       4.10.0-rc3-00015-g011b30a8a3cf #160
[   11.161960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014
[   11.161960] Call Trace:
[   11.161960]  dump_stack+0x72/0xa3
[   11.161960]  __lock_acquire+0xb7c/0xcb9
[   11.161960]  ? kvm_clock_read+0x1f/0x29
[   11.161960]  ? __lock_is_held+0x36/0x66
[   11.161960]  ? __lock_is_held+0x36/0x66
[   11.161960]  lock_acquire+0x106/0x18a
[   11.161960]  ? ext4_expand_extra_isize_ea+0x3d/0x4cd
[   11.161960]  down_write+0x39/0x72
[   11.161960]  ? ext4_expand_extra_isize_ea+0x3d/0x4cd
[   11.161960]  ext4_expand_extra_isize_ea+0x3d/0x4cd
[   11.161960]  ? _raw_read_unlock+0x22/0x2c
[   11.161960]  ? jbd2_journal_extend+0x1e2/0x262
[   11.161960]  ? __ext4_journal_get_write_access+0x3d/0x60
[   11.161960]  ext4_mark_inode_dirty+0x17d/0x26d
[   11.161960]  ? ext4_add_dirent_to_inline.isra.12+0xa5/0xb2
[   11.161960]  ext4_add_dirent_to_inline.isra.12+0xa5/0xb2
[   11.161960]  ext4_try_add_inline_entry+0x69/0x152
[   11.161960]  ext4_add_entry+0xa3/0x848
[   11.161960]  ? __brelse+0x14/0x2f
[   11.161960]  ? _raw_spin_unlock_irqrestore+0x44/0x4f
[   11.161960]  ext4_add_nondir+0x17/0x5b
[   11.161960]  ext4_create+0xcf/0x133
[   11.161960]  ? ext4_mknod+0x12f/0x12f
[   11.161960]  lookup_open+0x39e/0x3fb
[   11.161960]  ? __wake_up+0x1a/0x40
[   11.161960]  ? lock_acquire+0x11e/0x18a
[   11.161960]  path_openat+0x35c/0x67a
[   11.161960]  ? sched_clock_cpu+0xd7/0xf2
[   11.161960]  do_filp_open+0x36/0x7c
[   11.161960]  ? _raw_spin_unlock+0x22/0x2c
[   11.161960]  ? __alloc_fd+0x169/0x173
[   11.161960]  do_sys_open+0x59/0xcc
[   11.161960]  SyS_open+0x1d/0x1f
[   11.161960]  do_int80_syscall_32+0x4f/0x61
[   11.161960]  entry_INT80_32+0x2f/0x2f
[   11.161960] EIP: 0xb76ad469
[   11.161960] EFLAGS: 00000286 CPU: 0
[   11.161960] EAX: ffffffda EBX: 08168ac8 ECX: 00008241 EDX: 000001b6
[   11.161960] ESI: b75e46bc EDI: b7755000 EBP: bfbdb108 ESP: bfbdafc0
[   11.161960]  DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b

Cc: stable@vger.kernel.org # 3.10 (requires 2e81a4eeedca as a prereq)
Reported-by: George Spelvin <linux@sciencehorizons.net>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/inline.c
fs/ext4/xattr.c
fs/ext4/xattr.h

index 437df6a1a8417cec309c699cfef2166f02b6930a..99a5312ced5205e05d35b72fc27a23bab546cc94 100644 (file)
@@ -381,7 +381,7 @@ out:
 static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
                                    unsigned int len)
 {
-       int ret, size;
+       int ret, size, no_expand;
        struct ext4_inode_info *ei = EXT4_I(inode);
 
        if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
@@ -391,15 +391,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
        if (size < len)
                return -ENOSPC;
 
-       down_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_lock_xattr(inode, &no_expand);
 
        if (ei->i_inline_off)
                ret = ext4_update_inline_data(handle, inode, len);
        else
                ret = ext4_create_inline_data(handle, inode, len);
 
-       up_write(&EXT4_I(inode)->xattr_sem);
-
+       ext4_write_unlock_xattr(inode, &no_expand);
        return ret;
 }
 
@@ -533,7 +532,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
                                              struct inode *inode,
                                              unsigned flags)
 {
-       int ret, needed_blocks;
+       int ret, needed_blocks, no_expand;
        handle_t *handle = NULL;
        int retries = 0, sem_held = 0;
        struct page *page = NULL;
@@ -573,7 +572,7 @@ retry:
                goto out;
        }
 
-       down_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_lock_xattr(inode, &no_expand);
        sem_held = 1;
        /* If some one has already done this for us, just exit. */
        if (!ext4_has_inline_data(inode)) {
@@ -610,7 +609,7 @@ retry:
                put_page(page);
                page = NULL;
                ext4_orphan_add(handle, inode);
-               up_write(&EXT4_I(inode)->xattr_sem);
+               ext4_write_unlock_xattr(inode, &no_expand);
                sem_held = 0;
                ext4_journal_stop(handle);
                handle = NULL;
@@ -636,7 +635,7 @@ out:
                put_page(page);
        }
        if (sem_held)
-               up_write(&EXT4_I(inode)->xattr_sem);
+               ext4_write_unlock_xattr(inode, &no_expand);
        if (handle)
                ext4_journal_stop(handle);
        brelse(iloc.bh);
@@ -729,7 +728,7 @@ convert:
 int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
                               unsigned copied, struct page *page)
 {
-       int ret;
+       int ret, no_expand;
        void *kaddr;
        struct ext4_iloc iloc;
 
@@ -747,7 +746,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
                goto out;
        }
 
-       down_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_lock_xattr(inode, &no_expand);
        BUG_ON(!ext4_has_inline_data(inode));
 
        kaddr = kmap_atomic(page);
@@ -757,7 +756,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
        /* clear page dirty so that writepages wouldn't work for us. */
        ClearPageDirty(page);
 
-       up_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_unlock_xattr(inode, &no_expand);
        brelse(iloc.bh);
 out:
        return copied;
@@ -768,7 +767,7 @@ ext4_journalled_write_inline_data(struct inode *inode,
                                  unsigned len,
                                  struct page *page)
 {
-       int ret;
+       int ret, no_expand;
        void *kaddr;
        struct ext4_iloc iloc;
 
@@ -778,11 +777,11 @@ ext4_journalled_write_inline_data(struct inode *inode,
                return NULL;
        }
 
-       down_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_lock_xattr(inode, &no_expand);
        kaddr = kmap_atomic(page);
        ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
        kunmap_atomic(kaddr);
-       up_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_unlock_xattr(inode, &no_expand);
 
        return iloc.bh;
 }
@@ -1259,7 +1258,7 @@ out:
 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
                              struct inode *dir, struct inode *inode)
 {
-       int ret, inline_size;
+       int ret, inline_size, no_expand;
        void *inline_start;
        struct ext4_iloc iloc;
 
@@ -1267,7 +1266,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
        if (ret)
                return ret;
 
-       down_write(&EXT4_I(dir)->xattr_sem);
+       ext4_write_lock_xattr(dir, &no_expand);
        if (!ext4_has_inline_data(dir))
                goto out;
 
@@ -1313,7 +1312,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
 
 out:
        ext4_mark_inode_dirty(handle, dir);
-       up_write(&EXT4_I(dir)->xattr_sem);
+       ext4_write_unlock_xattr(dir, &no_expand);
        brelse(iloc.bh);
        return ret;
 }
@@ -1673,7 +1672,7 @@ int ext4_delete_inline_entry(handle_t *handle,
                             struct buffer_head *bh,
                             int *has_inline_data)
 {
-       int err, inline_size;
+       int err, inline_size, no_expand;
        struct ext4_iloc iloc;
        void *inline_start;
 
@@ -1681,7 +1680,7 @@ int ext4_delete_inline_entry(handle_t *handle,
        if (err)
                return err;
 
-       down_write(&EXT4_I(dir)->xattr_sem);
+       ext4_write_lock_xattr(dir, &no_expand);
        if (!ext4_has_inline_data(dir)) {
                *has_inline_data = 0;
                goto out;
@@ -1715,7 +1714,7 @@ int ext4_delete_inline_entry(handle_t *handle,
 
        ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
 out:
-       up_write(&EXT4_I(dir)->xattr_sem);
+       ext4_write_unlock_xattr(dir, &no_expand);
        brelse(iloc.bh);
        if (err != -ENOENT)
                ext4_std_error(dir->i_sb, err);
@@ -1814,11 +1813,11 @@ out:
 
 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
 {
-       int ret;
+       int ret, no_expand;
 
-       down_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_lock_xattr(inode, &no_expand);
        ret = ext4_destroy_inline_data_nolock(handle, inode);
-       up_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_unlock_xattr(inode, &no_expand);
 
        return ret;
 }
@@ -1903,7 +1902,7 @@ out:
 void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
 {
        handle_t *handle;
-       int inline_size, value_len, needed_blocks;
+       int inline_size, value_len, needed_blocks, no_expand;
        size_t i_size;
        void *value = NULL;
        struct ext4_xattr_ibody_find is = {
@@ -1920,7 +1919,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
        if (IS_ERR(handle))
                return;
 
-       down_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_lock_xattr(inode, &no_expand);
        if (!ext4_has_inline_data(inode)) {
                *has_inline = 0;
                ext4_journal_stop(handle);
@@ -1978,7 +1977,7 @@ out_error:
        up_write(&EXT4_I(inode)->i_data_sem);
 out:
        brelse(is.iloc.bh);
-       up_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_unlock_xattr(inode, &no_expand);
        kfree(value);
        if (inode->i_nlink)
                ext4_orphan_del(handle, inode);
@@ -1994,7 +1993,7 @@ out:
 
 int ext4_convert_inline_data(struct inode *inode)
 {
-       int error, needed_blocks;
+       int error, needed_blocks, no_expand;
        handle_t *handle;
        struct ext4_iloc iloc;
 
@@ -2016,15 +2015,10 @@ int ext4_convert_inline_data(struct inode *inode)
                goto out_free;
        }
 
-       down_write(&EXT4_I(inode)->xattr_sem);
-       if (!ext4_has_inline_data(inode)) {
-               up_write(&EXT4_I(inode)->xattr_sem);
-               goto out;
-       }
-
-       error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
-       up_write(&EXT4_I(inode)->xattr_sem);
-out:
+       ext4_write_lock_xattr(inode, &no_expand);
+       if (ext4_has_inline_data(inode))
+               error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
+       ext4_write_unlock_xattr(inode, &no_expand);
        ext4_journal_stop(handle);
 out_free:
        brelse(iloc.bh);
index 5a94fa52b74f8d6cfce999d4be4750d74b0ce2b2..c40bd55b6400307d8d4be60c3dd850c6163ef142 100644 (file)
@@ -1188,16 +1188,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
        struct ext4_xattr_block_find bs = {
                .s = { .not_found = -ENODATA, },
        };
-       unsigned long no_expand;
+       int no_expand;
        int error;
 
        if (!name)
                return -EINVAL;
        if (strlen(name) > 255)
                return -ERANGE;
-       down_write(&EXT4_I(inode)->xattr_sem);
-       no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
-       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+       ext4_write_lock_xattr(inode, &no_expand);
 
        error = ext4_reserve_inode_write(handle, inode, &is.iloc);
        if (error)
@@ -1264,7 +1262,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
                ext4_xattr_update_super_block(handle, inode->i_sb);
                inode->i_ctime = current_time(inode);
                if (!value)
-                       ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+                       no_expand = 0;
                error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
                /*
                 * The bh is consumed by ext4_mark_iloc_dirty, even with
@@ -1278,9 +1276,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 cleanup:
        brelse(is.iloc.bh);
        brelse(bs.bh);
-       if (no_expand == 0)
-               ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
-       up_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_unlock_xattr(inode, &no_expand);
        return error;
 }
 
@@ -1497,12 +1493,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
        int error = 0, tried_min_extra_isize = 0;
        int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
        int isize_diff; /* How much do we need to grow i_extra_isize */
+       int no_expand;
+
+       if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
+               return 0;
 
-       down_write(&EXT4_I(inode)->xattr_sem);
-       /*
-        * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
-        */
-       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
 retry:
        isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
        if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
@@ -1584,17 +1579,16 @@ shift:
        EXT4_I(inode)->i_extra_isize = new_extra_isize;
        brelse(bh);
 out:
-       ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
-       up_write(&EXT4_I(inode)->xattr_sem);
+       ext4_write_unlock_xattr(inode, &no_expand);
        return 0;
 
 cleanup:
        brelse(bh);
        /*
-        * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
-        * size expansion failed.
+        * Inode size expansion failed; don't try again
         */
-       up_write(&EXT4_I(inode)->xattr_sem);
+       no_expand = 1;
+       ext4_write_unlock_xattr(inode, &no_expand);
        return error;
 }
 
index a92e783fa057070a2d46ece41089bae6e3a26fe4..099c8b670ef56e2a4ab85266b4c0f112edaa48b9 100644 (file)
@@ -102,6 +102,38 @@ extern const struct xattr_handler ext4_xattr_security_handler;
 
 #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
 
+/*
+ * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes.
+ * The first is to signal that there the inline xattrs and data are
+ * taking up so much space that we might as well not keep trying to
+ * expand it.  The second is that xattr_sem is taken for writing, so
+ * we shouldn't try to recurse into the inode expansion.  For this
+ * second case, we need to make sure that we take save and restore the
+ * NO_EXPAND state flag appropriately.
+ */
+static inline void ext4_write_lock_xattr(struct inode *inode, int *save)
+{
+       down_write(&EXT4_I(inode)->xattr_sem);
+       *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
+       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+}
+
+static inline int ext4_write_trylock_xattr(struct inode *inode, int *save)
+{
+       if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0)
+               return 0;
+       *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
+       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+       return 1;
+}
+
+static inline void ext4_write_unlock_xattr(struct inode *inode, int *save)
+{
+       if (*save == 0)
+               ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+       up_write(&EXT4_I(inode)->xattr_sem);
+}
+
 extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
 
 extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);