Btrfs: allow for pausing restriper
author Ilya Dryomov <idryomov@gmail.com>
Mon, 16 Jan 2012 20:04:49 +0000 (22:04 +0200)
committer Ilya Dryomov <idryomov@gmail.com>
Mon, 16 Jan 2012 20:04:49 +0000 (22:04 +0200)
Implement an ioctl for pausing the restriper.  This pauses the relocation,
but balance is still considered to be "in progress": the balance item is
not deleted, other volume operations cannot be started, etc.  If paused
in the middle of a profile-changing operation, we will continue making
allocations with the target profile.

Add a hook to close_ctree() to pause the restriper and free its data
structures on unmount.  (It's safe to unmount when the restriper is in
the "paused" state; we will resume with the same parameters on the next
mount.)

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/ioctl.c
fs/btrfs/ioctl.h
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index 99eb2bcd9aa75db6c6d1a302a5af3fa007ca6e53..1afda75d5414ba9f8d42b6577f42b6525ef89bdc 100644 (file)
@@ -1214,7 +1214,10 @@ struct btrfs_fs_info {
        /* restriper state */
        spinlock_t balance_lock;
        struct mutex balance_mutex;
+       atomic_t balance_running;
+       atomic_t balance_pause_req;
        struct btrfs_balance_control *balance_ctl;
+       wait_queue_head_t balance_wait_q;
 
        unsigned data_chunk_allocations;
        unsigned metadata_ratio;
@@ -2658,6 +2661,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
 }
 static inline void free_fs_info(struct btrfs_fs_info *fs_info)
 {
+       kfree(fs_info->balance_ctl);
        kfree(fs_info->delayed_root);
        kfree(fs_info->extent_root);
        kfree(fs_info->tree_root);
index eb7a11ac5b73aa127805b1c8f280ffababc1d423..8ce83740780005bb02f83455875021b4e56398e1 100644 (file)
@@ -2004,7 +2004,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
        spin_lock_init(&fs_info->balance_lock);
        mutex_init(&fs_info->balance_mutex);
+       atomic_set(&fs_info->balance_running, 0);
+       atomic_set(&fs_info->balance_pause_req, 0);
        fs_info->balance_ctl = NULL;
+       init_waitqueue_head(&fs_info->balance_wait_q);
 
        sb->s_blocksize = 4096;
        sb->s_blocksize_bits = blksize_bits(4096);
@@ -2980,6 +2983,9 @@ int close_ctree(struct btrfs_root *root)
        fs_info->closing = 1;
        smp_mb();
 
+       /* pause restriper - we want to resume on mount */
+       btrfs_pause_balance(root->fs_info);
+
        btrfs_scrub_cancel(root);
 
        /* wait for any defraggers to finish */
index 29b3a94933f02adfba89fda23de85a0ea9e1440c..f572c53dda4fc8bf58e25d5e0278729714859b10 100644 (file)
@@ -3072,6 +3072,11 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 
        bargs->flags = bctl->flags;
 
+       if (atomic_read(&fs_info->balance_running))
+               bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
+       if (atomic_read(&fs_info->balance_pause_req))
+               bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
+
        memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
        memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
        memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
@@ -3103,6 +3108,11 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
                bargs = NULL;
        }
 
+       if (fs_info->balance_ctl) {
+               ret = -EINPROGRESS;
+               goto out_bargs;
+       }
+
        bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
        if (!bctl) {
                ret = -ENOMEM;
@@ -3123,7 +3133,8 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
 
        ret = btrfs_balance(bctl, bargs);
        /*
-        * bctl is freed in __cancel_balance
+        * bctl is freed in __cancel_balance or in free_fs_info if
+        * restriper was paused all the way until unmount
         */
        if (arg) {
                if (copy_to_user(arg, bargs, sizeof(*bargs)))
@@ -3138,6 +3149,19 @@ out:
        return ret;
 }
 
+static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
+{      /* dispatch a balance control request; cmd is a BTRFS_BALANCE_CTL_* mode */
+       if (!capable(CAP_SYS_ADMIN))    /* callers without CAP_SYS_ADMIN get -EPERM */
+               return -EPERM;
+
+       switch (cmd) {
+       case BTRFS_BALANCE_CTL_PAUSE:   /* the only mode handled here */
+               return btrfs_pause_balance(root->fs_info);
+       }
+
+       return -EINVAL;                 /* cmd matched no case above */
+}
+
 long btrfs_ioctl(struct file *file, unsigned int
                cmd, unsigned long arg)
 {
@@ -3216,6 +3240,8 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_scrub_progress(root, argp);
        case BTRFS_IOC_BALANCE_V2:
                return btrfs_ioctl_balance(root, argp);
+       case BTRFS_IOC_BALANCE_CTL:
+               return btrfs_ioctl_balance_ctl(root, arg);
        }
 
        return -ENOTTY;
index c8b37d2c0d779fcb9c742e1f430559c2175cf629..e972e11a8d77041bb3dd45807e5e2cc7ed50f8c2 100644 (file)
@@ -109,6 +109,9 @@ struct btrfs_ioctl_fs_info_args {
        __u64 reserved[124];                    /* pad to 1k */
 };
 
+/* balance control ioctl modes */
+#define BTRFS_BALANCE_CTL_PAUSE                1
+
 /*
  * this is packed, because it should be exactly the same as its disk
  * byte order counterpart (struct btrfs_disk_balance_args)
@@ -137,6 +140,9 @@ struct btrfs_balance_progress {
        __u64 completed;        /* # of chunks relocated so far */
 };
 
+#define BTRFS_BALANCE_STATE_RUNNING    (1ULL << 0)     /* balance is running */
+#define BTRFS_BALANCE_STATE_PAUSE_REQ  (1ULL << 1)     /* a pause has been requested */
+
 struct btrfs_ioctl_balance_args {
        __u64 flags;                            /* in/out */
        __u64 state;                            /* out */
@@ -315,6 +321,7 @@ struct btrfs_ioctl_logical_ino_args {
                               struct btrfs_ioctl_fs_info_args)
 #define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
                                   struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
 #define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
                                        struct btrfs_ioctl_ino_path_args)
 #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
index e0160607e6e2906aaf4532caa9c706332118f10b..d32660ce753da9074ceaddef7d8f197cd3613ef2 100644 (file)
@@ -2492,6 +2492,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        key.type = BTRFS_CHUNK_ITEM_KEY;
 
        while (1) {
+               if (atomic_read(&fs_info->balance_pause_req)) {
+                       ret = -ECANCELED;
+                       goto error;
+               }
+
                ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
                if (ret < 0)
                        goto error;
@@ -2553,6 +2558,11 @@ error:
        return ret;
 }
 
+static inline int balance_need_close(struct btrfs_fs_info *fs_info)
+{      /* nonzero when no pause request is pending; btrfs_balance() then calls __cancel_balance() */
+       return atomic_read(&fs_info->balance_pause_req) == 0;
+}
+
 static void __cancel_balance(struct btrfs_fs_info *fs_info)
 {
        int ret;
@@ -2575,7 +2585,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        u64 allowed;
        int ret;
 
-       if (btrfs_fs_closing(fs_info)) {
+       if (btrfs_fs_closing(fs_info) ||
+           atomic_read(&fs_info->balance_pause_req)) {
                ret = -EINVAL;
                goto out;
        }
@@ -2680,18 +2691,25 @@ do_balance:
                spin_unlock(&fs_info->balance_lock);
        }
 
+       atomic_inc(&fs_info->balance_running);
        mutex_unlock(&fs_info->balance_mutex);
 
        ret = __btrfs_balance(fs_info);
 
        mutex_lock(&fs_info->balance_mutex);
+       atomic_dec(&fs_info->balance_running);
 
        if (bargs) {
                memset(bargs, 0, sizeof(*bargs));
                update_ioctl_balance_args(fs_info, bargs);
        }
 
-       __cancel_balance(fs_info);
+       if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
+           balance_need_close(fs_info)) {
+               __cancel_balance(fs_info);
+       }
+
+       wake_up(&fs_info->balance_wait_q);
 
        return ret;
 out:
@@ -2785,6 +2803,35 @@ out:
        return ret;
 }
 
+int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
+{      /* request a pause and wait until balance stops running; returns 0 or -ENOTCONN */
+       int ret = 0;
+
+       mutex_lock(&fs_info->balance_mutex);
+       if (!fs_info->balance_ctl) {    /* no balance control present: nothing to pause */
+               mutex_unlock(&fs_info->balance_mutex);
+               return -ENOTCONN;
+       }
+
+       if (atomic_read(&fs_info->balance_running)) {
+               atomic_inc(&fs_info->balance_pause_req);        /* checked by the __btrfs_balance() loop */
+               mutex_unlock(&fs_info->balance_mutex);  /* dropped so btrfs_balance() can retake it */
+
+               wait_event(fs_info->balance_wait_q,     /* woken by wake_up() in btrfs_balance() */
+                          atomic_read(&fs_info->balance_running) == 0);
+
+               mutex_lock(&fs_info->balance_mutex);
+               /* we are good with balance_ctl ripped off from under us */
+               BUG_ON(atomic_read(&fs_info->balance_running));
+               atomic_dec(&fs_info->balance_pause_req);        /* withdraw our pause request */
+       } else {
+               ret = -ENOTCONN;        /* balance_ctl is set but balance is not running */
+       }
+
+       mutex_unlock(&fs_info->balance_mutex);
+       return ret;
+}
+
 /*
  * shrinking a device means finding all of the device extents past
  * the new size, and then following the back refs to the chunks.
index cd25ea58ec35ef47c3fb0f88118c4b5a574ab349..80953afb12b955f0e00273945f0e56352cd54137 100644 (file)
@@ -273,6 +273,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *path);
 int btrfs_balance(struct btrfs_balance_control *bctl,
                  struct btrfs_ioctl_balance_args *bargs);
 int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_trans_handle *trans,
                         struct btrfs_device *device, u64 num_bytes,