[PATCH] md: Handle overflow of mdu_array_info_t->size better
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / drivers / md / md.c
index 742a82a4d10bbcbeb67e40721c11824637ac9a1a..8f161743e18f2d6e1dab930e567de975879a6798 100644 (file)
@@ -81,10 +81,22 @@ static DEFINE_SPINLOCK(pers_lock);
  * idle IO detection.
  *
  * you can change it via /proc/sys/dev/raid/speed_limit_min and _max.
+ * or, for a single array, via /sys/block/mdX/md/sync_speed_{min,max}
  */
 
 static int sysctl_speed_limit_min = 1000;
 static int sysctl_speed_limit_max = 200000;
+static inline int speed_min(mddev_t *mddev)
+{
+       int local = mddev->sync_speed_min;      /* 0: no per-array setting */
+       return local ? local : sysctl_speed_limit_min;
+}
+
+static inline int speed_max(mddev_t *mddev)
+{
+       int local = mddev->sync_speed_max;      /* 0: no per-array setting */
+       return local ? local : sysctl_speed_limit_max;
+}
 
 static struct ctl_table_header *raid_table_header;
 
@@ -1149,6 +1161,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
        sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors);
 
+       sb->raid_disks = cpu_to_le32(mddev->raid_disks);
+       sb->size = cpu_to_le64(mddev->size);
+
        if (mddev->bitmap && mddev->bitmap_file == NULL) {
                sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
                sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@ -1226,6 +1241,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
        mdk_rdev_t *same_pdev;
        char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
        struct kobject *ko;
+       char *s;
 
        if (rdev->mddev) {
                MD_BUG();
@@ -1265,6 +1281,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
        bdevname(rdev->bdev,b);
        if (kobject_set_name(&rdev->kobj, "dev-%s", b) < 0)
                return -ENOMEM;
+       while ( (s=strchr(rdev->kobj.k_name, '/')) != NULL)
+               *s = '!';
                        
        list_add(&rdev->same_set, &mddev->disks);
        rdev->mddev = mddev;
@@ -1667,7 +1685,7 @@ __ATTR(slot, 0644, slot_show, slot_store);
 static ssize_t
 offset_show(mdk_rdev_t *rdev, char *page)
 {
-       return sprintf(page, "%llu\n", rdev->data_offset);
+       return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
 }
 
 static ssize_t
@@ -1686,12 +1704,37 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 static struct rdev_sysfs_entry rdev_offset =
 __ATTR(offset, 0644, offset_show, offset_store);
 
+static ssize_t rdev_size_show(mdk_rdev_t *rdev, char *page)
+{
+       unsigned long long size = rdev->size;   /* widen to match %llu */
+       return sprintf(page, "%llu\n", size);
+}
+
+/* sysfs write: set rdev->size from a decimal string (optional trailing '\n'). */
+static ssize_t rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+       char *end;
+       unsigned long long new_size = simple_strtoull(buf, &end, 10);
+       if (end == buf || (*end != '\0' && *end != '\n'))
+               return -EINVAL;
+       if (rdev->mddev->pers)
+               return -EBUSY;  /* refused once mddev->pers is set */
+       rdev->size = new_size;
+       if (rdev->mddev->size == 0 || new_size < rdev->mddev->size)
+               rdev->mddev->size = new_size;   /* set or shrink the array size */
+       return len;
+}
+
+static struct rdev_sysfs_entry rdev_size =
+__ATTR(size, 0644, rdev_size_show, rdev_size_store);
+
 static struct attribute *rdev_default_attrs[] = {
        &rdev_state.attr,
        &rdev_super.attr,
        &rdev_errors.attr,
        &rdev_slot.attr,
        &rdev_offset.attr,
+       &rdev_size.attr,
        NULL,
 };
 static ssize_t
@@ -1962,6 +2005,65 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
 static struct md_sysfs_entry md_chunk_size =
 __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store);
 
+static ssize_t null_show(mddev_t *mddev, char *page)
+{
+       /* Nothing to show: every read is rejected. */
+       return -EINVAL;
+}
+
+static ssize_t
+new_dev_store(mddev_t *mddev, const char *buf, size_t len)
+{
+       /* buf must be "major:minor" (decimal), optionally followed by '\n'.
+        * The new device is added to the array; len is returned on success.
+        * If the array has a persistent superblock, we read the superblock
+        * to initialise info and, if the array already has devices, validate
+        * it against the first one with load_super().  Otherwise, the only
+        * checking done is in bind_rdev_to_array, which mainly checks size.
+        */
+       char *e;
+       int major = simple_strtoul(buf, &e, 10);
+       int minor;
+       dev_t dev;
+       mdk_rdev_t *rdev;
+       int err;
+
+       if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
+               return -EINVAL; /* empty input, no ':' or nothing after it */
+       minor = simple_strtoul(e+1, &e, 10);
+       if (*e && *e != '\n')
+               return -EINVAL; /* trailing garbage after the minor number */
+       dev = MKDEV(major, minor);
+       if (major != MAJOR(dev) ||
+           minor != MINOR(dev))
+               return -EOVERFLOW; /* numbers did not survive the MKDEV round trip */
+
+
+       if (mddev->persistent) {
+               rdev = md_import_device(dev, mddev->major_version,
+                                       mddev->minor_version);
+               if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
+                       mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
+                                                      mdk_rdev_t, same_set);
+                       err = super_types[mddev->major_version]
+                               .load_super(rdev, rdev0, mddev->minor_version);
+                       if (err < 0)
+                               goto out;
+               }
+       } else
+               rdev = md_import_device(dev, -1, -1);
+
+       if (IS_ERR(rdev))
+               return PTR_ERR(rdev); /* import failed: nothing to export */
+       err = bind_rdev_to_array(rdev, mddev);
+ out:
+       if (err)
+               export_rdev(rdev); /* import succeeded but a later step failed */
+       return err ? err : len;
+}
+
+static struct md_sysfs_entry md_new_device =
+__ATTR(new_dev, 0200, null_show, new_dev_store);
 
 static ssize_t
 size_show(mddev_t *mddev, char *page)
@@ -2113,18 +2215,107 @@ md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
 static struct md_sysfs_entry
 md_mismatches = __ATTR_RO(mismatch_cnt);
 
+/* Show the effective minimum and whether it is a "local" or "system" value. */
+static ssize_t sync_min_show(mddev_t *mddev, char *page)
+{
+       const char *origin = mddev->sync_speed_min ? "local" : "system";
+       return sprintf(page, "%d (%s)\n", speed_min(mddev), origin);
+}
+
+/* Set the per-array minimum sync speed.  Input starting with "system" stores
+ * 0 (no per-array value); otherwise a positive decimal number is required. */
+static ssize_t sync_min_store(mddev_t *mddev, const char *buf, size_t len)
+{
+       char *end;
+       int speed = 0;
+       if (strncmp(buf, "system", 6) != 0) {
+               speed = simple_strtoul(buf, &end, 10);
+               if (end == buf || (*end != '\0' && *end != '\n') || speed <= 0)
+                       return -EINVAL;
+       }
+       mddev->sync_speed_min = speed;
+       return len;
+}
+
+static struct md_sysfs_entry md_sync_min =
+__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
+
+/* Show the effective maximum and whether it is a "local" or "system" value. */
+static ssize_t sync_max_show(mddev_t *mddev, char *page)
+{
+       const char *origin = mddev->sync_speed_max ? "local" : "system";
+       return sprintf(page, "%d (%s)\n", speed_max(mddev), origin);
+}
+
+/* Set the per-array maximum sync speed.  Input starting with "system" stores
+ * 0 (no per-array value); otherwise a positive decimal number is required. */
+static ssize_t sync_max_store(mddev_t *mddev, const char *buf, size_t len)
+{
+       char *end;
+       int speed = 0;
+       if (strncmp(buf, "system", 6) != 0) {
+               speed = simple_strtoul(buf, &end, 10);
+               if (end == buf || (*end != '\0' && *end != '\n') || speed <= 0)
+                       return -EINVAL;
+       }
+       mddev->sync_speed_max = speed;
+       return len;
+}
+
+static struct md_sysfs_entry md_sync_max =
+__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
+
+
+/* Show progress since the last resync mark divided by elapsed seconds, in K/sec. */
+static ssize_t sync_speed_show(mddev_t *mddev, char *page)
+{
+       unsigned long resync, dt, db;
+       resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+       dt = ((jiffies - mddev->resync_mark) / HZ);
+       if (!dt) dt++;  /* less than a second elapsed: avoid dividing by zero */
+       db = resync - (mddev->resync_mark_cnt);
+       return sprintf(page, "%lu\n", db/dt/2); /* K/sec; %lu matches unsigned long */
+}
+
+static struct md_sysfs_entry
+md_sync_speed = __ATTR_RO(sync_speed);
+
+/*
+ * Show sync progress as "<done> / <total>".  The total is resync_max_sectors
+ * while MD_RECOVERY_SYNC is set, otherwise twice mddev->size.
+ */
+static ssize_t sync_completed_show(mddev_t *mddev, char *page)
+{
+       unsigned long done, total;
+
+       total = test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
+               mddev->resync_max_sectors : mddev->size << 1;
+       done = mddev->curr_resync - atomic_read(&mddev->recovery_active);
+       return sprintf(page, "%lu / %lu\n", done, total);
+}
+
+static struct md_sysfs_entry
+md_sync_completed = __ATTR_RO(sync_completed);
+
 static struct attribute *md_default_attrs[] = {
        &md_level.attr,
        &md_raid_disks.attr,
        &md_chunk_size.attr,
        &md_size.attr,
        &md_metadata.attr,
+       &md_new_device.attr,
        NULL,
 };
 
 static struct attribute *md_redundancy_attrs[] = {
        &md_scan_mode.attr,
        &md_mismatches.attr,
+       &md_sync_min.attr,
+       &md_sync_max.attr,
+       &md_sync_speed.attr,
+       &md_sync_completed.attr,
        NULL,
 };
 static struct attribute_group md_redundancy_group = {
@@ -2498,14 +2689,6 @@ static int do_md_stop(mddev_t * mddev, int ro)
                        set_disk_ro(disk, 1);
        }
 
-       bitmap_destroy(mddev);
-       if (mddev->bitmap_file) {
-               atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1);
-               fput(mddev->bitmap_file);
-               mddev->bitmap_file = NULL;
-       }
-       mddev->bitmap_offset = 0;
-
        /*
         * Free resources if final stop
         */
@@ -2515,6 +2698,14 @@ static int do_md_stop(mddev_t * mddev, int ro)
                struct gendisk *disk;
                printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
 
+               bitmap_destroy(mddev);
+               if (mddev->bitmap_file) {
+                       atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1);
+                       fput(mddev->bitmap_file);
+                       mddev->bitmap_file = NULL;
+               }
+               mddev->bitmap_offset = 0;
+
                ITERATE_RDEV(mddev,rdev,tmp)
                        if (rdev->raid_disk >= 0) {
                                char nm[20];
@@ -2751,6 +2942,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
        info.ctime         = mddev->ctime;
        info.level         = mddev->level;
        info.size          = mddev->size;
+       if (info.size != mddev->size) /* overflow */
+               info.size = -1;
        info.nr_disks      = nr;
        info.raid_disks    = mddev->raid_disks;
        info.md_minor      = mddev->md_minor;
@@ -3207,6 +3400,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
        mddev->ctime         = get_seconds();
 
        mddev->level         = info->level;
+       mddev->clevel[0]     = 0;
        mddev->size          = info->size;
        mddev->raid_disks    = info->raid_disks;
        /* don't set md_minor, it is determined by which /dev/md* was
@@ -3275,9 +3469,9 @@ static int update_size(mddev_t *mddev, unsigned long size)
 
                bdev = bdget_disk(mddev->gendisk, 0);
                if (bdev) {
-                       down(&bdev->bd_inode->i_sem);
-                       i_size_write(bdev->bd_inode, mddev->array_size << 10);
-                       up(&bdev->bd_inode->i_sem);
+                       mutex_lock(&bdev->bd_inode->i_mutex);
+                       i_size_write(bdev->bd_inode, (loff_t)mddev->array_size << 10);
+                       mutex_unlock(&bdev->bd_inode->i_mutex);
                        bdput(bdev);
                }
        }
@@ -3296,17 +3490,6 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks)
        if (mddev->sync_thread)
                return -EBUSY;
        rv = mddev->pers->reshape(mddev, raid_disks);
-       if (!rv) {
-               struct block_device *bdev;
-
-               bdev = bdget_disk(mddev->gendisk, 0);
-               if (bdev) {
-                       down(&bdev->bd_inode->i_sem);
-                       i_size_write(bdev->bd_inode, mddev->array_size << 10);
-                       up(&bdev->bd_inode->i_sem);
-                       bdput(bdev);
-               }
-       }
        return rv;
 }
 
@@ -3342,7 +3525,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
                )
                return -EINVAL;
        /* Check there is only one change */
-       if (mddev->size != info->size) cnt++;
+       if (info->size >= 0 && mddev->size != info->size) cnt++;
        if (mddev->raid_disks != info->raid_disks) cnt++;
        if (mddev->layout != info->layout) cnt++;
        if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
@@ -3359,7 +3542,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
                else
                        return mddev->pers->reconfig(mddev, info->layout, -1);
        }
-       if (mddev->size != info->size)
+       if (info->size >= 0 && mddev->size != info->size)
                rv = update_size(mddev, info->size);
 
        if (mddev->raid_disks    != info->raid_disks)
@@ -3413,12 +3596,21 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev)
        return 0;
 }
 
+/* Report a fixed geometry: 2 heads, 4 sectors, cylinders = capacity / 8. */
+static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       mddev_t *md = bdev->bd_disk->private_data;
+       geo->cylinders = get_capacity(md->gendisk) / 8;
+       geo->sectors = 4;
+       geo->heads = 2;
+       return 0;
+}
+
 static int md_ioctl(struct inode *inode, struct file *file,
                        unsigned int cmd, unsigned long arg)
 {
        int err = 0;
        void __user *argp = (void __user *)arg;
-       struct hd_geometry __user *loc = argp;
        mddev_t *mddev = NULL;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -3580,24 +3772,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
         * 4 sectors (with a BIG number of cylinders...). This drives
         * dosfs just mad... ;-)
         */
-               case HDIO_GETGEO:
-                       if (!loc) {
-                               err = -EINVAL;
-                               goto abort_unlock;
-                       }
-                       err = put_user (2, (char __user *) &loc->heads);
-                       if (err)
-                               goto abort_unlock;
-                       err = put_user (4, (char __user *) &loc->sectors);
-                       if (err)
-                               goto abort_unlock;
-                       err = put_user(get_capacity(mddev->gendisk)/8,
-                                       (short __user *) &loc->cylinders);
-                       if (err)
-                               goto abort_unlock;
-                       err = put_user (get_start_sect(inode->i_bdev),
-                                               (long __user *) &loc->start);
-                       goto done_unlock;
        }
 
        /*
@@ -3726,6 +3900,7 @@ static struct block_device_operations md_fops =
        .open           = md_open,
        .release        = md_release,
        .ioctl          = md_ioctl,
+       .getgeo         = md_getgeo,
        .media_changed  = md_media_changed,
        .revalidate_disk= md_revalidate,
 };
@@ -4348,10 +4523,10 @@ static void md_do_sync(mddev_t *mddev)
 
        printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
        printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
-               " %d KB/sec/disc.\n", sysctl_speed_limit_min);
+               " %d KB/sec/disc.\n", speed_min(mddev));
        printk(KERN_INFO "md: using maximum available idle IO bandwidth "
               "(but not more than %d KB/sec) for reconstruction.\n",
-              sysctl_speed_limit_max);
+              speed_max(mddev));
 
        is_mddev_idle(mddev); /* this also initializes IO event counters */
        /* we don't use the checkpoint if there's a bitmap */
@@ -4392,7 +4567,7 @@ static void md_do_sync(mddev_t *mddev)
 
                skipped = 0;
                sectors = mddev->pers->sync_request(mddev, j, &skipped,
-                                           currspeed < sysctl_speed_limit_min);
+                                           currspeed < speed_min(mddev));
                if (sectors == 0) {
                        set_bit(MD_RECOVERY_ERR, &mddev->recovery);
                        goto out;
@@ -4457,8 +4632,8 @@ static void md_do_sync(mddev_t *mddev)
                currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
                        /((jiffies-mddev->resync_mark)/HZ +1) +1;
 
-               if (currspeed > sysctl_speed_limit_min) {
-                       if ((currspeed > sysctl_speed_limit_max) ||
+               if (currspeed > speed_min(mddev)) {
+                       if ((currspeed > speed_max(mddev)) ||
                                        !is_mddev_idle(mddev)) {
                                msleep(500);
                                goto repeat;