dasd: fix error recovery for alias devices during format
author Stefan Haberland <stefan.haberland@de.ibm.com>
Fri, 18 Jul 2014 12:26:01 +0000 (14:26 +0200)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Tue, 22 Jul 2014 07:26:23 +0000 (09:26 +0200)
Formatting can cause a kernel panic or a hanging device if an alias
device is set offline or I/O errors occur.

Omit the error recovery procedure (ERP) for alias devices and retry on
the base device with full ERP.

Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
drivers/s390/block/dasd.c
drivers/s390/block/dasd_eckd.c
drivers/s390/block/dasd_int.h
drivers/s390/block/dasd_ioctl.c

index 925b056dc2b3898fe49daf30dd09cc436f1b7623..5df05f26b7d9d966fc35a3fab397afbc33b53f7b 100644 (file)
@@ -2307,17 +2307,27 @@ retry:
 
        rc = 0;
        list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) {
-               if (__dasd_sleep_on_erp(cqr)) {
-                       if (!cqr->status == DASD_CQR_TERMINATED &&
-                           !cqr->status == DASD_CQR_NEED_ERP)
-                               break;
-                       rc = 1;
+               /*
+                * for alias devices simplify error recovery and
+                * return to upper layer
+                */
+               if (cqr->startdev != cqr->basedev &&
+                   (cqr->status == DASD_CQR_TERMINATED ||
+                    cqr->status == DASD_CQR_NEED_ERP))
+                       return -EAGAIN;
+               else {
+                       /* normal recovery for basedev IO */
+                       if (__dasd_sleep_on_erp(cqr)) {
+                               if (!cqr->status == DASD_CQR_TERMINATED &&
+                                   !cqr->status == DASD_CQR_NEED_ERP)
+                                       break;
+                               rc = 1;
+                       }
                }
        }
        if (rc)
                goto retry;
 
-
        return 0;
 }
 
index 39748fda6e1fcd6964aefd277ece87df32ea9289..e74e5f7b431dfac1a3618810ecabcd4c47c17e5c 100644 (file)
@@ -2061,11 +2061,12 @@ dasd_eckd_fill_geometry(struct dasd_block *block, struct hd_geometry *geo)
 
 static struct dasd_ccw_req *
 dasd_eckd_build_format(struct dasd_device *base,
-                      struct format_data_t *fdata)
+                      struct format_data_t *fdata,
+                      int enable_PAV)
 {
        struct dasd_eckd_private *base_priv;
        struct dasd_eckd_private *start_priv;
-       struct dasd_device *startdev;
+       struct dasd_device *startdev = NULL;
        struct dasd_ccw_req *fcp;
        struct eckd_count *ect;
        struct ch_t address;
@@ -2079,7 +2080,9 @@ dasd_eckd_build_format(struct dasd_device *base,
        int nr_tracks;
        int use_prefix;
 
-       startdev = dasd_alias_get_start_dev(base);
+       if (enable_PAV)
+               startdev = dasd_alias_get_start_dev(base);
+
        if (!startdev)
                startdev = base;
 
@@ -2309,6 +2312,7 @@ dasd_eckd_build_format(struct dasd_device *base,
 
        fcp->startdev = startdev;
        fcp->memdev = startdev;
+       fcp->basedev = base;
        fcp->retries = 256;
        fcp->expires = startdev->default_expires * HZ;
        fcp->buildclk = get_tod_clock();
@@ -2319,7 +2323,8 @@ dasd_eckd_build_format(struct dasd_device *base,
 
 static int
 dasd_eckd_format_device(struct dasd_device *base,
-                       struct format_data_t *fdata)
+                       struct format_data_t *fdata,
+                       int enable_PAV)
 {
        struct dasd_ccw_req *cqr, *n;
        struct dasd_block *block;
@@ -2327,7 +2332,7 @@ dasd_eckd_format_device(struct dasd_device *base,
        struct list_head format_queue;
        struct dasd_device *device;
        int old_stop, format_step;
-       int step, rc = 0;
+       int step, rc = 0, sleep_rc;
 
        block = base->block;
        private = (struct dasd_eckd_private *) base->private;
@@ -2361,11 +2366,11 @@ dasd_eckd_format_device(struct dasd_device *base,
        }
 
        INIT_LIST_HEAD(&format_queue);
-       old_stop = fdata->stop_unit;
 
+       old_stop = fdata->stop_unit;
        while (fdata->start_unit <= 1) {
                fdata->stop_unit = fdata->start_unit;
-               cqr = dasd_eckd_build_format(base, fdata);
+               cqr = dasd_eckd_build_format(base, fdata, enable_PAV);
                list_add(&cqr->blocklist, &format_queue);
 
                fdata->stop_unit = old_stop;
@@ -2383,7 +2388,7 @@ retry:
                if (step > format_step)
                        fdata->stop_unit = fdata->start_unit + format_step - 1;
 
-               cqr = dasd_eckd_build_format(base, fdata);
+               cqr = dasd_eckd_build_format(base, fdata, enable_PAV);
                if (IS_ERR(cqr)) {
                        if (PTR_ERR(cqr) == -ENOMEM) {
                                /*
@@ -2403,7 +2408,7 @@ retry:
        }
 
 sleep:
-       dasd_sleep_on_queue(&format_queue);
+       sleep_rc = dasd_sleep_on_queue(&format_queue);
 
        list_for_each_entry_safe(cqr, n, &format_queue, blocklist) {
                device = cqr->startdev;
@@ -2415,6 +2420,9 @@ sleep:
                private->count--;
        }
 
+       if (sleep_rc)
+               return sleep_rc;
+
        /*
         * in case of ENOMEM we need to retry after
         * first requests are finished
index 7a14582dc8a4099fc8b2af5c2290905834887f01..2cc188c8203fca3b75fcdda0378538ba15cc31d6 100644 (file)
@@ -175,6 +175,7 @@ struct dasd_ccw_req {
        struct dasd_block *block;       /* the originating block device */
        struct dasd_device *memdev;     /* the device used to allocate this */
        struct dasd_device *startdev;   /* device the request is started on */
+       struct dasd_device *basedev;    /* base device if no block->base */
        void *cpaddr;                   /* address of ccw or tcw */
        unsigned char cpmode;           /* 0 = cmd mode, 1 = itcw */
        char status;                    /* status of this request */
@@ -321,7 +322,7 @@ struct dasd_discipline {
        int (*term_IO) (struct dasd_ccw_req *);
        void (*handle_terminated_request) (struct dasd_ccw_req *);
        int (*format_device) (struct dasd_device *,
-                             struct format_data_t *);
+                             struct format_data_t *, int enable_PAV);
        int (*free_cp) (struct dasd_ccw_req *, struct request *);
 
        /*
index 25a0f2f8b0b98c93376294e25512e43c53701f72..ad62608e4175b2a75b11c04350c2ca1fb5903938 100644 (file)
@@ -203,7 +203,9 @@ static int
 dasd_format(struct dasd_block *block, struct format_data_t *fdata)
 {
        struct dasd_device *base;
-       int rc;
+       int enable_PAV = 1;
+       int rc, retries;
+       int start, stop;
 
        base = block->base;
        if (base->discipline->format_device == NULL)
@@ -231,11 +233,30 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata)
                bdput(bdev);
        }
 
-       rc = base->discipline->format_device(base, fdata);
-       if (rc)
-               return rc;
-
-       return 0;
+       retries = 255;
+       /* backup start- and endtrack for retries */
+       start = fdata->start_unit;
+       stop = fdata->stop_unit;
+       do {
+               rc = base->discipline->format_device(base, fdata, enable_PAV);
+               if (rc) {
+                       if (rc == -EAGAIN) {
+                               retries--;
+                               /* disable PAV in case of errors */
+                               enable_PAV = 0;
+                               fdata->start_unit = start;
+                               fdata->stop_unit = stop;
+                       } else
+                               return rc;
+               } else
+                       /* success */
+                       break;
+       } while (retries);
+
+       if (!retries)
+               return -EIO;
+       else
+               return 0;
 }
 
 /*