s390/dasd: channel path aware error recovery
authorStefan Haberland <sth@linux.vnet.ibm.com>
Mon, 8 Aug 2016 13:56:54 +0000 (15:56 +0200)
committerMartin Schwidefsky <schwidefsky@de.ibm.com>
Mon, 12 Dec 2016 11:05:03 +0000 (12:05 +0100)
With this feature, the DASD device driver more robustly handles DASDs
that are attached via multiple channel paths and are subject to
constant Interface-Control-Checks (IFCCs) and Channel-Control-Checks
(CCCs) or loss of High-Performance-FICON (HPF) functionality on one or
more of these paths.

If a channel path does not work correctly, it is removed from normal
operation as long as other channel paths are available. All extended
error recovery states can be queried and reset via user space
interfaces.

Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Jan Hoeppner <hoeppner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/scsw.h
drivers/s390/block/dasd.c
drivers/s390/block/dasd_3990_erp.c
drivers/s390/block/dasd_devmap.c
drivers/s390/block/dasd_eckd.c
drivers/s390/block/dasd_eckd.h
drivers/s390/block/dasd_int.h

index 4af99cdaddf50436f13cb8fe4633a17acbca57b0..17a7904f001a3ec4acafdc6b56bd4cf80d9e70e3 100644 (file)
@@ -96,7 +96,8 @@ struct tm_scsw {
        u32 dstat:8;
        u32 cstat:8;
        u32 fcxs:8;
-       u32 schxs:8;
+       u32 ifob:1;
+       u32 sesq:7;
 } __attribute__ ((packed));
 
 /**
@@ -177,6 +178,9 @@ union scsw {
 #define SCHN_STAT_INTF_CTRL_CHK         0x02
 #define SCHN_STAT_CHAIN_CHECK   0x01
 
+#define SCSW_SESQ_DEV_NOFCX     3
+#define SCSW_SESQ_PATH_NOFCX    4
+
 /*
  * architectured values for first sense byte
  */
index 13a337faef43fbf76544a2d99cebfb7b7be559fd..0e3fdfdbd09846c454987913dfd678c4ec725991 100644 (file)
@@ -69,6 +69,7 @@ static void dasd_block_tasklet(struct dasd_block *);
 static void do_kick_device(struct work_struct *);
 static void do_restore_device(struct work_struct *);
 static void do_reload_device(struct work_struct *);
+static void do_requeue_requests(struct work_struct *);
 static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *);
 static void dasd_device_timeout(unsigned long);
 static void dasd_block_timeout(unsigned long);
@@ -125,6 +126,7 @@ struct dasd_device *dasd_alloc_device(void)
        INIT_WORK(&device->kick_work, do_kick_device);
        INIT_WORK(&device->restore_device, do_restore_device);
        INIT_WORK(&device->reload_device, do_reload_device);
+       INIT_WORK(&device->requeue_requests, do_requeue_requests);
        device->state = DASD_STATE_NEW;
        device->target = DASD_STATE_NEW;
        mutex_init(&device->state_mutex);
@@ -1622,6 +1624,13 @@ void dasd_generic_handle_state_change(struct dasd_device *device)
 }
 EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change);
 
+static int dasd_check_hpf_error(struct irb *irb)
+{ /* nonzero only if scsw_tm_is_valid_schxs() holds and sesq is a *_NOFCX code */
+       return (scsw_tm_is_valid_schxs(&irb->scsw) &&
+           (irb->scsw.tm.sesq == SCSW_SESQ_DEV_NOFCX ||
+            irb->scsw.tm.sesq == SCSW_SESQ_PATH_NOFCX));
+}
+
 /*
  * Interrupt handler for "normal" ssch-io based dasd devices.
  */
@@ -1748,6 +1757,13 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
                                          struct dasd_ccw_req, devlist);
                }
        } else {  /* error */
+               /* check for HPF error
+                * call discipline function to requeue all requests
+                * and disable HPF accordingly
+                */
+               if (cqr->cpmode && dasd_check_hpf_error(irb) &&
+                   device->discipline->handle_hpf_error)
+                       device->discipline->handle_hpf_error(device, irb);
                /*
                 * If we don't want complex ERP for this request, then just
                 * reset this and retry it in the fastpath
@@ -2924,10 +2940,10 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
 
        if (!block)
                return -EINVAL;
-       spin_lock_irqsave(&block->queue_lock, flags);
+       spin_lock_irqsave(&block->request_queue_lock, flags);
        req = (struct request *) cqr->callback_data;
        blk_requeue_request(block->request_queue, req);
-       spin_unlock_irqrestore(&block->queue_lock, flags);
+       spin_unlock_irqrestore(&block->request_queue_lock, flags);
 
        return 0;
 }
@@ -3701,7 +3717,7 @@ EXPORT_SYMBOL_GPL(dasd_generic_notify);
 void dasd_generic_path_event(struct ccw_device *cdev, int *path_event)
 {
        struct dasd_device *device;
-       int chp, oldopm;
+       int chp, oldopm, hpfpm, ifccpm;
 
        device = dasd_device_from_cdev_locked(cdev);
        if (IS_ERR(device))
@@ -3733,7 +3749,30 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event)
                                device->discipline->kick_validate(device);
                }
        }
-       if (oldopm && !dasd_path_get_opm(device)) {
+       hpfpm = dasd_path_get_hpfpm(device);
+       ifccpm = dasd_path_get_ifccpm(device);
+       if (!dasd_path_get_opm(device) && hpfpm) {
+               /*
+                * device has no operational paths but at least one path is
+                * disabled due to HPF errors
+                * disable HPF completely and use the path(s) again
+                */
+               if (device->discipline->disable_hpf)
+                       device->discipline->disable_hpf(device);
+               dasd_device_set_stop_bits(device, DASD_STOPPED_NOT_ACC);
+               dasd_path_set_tbvpm(device, hpfpm);
+               dasd_schedule_device_bh(device);
+               dasd_schedule_requeue(device);
+       } else if (!dasd_path_get_opm(device) && ifccpm) {
+               /*
+                * device has no operational paths but at least one path is
+                * disabled due to IFCC errors
+                * trigger path verification on paths with IFCC errors
+                */
+               dasd_path_set_tbvpm(device, ifccpm);
+               dasd_schedule_device_bh(device);
+       }
+       if (oldopm && !dasd_path_get_opm(device) && !hpfpm && !ifccpm) {
                dev_warn(&device->cdev->dev,
                         "No verified channel paths remain for the device\n");
                DBF_DEV_EVENT(DBF_WARNING, device,
@@ -3757,30 +3796,18 @@ int dasd_generic_verify_path(struct dasd_device *device, __u8 lpm)
 }
 EXPORT_SYMBOL_GPL(dasd_generic_verify_path);
 
-
-int dasd_generic_pm_freeze(struct ccw_device *cdev)
+/*
+ * clear active requests and requeue them to block layer if possible
+ */
+static int dasd_generic_requeue_all_requests(struct dasd_device *device)
 {
-       struct dasd_device *device = dasd_device_from_cdev(cdev);
-       struct list_head freeze_queue;
+       struct list_head requeue_queue;
        struct dasd_ccw_req *cqr, *n;
        struct dasd_ccw_req *refers;
        int rc;
 
-       if (IS_ERR(device))
-               return PTR_ERR(device);
-
-       /* mark device as suspended */
-       set_bit(DASD_FLAG_SUSPENDED, &device->flags);
-
-       if (device->discipline->freeze)
-               rc = device->discipline->freeze(device);
-
-       /* disallow new I/O  */
-       dasd_device_set_stop_bits(device, DASD_STOPPED_PM);
-
-       /* clear active requests and requeue them to block layer if possible */
-       INIT_LIST_HEAD(&freeze_queue);
-       spin_lock_irq(get_ccwdev_lock(cdev));
+       INIT_LIST_HEAD(&requeue_queue);
+       spin_lock_irq(get_ccwdev_lock(device->cdev));
        rc = 0;
        list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
                /* Check status and move request to flush_queue */
@@ -3791,25 +3818,22 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev)
                                dev_err(&device->cdev->dev,
                                        "Unable to terminate request %p "
                                        "on suspend\n", cqr);
-                               spin_unlock_irq(get_ccwdev_lock(cdev));
+                               spin_unlock_irq(get_ccwdev_lock(device->cdev));
                                dasd_put_device(device);
                                return rc;
                        }
                }
-               list_move_tail(&cqr->devlist, &freeze_queue);
+               list_move_tail(&cqr->devlist, &requeue_queue);
        }
-       spin_unlock_irq(get_ccwdev_lock(cdev));
+       spin_unlock_irq(get_ccwdev_lock(device->cdev));
 
-       list_for_each_entry_safe(cqr, n, &freeze_queue, devlist) {
+       list_for_each_entry_safe(cqr, n, &requeue_queue, devlist) {
                wait_event(dasd_flush_wq,
                           (cqr->status != DASD_CQR_CLEAR_PENDING));
-               if (cqr->status == DASD_CQR_CLEARED)
-                       cqr->status = DASD_CQR_QUEUED;
 
-               /* requeue requests to blocklayer will only work for
-                  block device requests */
-               if (_dasd_requeue_request(cqr))
-                       continue;
+               /* mark sleepon requests as ended */
+               if (cqr->callback_data == DASD_SLEEPON_START_TAG)
+                       cqr->callback_data = DASD_SLEEPON_END_TAG;
 
                /* remove requests from device and block queue */
                list_del_init(&cqr->devlist);
@@ -3821,6 +3845,14 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev)
                        dasd_free_erp_request(cqr, cqr->memdev);
                        cqr = refers;
                }
+
+               /*
+                * requeue requests to blocklayer will only work
+                * for block device requests
+                */
+               if (_dasd_requeue_request(cqr))
+                       continue;
+
                if (cqr->block)
                        list_del_init(&cqr->blocklist);
                cqr->block->base->discipline->free_cp(
@@ -3831,15 +3863,56 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev)
         * if requests remain then they are internal request
         * and go back to the device queue
         */
-       if (!list_empty(&freeze_queue)) {
+       if (!list_empty(&requeue_queue)) {
                /* move freeze_queue to start of the ccw_queue */
-               spin_lock_irq(get_ccwdev_lock(cdev));
-               list_splice_tail(&freeze_queue, &device->ccw_queue);
-               spin_unlock_irq(get_ccwdev_lock(cdev));
+               spin_lock_irq(get_ccwdev_lock(device->cdev));
+               list_splice_tail(&requeue_queue, &device->ccw_queue);
+               spin_unlock_irq(get_ccwdev_lock(device->cdev));
        }
-       dasd_put_device(device);
+       /* wake up generic waitqueue for eventually ended sleepon requests */
+       wake_up(&generic_waitq);
        return rc;
 }
+
+/*
+ * Work handler for device->requeue_requests (queued by
+ * dasd_schedule_requeue()): requeue all requests of the device, remove the
+ * DASD_STOPPED_NOT_ACC stop bit and schedule the block bottom half if a
+ * block device is attached.
+ *
+ * The device reference taken by dasd_schedule_requeue() is dropped here.
+ * dasd_generic_requeue_all_requests() already drops one reference on its
+ * error path, so the final put is skipped when it reports a failure;
+ * otherwise the same reference would be put twice.
+ */
+static void do_requeue_requests(struct work_struct *work)
+{
+       struct dasd_device *device = container_of(work, struct dasd_device,
+                                                 requeue_requests);
+       int rc;
+
+       rc = dasd_generic_requeue_all_requests(device);
+       dasd_device_remove_stop_bits(device, DASD_STOPPED_NOT_ACC);
+       if (device->block)
+               dasd_schedule_block_bh(device->block);
+       /* on failure the helper has already released the reference */
+       if (!rc)
+               dasd_put_device(device);
+}
+
+void dasd_schedule_requeue(struct dasd_device *device)
+{
+       dasd_get_device(device);
+       /* queue do_requeue_requests as kernel work; the handler drops the reference */
+       if (!schedule_work(&device->requeue_requests))
+               dasd_put_device(device); /* not queued by this call: undo the get */
+}
+EXPORT_SYMBOL(dasd_schedule_requeue);
+
+/*
+ * Freeze callback: mark the device as suspended, call the discipline's
+ * freeze hook if there is one, stop new I/O and requeue all requests.
+ *
+ * Returns PTR_ERR() of the device lookup if that fails, otherwise the
+ * result of dasd_generic_requeue_all_requests().
+ */
+int dasd_generic_pm_freeze(struct ccw_device *cdev)
+{
+       struct dasd_device *device = dasd_device_from_cdev(cdev);
+       int rc;
+
+       if (IS_ERR(device))
+               return PTR_ERR(device);
+
+       /* mark device as suspended */
+       set_bit(DASD_FLAG_SUSPENDED, &device->flags);
+
+       /* the hook's result is overwritten below and never evaluated */
+       if (device->discipline->freeze)
+               rc = device->discipline->freeze(device);
+
+       /* disallow new I/O  */
+       dasd_device_set_stop_bits(device, DASD_STOPPED_PM);
+
+       rc = dasd_generic_requeue_all_requests(device);
+       /*
+        * The requeue helper drops the device reference itself when it
+        * fails. On success the reference obtained from
+        * dasd_device_from_cdev() is still held and is released here.
+        */
+       if (!rc)
+               dasd_put_device(device);
+       return rc;
+}
 EXPORT_SYMBOL_GPL(dasd_generic_pm_freeze);
 
 int dasd_generic_restore_device(struct ccw_device *cdev)
index 9236e2c0c3d915caef126db1838f89f1f99bb836..95f7645e3c37e564c7ab9e012d58a7076f25be1e 100644 (file)
@@ -2208,6 +2208,51 @@ dasd_3990_erp_inspect_32(struct dasd_ccw_req * erp, char *sense)
 
 }                              /* end dasd_3990_erp_inspect_32 */
 
+static void dasd_3990_erp_disable_path(struct dasd_device *device, __u8 lpum)
+{
+       int pos = pathmask_to_pos(lpum); /* index into device->path[] */
+
+       /* lpum is the only operational path left, cannot disable */
+       if (!(dasd_path_get_opm(device) & ~lpum))
+               return;
+
+       dev_err(&device->cdev->dev,
+               "Path %x.%02x (pathmask %02x) is disabled - IFCC threshold exceeded\n",
+               device->path[pos].cssid, device->path[pos].chpid, lpum);
+       dasd_path_remove_opm(device, lpum); /* drop it from the operational mask */
+       dasd_path_add_ifccpm(device, lpum); /* and record it in the IFCC mask */
+       device->path[pos].errorclk = 0; /* clear the error accounting of this path */
+       atomic_set(&device->path[pos].error_count, 0);
+}
+
+static void dasd_3990_erp_account_error(struct dasd_ccw_req *erp)
+{
+       struct dasd_device *device = erp->startdev;
+       __u8 lpum = erp->refers->irb.esw.esw1.lpum; /* path mask taken from the referred request's IRB */
+       int pos = pathmask_to_pos(lpum);
+       unsigned long long clk;
+
+       if (!device->path_thrhld) /* threshold of 0: no accounting at all */
+               return;
+
+       clk = get_tod_clock();
+       /*
+        * check if the last error is longer ago than the timeout
+        * (device->path_interval, compared in seconds),
+        * if so reset error state
+        */
+       if ((tod_to_ns(clk - device->path[pos].errorclk) / NSEC_PER_SEC)
+           >= device->path_interval) {
+               atomic_set(&device->path[pos].error_count, 0);
+               device->path[pos].errorclk = 0; /* overwritten with clk just below */
+       }
+       atomic_inc(&device->path[pos].error_count);
+       device->path[pos].errorclk = clk;
+       /* threshold reached: disable path if possible */
+       if (atomic_read(&device->path[pos].error_count) >=
+           device->path_thrhld)
+               dasd_3990_erp_disable_path(device, lpum);
+}
+
 /*
  *****************************************************************************
  * main ERP control functions (24 and 32 byte sense)
@@ -2237,6 +2282,7 @@ dasd_3990_erp_control_check(struct dasd_ccw_req *erp)
                                           | SCHN_STAT_CHN_CTRL_CHK)) {
                DBF_DEV_EVENT(DBF_WARNING, device, "%s",
                            "channel or interface control check");
+               dasd_3990_erp_account_error(erp);
                erp = dasd_3990_erp_action_4(erp, NULL);
        }
        return erp;
index 4101ab000c1664f9267ec244060eabfd35129924..84ca314c87e3362431e737e66c84a0396d4acc00 100644 (file)
@@ -977,10 +977,12 @@ dasd_access_show(struct device *dev, struct device_attribute *attr,
        if (IS_ERR(device))
                return PTR_ERR(device);
 
-       if (device->discipline->host_access_count)
-               count = device->discipline->host_access_count(device);
-       else
+       if (!device->discipline)
+               count = -ENODEV;
+       else if (!device->discipline->host_access_count)
                count = -EOPNOTSUPP;
+       else
+               count = device->discipline->host_access_count(device);
 
        dasd_put_device(device);
        if (count < 0)
@@ -1341,6 +1343,50 @@ dasd_timeout_store(struct device *dev, struct device_attribute *attr,
 static DEVICE_ATTR(timeout, 0644,
                   dasd_timeout_show, dasd_timeout_store);
 
+
+static ssize_t
+dasd_path_reset_store(struct device *dev, struct device_attribute *attr,
+                     const char *buf, size_t count)
+{
+       struct dasd_device *device;
+       unsigned int val; /* path mask parsed from buf as hexadecimal */
+
+       device = dasd_device_from_cdev(to_ccwdev(dev));
+       if (IS_ERR(device))
+               return -ENODEV;
+
+       if ((kstrtouint(buf, 16, &val) != 0) || val > 0xff)
+               val = 0; /* unparsable or wider than 8 bits: pass mask 0 on, no error */
+
+       if (device->discipline && device->discipline->reset_path)
+               device->discipline->reset_path(device, (__u8) val);
+
+       dasd_put_device(device);
+       return count; /* the write is always reported as fully consumed */
+}
+
+static DEVICE_ATTR(path_reset, 0200, NULL, dasd_path_reset_store);
+
+static ssize_t dasd_hpf_show(struct device *dev, struct device_attribute *attr,
+                            char *buf)
+{
+       struct dasd_device *device;
+       int hpf; /* value returned by the discipline's hpf_enabled hook */
+
+       device = dasd_device_from_cdev(to_ccwdev(dev));
+       if (IS_ERR(device))
+               return -ENODEV;
+       if (!device->discipline || !device->discipline->hpf_enabled) {
+               dasd_put_device(device); /* NOTE(review): fallback below prints dasd_nofcx itself, not its negation - confirm intended */
+               return snprintf(buf, PAGE_SIZE, "%d\n", dasd_nofcx);
+       }
+       hpf = device->discipline->hpf_enabled(device);
+       dasd_put_device(device);
+       return snprintf(buf, PAGE_SIZE, "%d\n", hpf);
+}
+
+static DEVICE_ATTR(hpf, 0444, dasd_hpf_show, NULL);
+
 static ssize_t dasd_reservation_policy_show(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
@@ -1432,7 +1478,7 @@ static ssize_t dasd_pm_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
        struct dasd_device *device;
-       u8 opm, nppm, cablepm, cuirpm, hpfpm;
+       u8 opm, nppm, cablepm, cuirpm, hpfpm, ifccpm;
 
        device = dasd_device_from_cdev(to_ccwdev(dev));
        if (IS_ERR(device))
@@ -1443,14 +1489,109 @@ static ssize_t dasd_pm_show(struct device *dev,
        cablepm = dasd_path_get_cablepm(device);
        cuirpm = dasd_path_get_cuirpm(device);
        hpfpm = dasd_path_get_hpfpm(device);
+       ifccpm = dasd_path_get_ifccpm(device);
        dasd_put_device(device);
 
-       return sprintf(buf, "%02x %02x %02x %02x %02x\n", opm, nppm,
-                      cablepm, cuirpm, hpfpm);
+       return sprintf(buf, "%02x %02x %02x %02x %02x %02x\n", opm, nppm,
+                      cablepm, cuirpm, hpfpm, ifccpm);
 }
 
 static DEVICE_ATTR(path_masks, 0444, dasd_pm_show, NULL);
 
+/*
+ * threshold value for IFCC/CCC errors
+ */
+static ssize_t
+dasd_path_threshold_show(struct device *dev,
+                         struct device_attribute *attr, char *buf)
+{
+       struct dasd_device *device;
+       int len; /* number of characters snprintf reports for buf */
+
+       device = dasd_device_from_cdev(to_ccwdev(dev));
+       if (IS_ERR(device))
+               return -ENODEV;
+       len = snprintf(buf, PAGE_SIZE, "%lu\n", device->path_thrhld); /* decimal value plus newline */
+       dasd_put_device(device);
+       return len;
+}
+
+static ssize_t
+dasd_path_threshold_store(struct device *dev, struct device_attribute *attr,
+                          const char *buf, size_t count)
+{
+       struct dasd_device *device;
+       unsigned long flags;
+       unsigned long val; /* new threshold parsed from buf as decimal */
+
+       device = dasd_device_from_cdev(to_ccwdev(dev));
+       if (IS_ERR(device))
+               return -ENODEV;
+
+       if ((kstrtoul(buf, 10, &val) != 0) ||
+           (val > DASD_THRHLD_MAX) || val == 0) { /* accept 1..DASD_THRHLD_MAX only */
+               dasd_put_device(device);
+               return -EINVAL;
+       }
+       spin_lock_irqsave(get_ccwdev_lock(to_ccwdev(dev)), flags);
+       if (val) /* always true here: 0 was rejected above */
+               device->path_thrhld = val;
+       spin_unlock_irqrestore(get_ccwdev_lock(to_ccwdev(dev)), flags);
+       dasd_put_device(device);
+       return count;
+}
+
+static DEVICE_ATTR(path_threshold, 0644, dasd_path_threshold_show,
+                  dasd_path_threshold_store);
+/*
+ * interval for IFCC/CCC checks
+ * meaning time with no IFCC/CCC error before the error counter
+ * gets reset
+ */
+static ssize_t
+dasd_path_interval_show(struct device *dev,
+                       struct device_attribute *attr, char *buf)
+{
+       struct dasd_device *device;
+       int len; /* number of characters snprintf reports for buf */
+
+       device = dasd_device_from_cdev(to_ccwdev(dev));
+       if (IS_ERR(device))
+               return -ENODEV;
+       len = snprintf(buf, PAGE_SIZE, "%lu\n", device->path_interval); /* decimal value plus newline */
+       dasd_put_device(device);
+       return len;
+}
+
+static ssize_t
+dasd_path_interval_store(struct device *dev, struct device_attribute *attr,
+              const char *buf, size_t count)
+{
+       struct dasd_device *device;
+       unsigned long flags;
+       unsigned long val; /* new interval parsed from buf as decimal */
+
+       device = dasd_device_from_cdev(to_ccwdev(dev));
+       if (IS_ERR(device))
+               return -ENODEV;
+
+       if ((kstrtoul(buf, 10, &val) != 0) ||
+           (val > DASD_INTERVAL_MAX) || val == 0) { /* accept 1..DASD_INTERVAL_MAX only */
+               dasd_put_device(device);
+               return -EINVAL;
+       }
+       spin_lock_irqsave(get_ccwdev_lock(to_ccwdev(dev)), flags);
+       if (val) /* always true here: 0 was rejected above */
+               device->path_interval = val;
+       spin_unlock_irqrestore(get_ccwdev_lock(to_ccwdev(dev)), flags);
+       dasd_put_device(device);
+       return count;
+}
+
+static DEVICE_ATTR(path_interval, 0644, dasd_path_interval_show,
+                  dasd_path_interval_store);
+
+
 static struct attribute * dasd_attrs[] = {
        &dev_attr_readonly.attr,
        &dev_attr_discipline.attr,
@@ -1471,6 +1612,10 @@ static struct attribute * dasd_attrs[] = {
        &dev_attr_safe_offline.attr,
        &dev_attr_host_access_count.attr,
        &dev_attr_path_masks.attr,
+       &dev_attr_path_threshold.attr,
+       &dev_attr_path_interval.attr,
+       &dev_attr_path_reset.attr,
+       &dev_attr_hpf.attr,
        NULL,
 };
 
index 51fdf31aa8ebaefe68fd24f4b0e16e68b98717e9..67bf50c9946f939a663fc280cacc0a9d21fe9bef 100644 (file)
@@ -1044,6 +1044,9 @@ static void dasd_eckd_clear_conf_data(struct dasd_device *device)
        for (i = 0; i < 8; i++) {
                kfree(device->path[i].conf_data);
                device->path[i].conf_data = NULL;
+               device->path[i].cssid = 0;
+               device->path[i].ssid = 0;
+               device->path[i].chpid = 0;
        }
 }
 
@@ -1057,9 +1060,12 @@ static int dasd_eckd_read_conf(struct dasd_device *device)
        struct dasd_eckd_private *private, path_private;
        struct dasd_uid *uid;
        char print_path_uid[60], print_device_uid[60];
+       struct channel_path_desc *chp_desc;
+       struct subchannel_id sch_id;
 
        private = device->private;
        opm = ccw_device_get_path_mask(device->cdev);
+       ccw_device_get_schid(device->cdev, &sch_id);
        conf_data_saved = 0;
        path_err = 0;
        /* get configuration data per operational path */
@@ -1097,6 +1103,12 @@ static int dasd_eckd_read_conf(struct dasd_device *device)
                        pos = pathmask_to_pos(lpm);
                        /* store per path conf_data */
                        device->path[pos].conf_data = conf_data;
+                       device->path[pos].cssid = sch_id.cssid;
+                       device->path[pos].ssid = sch_id.ssid;
+                       chp_desc = ccw_device_get_chp_desc(device->cdev, pos);
+                       if (chp_desc)
+                               device->path[pos].chpid = chp_desc->chpid;
+                       kfree(chp_desc);
                        /*
                         * build device UID that other path data
                         * can be compared to it
@@ -1157,6 +1169,12 @@ static int dasd_eckd_read_conf(struct dasd_device *device)
                        pos = pathmask_to_pos(lpm);
                        /* store per path conf_data */
                        device->path[pos].conf_data = conf_data;
+                       device->path[pos].cssid = sch_id.cssid;
+                       device->path[pos].ssid = sch_id.ssid;
+                       chp_desc = ccw_device_get_chp_desc(device->cdev, pos);
+                       if (chp_desc)
+                               device->path[pos].chpid = chp_desc->chpid;
+                       kfree(chp_desc);
                        path_private.conf_data = NULL;
                        path_private.conf_len = 0;
                }
@@ -1179,6 +1197,32 @@ static int dasd_eckd_read_conf(struct dasd_device *device)
        return path_err;
 }
 
+static u32 get_fcx_max_data(struct dasd_device *device)
+{
+       struct dasd_eckd_private *private = device->private;
+       int fcx_in_css, fcx_in_gneq, fcx_in_features;
+       int tpm, mdc;
+
+       if (dasd_nofcx) /* dasd_nofcx set: report a maximum data size of 0 */
+               return 0;
+       /* is transport mode supported? */
+       fcx_in_css = css_general_characteristics.fcx;
+       fcx_in_gneq = private->gneq->reserved2[7] & 0x04;
+       fcx_in_features = private->features.feature[40] & 0x80;
+       tpm = fcx_in_css && fcx_in_gneq && fcx_in_features; /* all three indications are required */
+
+       if (!tpm)
+               return 0;
+
+       mdc = ccw_device_get_mdc(device->cdev, 0);
+       if (mdc < 0) { /* negative result is treated as failure: warn and report 0 */
+               dev_warn(&device->cdev->dev, "Detecting the maximum supported data size for zHPF requests failed\n");
+               return 0;
+       } else {
+               return (u32)mdc * FCX_MAX_DATA_FACTOR;
+       }
+}
+
 static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm)
 {
        struct dasd_eckd_private *private = device->private;
@@ -1438,6 +1482,19 @@ static int dasd_eckd_verify_path(struct dasd_device *device, __u8 lpm)
        return 0;
 }
 
+static void dasd_eckd_reset_path(struct dasd_device *device, __u8 pm)
+{
+       struct dasd_eckd_private *private = device->private;
+       unsigned long flags;
+
+       if (!private->fcx_max_data) /* currently 0: determine the value again */
+               private->fcx_max_data = get_fcx_max_data(device);
+       spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); /* pm == 0 below selects the whole not-operational mask */
+       dasd_path_set_tbvpm(device, pm ? : dasd_path_get_notoperpm(device));
+       dasd_schedule_device_bh(device);
+       spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
+}
+
 static int dasd_eckd_read_features(struct dasd_device *device)
 {
        struct dasd_eckd_private *private = device->private;
@@ -1634,32 +1691,6 @@ static void dasd_eckd_kick_validate_server(struct dasd_device *device)
                dasd_put_device(device);
 }
 
-static u32 get_fcx_max_data(struct dasd_device *device)
-{
-       struct dasd_eckd_private *private = device->private;
-       int fcx_in_css, fcx_in_gneq, fcx_in_features;
-       int tpm, mdc;
-
-       if (dasd_nofcx)
-               return 0;
-       /* is transport mode supported? */
-       fcx_in_css = css_general_characteristics.fcx;
-       fcx_in_gneq = private->gneq->reserved2[7] & 0x04;
-       fcx_in_features = private->features.feature[40] & 0x80;
-       tpm = fcx_in_css && fcx_in_gneq && fcx_in_features;
-
-       if (!tpm)
-               return 0;
-
-       mdc = ccw_device_get_mdc(device->cdev, 0);
-       if (mdc < 0) {
-               dev_warn(&device->cdev->dev, "Detecting the maximum supported"
-                        " data size for zHPF requests failed\n");
-               return 0;
-       } else
-               return (u32)mdc * FCX_MAX_DATA_FACTOR;
-}
-
 /*
  * Check device characteristics.
  * If the device is accessible using ECKD discipline, the device is enabled.
@@ -1711,10 +1742,11 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
        if (rc)
                goto out_err1;
 
-       /* set default timeout */
+       /* set some default values */
        device->default_expires = DASD_EXPIRES;
-       /* set default retry count */
        device->default_retries = DASD_RETRIES;
+       device->path_thrhld = DASD_ECKD_PATH_THRHLD;
+       device->path_interval = DASD_ECKD_PATH_INTERVAL;
 
        if (private->gneq) {
                value = 1;
@@ -1828,6 +1860,9 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device)
                        private->conf_len = 0;
                }
                device->path[i].conf_data = NULL;
+               device->path[i].cssid = 0;
+               device->path[i].ssid = 0;
+               device->path[i].chpid = 0;
        }
        kfree(private->conf_data);
        private->conf_data = NULL;
@@ -4765,7 +4800,8 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device,
                       req, scsw_cc(&irb->scsw), scsw_fctl(&irb->scsw),
                       scsw_actl(&irb->scsw), scsw_stctl(&irb->scsw),
                       scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw),
-                      irb->scsw.tm.fcxs, irb->scsw.tm.schxs,
+                      irb->scsw.tm.fcxs,
+                      (irb->scsw.tm.ifob << 7) | irb->scsw.tm.sesq,
                       req ? req->intrc : 0);
        len += sprintf(page + len, PRINTK_HEADER
                       " device %s: Failing TCW: %p\n",
@@ -5288,11 +5324,10 @@ static int dasd_hosts_print(struct dasd_device *device, struct seq_file *m)
  */
 static int
 dasd_eckd_psf_cuir_response(struct dasd_device *device, int response,
-                           __u32 message_id,
-                           struct channel_path_desc *desc,
-                           struct subchannel_id sch_id)
+                           __u32 message_id, __u8 lpum)
 {
        struct dasd_psf_cuir_response *psf_cuir;
+       int pos = pathmask_to_pos(lpum);
        struct dasd_ccw_req *cqr;
        struct ccw1 *ccw;
        int rc;
@@ -5310,11 +5345,10 @@ dasd_eckd_psf_cuir_response(struct dasd_device *device, int response,
        psf_cuir = (struct dasd_psf_cuir_response *)cqr->data;
        psf_cuir->order = PSF_ORDER_CUIR_RESPONSE;
        psf_cuir->cc = response;
-       if (desc)
-               psf_cuir->chpid = desc->chpid;
+       psf_cuir->chpid = device->path[pos].chpid;
        psf_cuir->message_id = message_id;
-       psf_cuir->cssid = sch_id.cssid;
-       psf_cuir->ssid = sch_id.ssid;
+       psf_cuir->cssid = device->path[pos].cssid;
+       psf_cuir->ssid = device->path[pos].ssid;
        ccw = cqr->cpaddr;
        ccw->cmd_code = DASD_ECKD_CCW_PSF;
        ccw->cda = (__u32)(addr_t)psf_cuir;
@@ -5427,27 +5461,23 @@ static int dasd_eckd_cuir_scope(struct dasd_device *device, __u8 lpum,
 }
 
 static void dasd_eckd_cuir_notify_user(struct dasd_device *device,
-                                      unsigned long paths,
-                                      struct subchannel_id sch_id, int action)
+                                      unsigned long paths, int action)
 {
-       struct channel_path_desc *desc;
        int pos;
 
        while (paths) {
                /* get position of bit in mask */
-               pos = ffs(paths) - 1;
+               pos = 8 - ffs(paths);
                /* get channel path descriptor from this position */
-               desc = ccw_device_get_chp_desc(device->cdev, 7 - pos);
                if (action == CUIR_QUIESCE)
-                       pr_warn("Service on the storage server caused path "
-                               "%x.%02x to go offline", sch_id.cssid,
-                               desc ? desc->chpid : 0);
+                       pr_warn("Service on the storage server caused path %x.%02x to go offline",
+                               device->path[pos].cssid,
+                               device->path[pos].chpid);
                else if (action == CUIR_RESUME)
-                       pr_info("Path %x.%02x is back online after service "
-                               "on the storage server", sch_id.cssid,
-                               desc ? desc->chpid : 0);
-               kfree(desc);
-               clear_bit(pos, &paths);
+                       pr_info("Path %x.%02x is back online after service on the storage server",
+                               device->path[pos].cssid,
+                               device->path[pos].chpid);
+               clear_bit(7 - pos, &paths);
        }
 }
 
@@ -5480,7 +5510,6 @@ static int dasd_eckd_cuir_remove_path(struct dasd_device *device, __u8 lpum,
  * notify the already set offline devices again
  */
 static int dasd_eckd_cuir_quiesce(struct dasd_device *device, __u8 lpum,
-                                 struct subchannel_id sch_id,
                                  struct dasd_cuir_message *cuir)
 {
        struct dasd_eckd_private *private = device->private;
@@ -5535,14 +5564,13 @@ static int dasd_eckd_cuir_quiesce(struct dasd_device *device, __u8 lpum,
                }
        }
        /* notify user about all paths affected by CUIR action */
-       dasd_eckd_cuir_notify_user(device, paths, sch_id, CUIR_QUIESCE);
+       dasd_eckd_cuir_notify_user(device, paths, CUIR_QUIESCE);
        return 0;
 out_err:
        return tbcpm;
 }
 
 static int dasd_eckd_cuir_resume(struct dasd_device *device, __u8 lpum,
-                                struct subchannel_id sch_id,
                                 struct dasd_cuir_message *cuir)
 {
        struct dasd_eckd_private *private = device->private;
@@ -5601,7 +5629,7 @@ static int dasd_eckd_cuir_resume(struct dasd_device *device, __u8 lpum,
                }
        }
        /* notify user about all paths affected by CUIR action */
-       dasd_eckd_cuir_notify_user(device, paths, sch_id, CUIR_RESUME);
+       dasd_eckd_cuir_notify_user(device, paths, CUIR_RESUME);
        return 0;
 }
 
@@ -5609,38 +5637,31 @@ static void dasd_eckd_handle_cuir(struct dasd_device *device, void *messages,
                                 __u8 lpum)
 {
        struct dasd_cuir_message *cuir = messages;
-       struct channel_path_desc *desc;
-       struct subchannel_id sch_id;
-       int pos, response;
+       int response;
 
        DBF_DEV_EVENT(DBF_WARNING, device,
                      "CUIR request: %016llx %016llx %016llx %08x",
                      ((u64 *)cuir)[0], ((u64 *)cuir)[1], ((u64 *)cuir)[2],
                      ((u32 *)cuir)[3]);
-       ccw_device_get_schid(device->cdev, &sch_id);
-       pos = pathmask_to_pos(lpum);
-       desc = ccw_device_get_chp_desc(device->cdev, pos);
 
        if (cuir->code == CUIR_QUIESCE) {
                /* quiesce */
-               if (dasd_eckd_cuir_quiesce(device, lpum, sch_id, cuir))
+               if (dasd_eckd_cuir_quiesce(device, lpum, cuir))
                        response = PSF_CUIR_LAST_PATH;
                else
                        response = PSF_CUIR_COMPLETED;
        } else if (cuir->code == CUIR_RESUME) {
                /* resume */
-               dasd_eckd_cuir_resume(device, lpum, sch_id, cuir);
+               dasd_eckd_cuir_resume(device, lpum, cuir);
                response = PSF_CUIR_COMPLETED;
        } else
                response = PSF_CUIR_NOT_SUPPORTED;
 
        dasd_eckd_psf_cuir_response(device, response,
-                                   cuir->message_id, desc, sch_id);
+                                   cuir->message_id, lpum);
        DBF_DEV_EVENT(DBF_WARNING, device,
                      "CUIR response: %d on message ID %08x", response,
                      cuir->message_id);
-       /* free descriptor copy */
-       kfree(desc);
        /* to make sure there is no attention left schedule work again */
        device->discipline->check_attention(device, lpum);
 }
@@ -5687,6 +5708,63 @@ static int dasd_eckd_check_attention(struct dasd_device *device, __u8 lpum)
        return 0;
 }
 
+static int dasd_eckd_disable_hpf_path(struct dasd_device *device, __u8 lpum)
+{
+       if (~lpum & dasd_path_get_opm(device)) {
+               dasd_path_add_nohpfpm(device, lpum);
+               dasd_path_remove_opm(device, lpum);
+               dev_err(&device->cdev->dev,
+                       "Channel path %02X lost HPF functionality and is disabled\n",
+                       lpum);
+               return 1;
+       }
+       return 0;
+}
+
+static void dasd_eckd_disable_hpf_device(struct dasd_device *device)
+{
+       struct dasd_eckd_private *private = device->private;
+
+       dev_err(&device->cdev->dev,
+               "High Performance FICON disabled\n");
+       private->fcx_max_data = 0;
+}
+
+static int dasd_eckd_hpf_enabled(struct dasd_device *device)
+{
+       struct dasd_eckd_private *private = device->private;
+
+       return private->fcx_max_data ? 1 : 0;
+}
+
+static void dasd_eckd_handle_hpf_error(struct dasd_device *device,
+                                      struct irb *irb)
+{
+       struct dasd_eckd_private *private = device->private;
+
+       if (!private->fcx_max_data) {
+               /* sanity check for no HPF, the error makes no sense */
+               DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+                             "Trying to disable HPF for a non HPF device");
+               return;
+       }
+       if (irb->scsw.tm.sesq == SCSW_SESQ_DEV_NOFCX) {
+               dasd_eckd_disable_hpf_device(device);
+       } else if (irb->scsw.tm.sesq == SCSW_SESQ_PATH_NOFCX) {
+               if (dasd_eckd_disable_hpf_path(device, irb->esw.esw1.lpum))
+                       return;
+               dasd_eckd_disable_hpf_device(device);
+               dasd_path_set_tbvpm(device,
+                                 dasd_path_get_hpfpm(device));
+       }
+       /*
+        * prevent any new I/O from being started on the device and schedule a
+        * requeue of existing requests
+        */
+       dasd_device_set_stop_bits(device, DASD_STOPPED_NOT_ACC);
+       dasd_schedule_requeue(device);
+}
+
 static struct ccw_driver dasd_eckd_driver = {
        .driver = {
                .name   = "dasd-eckd",
@@ -5755,6 +5833,10 @@ static struct dasd_discipline dasd_eckd_discipline = {
        .check_attention = dasd_eckd_check_attention,
        .host_access_count = dasd_eckd_host_access_count,
        .hosts_print = dasd_hosts_print,
+       .handle_hpf_error = dasd_eckd_handle_hpf_error,
+       .disable_hpf = dasd_eckd_disable_hpf_device,
+       .hpf_enabled = dasd_eckd_hpf_enabled,
+       .reset_path = dasd_eckd_reset_path,
 };
 
 static int __init
index e491f4416e40627126b86e96421dffb74779d870..e2a710c250a567ff99bbb8ff7e8e22668fca2d84 100644 (file)
@@ -94,6 +94,8 @@
 #define FCX_MAX_DATA_FACTOR 65536
 #define DASD_ECKD_RCD_DATA_SIZE 256
 
+#define DASD_ECKD_PATH_THRHLD           256
+#define DASD_ECKD_PATH_INTERVAL                 300
 
 /*****************************************************************************
  * SECTION: Type Definitions
index d75f996884d92861b966ab3df0630c1079c5c569..24be210c10e5fa3b18ddffcf4be15f7cf265cc50 100644 (file)
@@ -378,6 +378,10 @@ struct dasd_discipline {
        int (*check_attention)(struct dasd_device *, __u8);
        int (*host_access_count)(struct dasd_device *);
        int (*hosts_print)(struct dasd_device *, struct seq_file *);
+       void (*handle_hpf_error)(struct dasd_device *, struct irb *);
+       void (*disable_hpf)(struct dasd_device *);
+       int (*hpf_enabled)(struct dasd_device *);
+       void (*reset_path)(struct dasd_device *, __u8);
 };
 
 extern struct dasd_discipline *dasd_diag_discipline_pointer;
@@ -407,11 +411,19 @@ extern struct dasd_discipline *dasd_diag_discipline_pointer;
 #define DASD_PATH_MISCABLED    5
 #define DASD_PATH_NOHPF        6
 #define DASD_PATH_CUIR        7
+#define DASD_PATH_IFCC        8
 
+#define DASD_THRHLD_MAX                4294967295U
+#define DASD_INTERVAL_MAX      4294967295U
 
 struct dasd_path {
        unsigned long flags;
+       u8 cssid;
+       u8 ssid;
+       u8 chpid;
        struct dasd_conf_data *conf_data;
+       atomic_t error_count;
+       unsigned long long errorclk;
 };
 
 
@@ -491,6 +503,7 @@ struct dasd_device {
        struct work_struct reload_device;
        struct work_struct kick_validate;
        struct work_struct suc_work;
+       struct work_struct requeue_requests;
        struct timer_list timer;
 
        debug_info_t *debug_area;
@@ -506,6 +519,9 @@ struct dasd_device {
 
        unsigned long blk_timeout;
 
+       unsigned long path_thrhld;
+       unsigned long path_interval;
+
        struct dentry *debugfs_dentry;
        struct dentry *hosts_dentry;
        struct dasd_profile profile;
@@ -715,6 +731,7 @@ void dasd_set_target_state(struct dasd_device *, int);
 void dasd_kick_device(struct dasd_device *);
 void dasd_restore_device(struct dasd_device *);
 void dasd_reload_device(struct dasd_device *);
+void dasd_schedule_requeue(struct dasd_device *);
 
 void dasd_add_request_head(struct dasd_ccw_req *);
 void dasd_add_request_tail(struct dasd_ccw_req *);
@@ -941,6 +958,21 @@ static inline void dasd_path_clear_cuir(struct dasd_device *device, int chp)
        __clear_bit(DASD_PATH_CUIR, &device->path[chp].flags);
 }
 
+static inline void dasd_path_ifcc(struct dasd_device *device, int chp)
+{
+       set_bit(DASD_PATH_IFCC, &device->path[chp].flags);
+}
+
+static inline int dasd_path_is_ifcc(struct dasd_device *device, int chp)
+{
+       return test_bit(DASD_PATH_IFCC, &device->path[chp].flags);
+}
+
+static inline void dasd_path_clear_ifcc(struct dasd_device *device, int chp)
+{
+       clear_bit(DASD_PATH_IFCC, &device->path[chp].flags);
+}
+
 static inline void dasd_path_clear_nohpf(struct dasd_device *device, int chp)
 {
        __clear_bit(DASD_PATH_NOHPF, &device->path[chp].flags);
@@ -1032,6 +1064,17 @@ static inline __u8 dasd_path_get_cuirpm(struct dasd_device *device)
        return cuirpm;
 }
 
+static inline __u8 dasd_path_get_ifccpm(struct dasd_device *device)
+{
+       int chp;
+       __u8 ifccpm = 0x00;
+
+       for (chp = 0; chp < 8; chp++)
+               if (dasd_path_is_ifcc(device, chp))
+                       ifccpm |= 0x80 >> chp;
+       return ifccpm;
+}
+
 static inline __u8 dasd_path_get_hpfpm(struct dasd_device *device)
 {
        int chp;
@@ -1056,6 +1099,20 @@ static inline void dasd_path_add_tbvpm(struct dasd_device *device, __u8 pm)
                        dasd_path_verify(device, chp);
 }
 
+static inline __u8 dasd_path_get_notoperpm(struct dasd_device *device)
+{
+       int chp;
+       __u8 nopm = 0x00;
+
+       for (chp = 0; chp < 8; chp++)
+               if (dasd_path_is_nohpf(device, chp) ||
+                   dasd_path_is_ifcc(device, chp) ||
+                   dasd_path_is_cuir(device, chp) ||
+                   dasd_path_is_miscabled(device, chp))
+                       nopm |= 0x80 >> chp;
+       return nopm;
+}
+
 static inline void dasd_path_add_opm(struct dasd_device *device, __u8 pm)
 {
        int chp;
@@ -1070,6 +1127,7 @@ static inline void dasd_path_add_opm(struct dasd_device *device, __u8 pm)
                        dasd_path_clear_nohpf(device, chp);
                        dasd_path_clear_cuir(device, chp);
                        dasd_path_clear_cable(device, chp);
+                       dasd_path_clear_ifcc(device, chp);
                }
 }
 
@@ -1091,6 +1149,15 @@ static inline void dasd_path_add_cuirpm(struct dasd_device *device, __u8 pm)
                        dasd_path_cuir(device, chp);
 }
 
+static inline void dasd_path_add_ifccpm(struct dasd_device *device, __u8 pm)
+{
+       int chp;
+
+       for (chp = 0; chp < 8; chp++)
+               if (pm & (0x80 >> chp))
+                       dasd_path_ifcc(device, chp);
+}
+
 static inline void dasd_path_add_nppm(struct dasd_device *device, __u8 pm)
 {
        int chp;
@@ -1148,6 +1215,7 @@ static inline void dasd_path_set_opm(struct dasd_device *device, __u8 pm)
                        dasd_path_clear_nohpf(device, chp);
                        dasd_path_clear_cuir(device, chp);
                        dasd_path_clear_cable(device, chp);
+                       dasd_path_clear_ifcc(device, chp);
                }
        }
 }