From eb6e199bef288611157b8198c25d12b32bf058d0 Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Mon, 7 Dec 2009 12:51:51 +0100 Subject: [PATCH] [S390] dasd: improve error recovery for internal I/O Most of the error conditions reported by a FICON storage server indicate situations which can be recovered. Sometimes the host just needs to retry an I/O request, but sometimes the recovery is more complex and requires the device driver to wait, choose a different path, etc. The DASD device driver has a fully featured error recovery for normal block layer I/O, but not for internal I/O request which are for example used during the device bring up. This can lead to situations where the IPL of a system fails because DASD devices are not properly recognized. This patch will extend the internal I/O handling to use the existing error recovery procedures. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 207 +++++++++++++++++++++-------- drivers/s390/block/dasd_3990_erp.c | 46 +++++-- drivers/s390/block/dasd_alias.c | 15 ++- drivers/s390/block/dasd_eckd.c | 72 +++++++--- drivers/s390/block/dasd_int.h | 3 + drivers/s390/block/dasd_ioctl.c | 4 +- 6 files changed, 253 insertions(+), 94 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 329115a4d4b..4f211c175b5 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -63,6 +63,7 @@ static void do_restore_device(struct work_struct *); static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *); static void dasd_device_timeout(unsigned long); static void dasd_block_timeout(unsigned long); +static void __dasd_process_erp(struct dasd_device *, struct dasd_ccw_req *); /* * SECTION: Operations on the device structure. @@ -959,7 +960,7 @@ static void dasd_device_timeout(unsigned long ptr) device = (struct dasd_device *) ptr; spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); /* re-activate request queue */ - device->stopped &= ~DASD_STOPPED_PENDING; + dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); dasd_schedule_device_bh(device); } @@ -1022,7 +1023,7 @@ void dasd_generic_handle_state_change(struct dasd_device *device) /* First of all start sense subsystem status request. */ dasd_eer_snss(device); - device->stopped &= ~DASD_STOPPED_PENDING; + dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING); dasd_schedule_device_bh(device); if (device->block) dasd_schedule_block_bh(device->block); @@ -1404,6 +1405,20 @@ void dasd_schedule_device_bh(struct dasd_device *device) tasklet_hi_schedule(&device->tasklet); } +void dasd_device_set_stop_bits(struct dasd_device *device, int bits) +{ + device->stopped |= bits; +} +EXPORT_SYMBOL_GPL(dasd_device_set_stop_bits); + +void dasd_device_remove_stop_bits(struct dasd_device *device, int bits) +{ + device->stopped &= ~bits; + if (!device->stopped) + wake_up(&generic_waitq); +} +EXPORT_SYMBOL_GPL(dasd_device_remove_stop_bits); + /* * Queue a request to the head of the device ccw_queue. * Start the I/O if possible. @@ -1464,58 +1479,135 @@ static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr) } /* - * Queue a request to the tail of the device ccw_queue and wait for - * it's completion. + * checks if error recovery is necessary, returns 1 if yes, 0 otherwise. */ -int dasd_sleep_on(struct dasd_ccw_req *cqr) +static int __dasd_sleep_on_erp(struct dasd_ccw_req *cqr) { struct dasd_device *device; - int rc; + dasd_erp_fn_t erp_fn; + if (cqr->status == DASD_CQR_FILLED) + return 0; device = cqr->startdev; + if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) { + if (cqr->status == DASD_CQR_TERMINATED) { + device->discipline->handle_terminated_request(cqr); + return 1; + } + if (cqr->status == DASD_CQR_NEED_ERP) { + erp_fn = device->discipline->erp_action(cqr); + erp_fn(cqr); + return 1; + } + if (cqr->status == DASD_CQR_FAILED) + dasd_log_sense(cqr, &cqr->irb); + if (cqr->refers) { + __dasd_process_erp(device, cqr); + return 1; + } + } + return 0; +} - cqr->callback = dasd_wakeup_cb; - cqr->callback_data = (void *) &generic_waitq; - dasd_add_request_tail(cqr); - wait_event(generic_waitq, _wait_for_wakeup(cqr)); +static int __dasd_sleep_on_loop_condition(struct dasd_ccw_req *cqr) +{ + if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) { + if (cqr->refers) /* erp is not done yet */ + return 1; + return ((cqr->status != DASD_CQR_DONE) && + (cqr->status != DASD_CQR_FAILED)); + } else + return (cqr->status == DASD_CQR_FILLED); +} - if (cqr->status == DASD_CQR_DONE) +static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) +{ + struct dasd_device *device; + int rc; + struct list_head ccw_queue; + struct dasd_ccw_req *cqr; + + INIT_LIST_HEAD(&ccw_queue); + maincqr->status = DASD_CQR_FILLED; + device = maincqr->startdev; + list_add(&maincqr->blocklist, &ccw_queue); + for (cqr = maincqr; __dasd_sleep_on_loop_condition(cqr); + cqr = list_first_entry(&ccw_queue, + struct dasd_ccw_req, blocklist)) { + + if (__dasd_sleep_on_erp(cqr)) + continue; + if (cqr->status != DASD_CQR_FILLED) /* could be failed */ + continue; + + /* Non-temporary stop condition will trigger fail fast */ + if (device->stopped & ~DASD_STOPPED_PENDING && + test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && + (!dasd_eer_enabled(device))) { + cqr->status = DASD_CQR_FAILED; + continue; + } + + /* Don't try to start requests if device is stopped */ + if (interruptible) { + rc = wait_event_interruptible( + generic_waitq, !(device->stopped)); + if (rc == -ERESTARTSYS) { + cqr->status = DASD_CQR_FAILED; + maincqr->intrc = rc; + continue; + } + } else + wait_event(generic_waitq, !(device->stopped)); + + cqr->callback = dasd_wakeup_cb; + cqr->callback_data = (void *) &generic_waitq; + dasd_add_request_tail(cqr); + if (interruptible) { + rc = wait_event_interruptible( + generic_waitq, _wait_for_wakeup(cqr)); + if (rc == -ERESTARTSYS) { + dasd_cancel_req(cqr); + /* wait (non-interruptible) for final status */ + wait_event(generic_waitq, + _wait_for_wakeup(cqr)); + cqr->status = DASD_CQR_FAILED; + maincqr->intrc = rc; + continue; + } + } else + wait_event(generic_waitq, _wait_for_wakeup(cqr)); + } + + maincqr->endclk = get_clock(); + if ((maincqr->status != DASD_CQR_DONE) && + (maincqr->intrc != -ERESTARTSYS)) + dasd_log_sense(maincqr, &maincqr->irb); + if (maincqr->status == DASD_CQR_DONE) rc = 0; - else if (cqr->intrc) - rc = cqr->intrc; + else if (maincqr->intrc) + rc = maincqr->intrc; else rc = -EIO; return rc; } +/* + * Queue a request to the tail of the device ccw_queue and wait for + * it's completion. + */ +int dasd_sleep_on(struct dasd_ccw_req *cqr) +{ + return _dasd_sleep_on(cqr, 0); +} + /* * Queue a request to the tail of the device ccw_queue and wait * interruptible for it's completion. */ int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr) { - struct dasd_device *device; - int rc; - - device = cqr->startdev; - cqr->callback = dasd_wakeup_cb; - cqr->callback_data = (void *) &generic_waitq; - dasd_add_request_tail(cqr); - rc = wait_event_interruptible(generic_waitq, _wait_for_wakeup(cqr)); - if (rc == -ERESTARTSYS) { - dasd_cancel_req(cqr); - /* wait (non-interruptible) for final status */ - wait_event(generic_waitq, _wait_for_wakeup(cqr)); - cqr->intrc = rc; - } - - if (cqr->status == DASD_CQR_DONE) - rc = 0; - else if (cqr->intrc) - rc = cqr->intrc; - else - rc = -EIO; - return rc; + return _dasd_sleep_on(cqr, 1); } /* @@ -1629,7 +1721,7 @@ static void dasd_block_timeout(unsigned long ptr) block = (struct dasd_block *) ptr; spin_lock_irqsave(get_ccwdev_lock(block->base->cdev), flags); /* re-activate request queue */ - block->base->stopped &= ~DASD_STOPPED_PENDING; + dasd_device_remove_stop_bits(block->base, DASD_STOPPED_PENDING); spin_unlock_irqrestore(get_ccwdev_lock(block->base->cdev), flags); dasd_schedule_block_bh(block); } @@ -1656,11 +1748,10 @@ void dasd_block_clear_timer(struct dasd_block *block) /* * Process finished error recovery ccw. */ -static inline void __dasd_block_process_erp(struct dasd_block *block, - struct dasd_ccw_req *cqr) +static void __dasd_process_erp(struct dasd_device *device, + struct dasd_ccw_req *cqr) { dasd_erp_fn_t erp_fn; - struct dasd_device *device = block->base; if (cqr->status == DASD_CQR_DONE) DBF_DEV_EVENT(DBF_NOTICE, device, "%s", "ERP successful"); @@ -1724,9 +1815,12 @@ static void __dasd_process_request_queue(struct dasd_block *block) */ if (!list_empty(&block->ccw_queue)) break; - spin_lock_irqsave(get_ccwdev_lock(basedev->cdev), flags); - basedev->stopped |= DASD_STOPPED_PENDING; - spin_unlock_irqrestore(get_ccwdev_lock(basedev->cdev), flags); + spin_lock_irqsave( + get_ccwdev_lock(basedev->cdev), flags); + dasd_device_set_stop_bits(basedev, + DASD_STOPPED_PENDING); + spin_unlock_irqrestore( + get_ccwdev_lock(basedev->cdev), flags); dasd_block_set_timer(block, HZ/2); break; } @@ -1812,7 +1906,7 @@ restart: cqr->status = DASD_CQR_FILLED; cqr->retries = 255; spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); - base->stopped |= DASD_STOPPED_QUIESCE; + dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE); spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); goto restart; @@ -1820,7 +1914,7 @@ restart: /* Process finished ERP request. */ if (cqr->refers) { - __dasd_block_process_erp(block, cqr); + __dasd_process_erp(base, cqr); goto restart; } @@ -1951,7 +2045,7 @@ restart_cb: /* Process finished ERP request. */ if (cqr->refers) { spin_lock_bh(&block->queue_lock); - __dasd_block_process_erp(block, cqr); + __dasd_process_erp(block->base, cqr); spin_unlock_bh(&block->queue_lock); /* restart list_for_xx loop since dasd_process_erp * might remove multiple elements */ @@ -2410,16 +2504,16 @@ int dasd_generic_notify(struct ccw_device *cdev, int event) cqr->status = DASD_CQR_QUEUED; cqr->retries++; } - device->stopped |= DASD_STOPPED_DC_WAIT; + dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT); dasd_device_clear_timer(device); dasd_schedule_device_bh(device); ret = 1; break; case CIO_OPER: /* FIXME: add a sanity check. */ - device->stopped &= ~DASD_STOPPED_DC_WAIT; + dasd_device_remove_stop_bits(device, DASD_STOPPED_DC_WAIT); if (device->stopped & DASD_UNRESUMED_PM) { - device->stopped &= ~DASD_UNRESUMED_PM; + dasd_device_remove_stop_bits(device, DASD_UNRESUMED_PM); dasd_restore_device(device); ret = 1; break; @@ -2444,7 +2538,7 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev) if (IS_ERR(device)) return PTR_ERR(device); /* disallow new I/O */ - device->stopped |= DASD_STOPPED_PM; + dasd_device_set_stop_bits(device, DASD_STOPPED_PM); /* clear active requests */ INIT_LIST_HEAD(&freeze_queue); spin_lock_irq(get_ccwdev_lock(cdev)); @@ -2496,14 +2590,18 @@ int dasd_generic_restore_device(struct ccw_device *cdev) return PTR_ERR(device); /* allow new IO again */ - device->stopped &= ~DASD_STOPPED_PM; - device->stopped &= ~DASD_UNRESUMED_PM; + dasd_device_remove_stop_bits(device, + (DASD_STOPPED_PM | DASD_UNRESUMED_PM)); dasd_schedule_device_bh(device); - if (device->discipline->restore) + /* + * call discipline restore function + * if device is stopped do nothing e.g. for disconnected devices + */ + if (device->discipline->restore && !(device->stopped)) rc = device->discipline->restore(device); - if (rc) + if (rc || device->stopped) /* * if the resume failed for the DASD we put it in * an UNRESUMED stop state @@ -2553,8 +2651,7 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, cqr->startdev = device; cqr->memdev = device; cqr->expires = 10*HZ; - clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); - cqr->retries = 2; + cqr->retries = 256; cqr->buildclk = get_clock(); cqr->status = DASD_CQR_FILLED; return cqr; diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index 316eb1256a9..44796ba4eb9 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -69,8 +69,7 @@ dasd_3990_erp_cleanup(struct dasd_ccw_req * erp, char final_status) * processing until the started timer has expired or an related * interrupt was received. */ -static void -dasd_3990_erp_block_queue(struct dasd_ccw_req * erp, int expires) +static void dasd_3990_erp_block_queue(struct dasd_ccw_req *erp, int expires) { struct dasd_device *device = erp->startdev; @@ -80,10 +79,13 @@ dasd_3990_erp_block_queue(struct dasd_ccw_req * erp, int expires) "blocking request queue for %is", expires/HZ); spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - device->stopped |= DASD_STOPPED_PENDING; + dasd_device_set_stop_bits(device, DASD_STOPPED_PENDING); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); erp->status = DASD_CQR_FILLED; - dasd_block_set_timer(device->block, expires); + if (erp->block) + dasd_block_set_timer(erp->block, expires); + else + dasd_device_set_timer(device, expires); } /* @@ -242,9 +244,13 @@ dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier) * DESCRIPTION * Setup ERP to do the ERP action 1 (see Reference manual). * Repeat the operation on a different channel path. - * If all alternate paths have been tried, the request is posted with a - * permanent error. - * Note: duplex handling is not implemented (yet). + * As deviation from the recommended recovery action, we reset the path mask + * after we have tried each path and go through all paths a second time. + * This will cover situations where only one path at a time is actually down, + * but all paths fail and recover just with the same sequence and timing as + * we try to use them (flapping links). + * If all alternate paths have been tried twice, the request is posted with + * a permanent error. * * PARAMETER * erp pointer to the current ERP @@ -253,17 +259,25 @@ dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier) * erp pointer to the ERP * */ -static struct dasd_ccw_req * -dasd_3990_erp_action_1(struct dasd_ccw_req * erp) +static struct dasd_ccw_req *dasd_3990_erp_action_1_sec(struct dasd_ccw_req *erp) { + erp->function = dasd_3990_erp_action_1_sec; + dasd_3990_erp_alternate_path(erp); + return erp; +} +static struct dasd_ccw_req *dasd_3990_erp_action_1(struct dasd_ccw_req *erp) +{ erp->function = dasd_3990_erp_action_1; - dasd_3990_erp_alternate_path(erp); - + if (erp->status == DASD_CQR_FAILED) { + erp->status = DASD_CQR_FILLED; + erp->retries = 10; + erp->lpm = LPM_ANYPATH; + erp->function = dasd_3990_erp_action_1_sec; + } return erp; - -} /* end dasd_3990_erp_action_1 */ +} /* end dasd_3990_erp_action_1(b) */ /* * DASD_3990_ERP_ACTION_4 @@ -2294,6 +2308,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr) return cqr; } + ccw = cqr->cpaddr; if (cqr->cpmode == 1) { /* make a shallow copy of the original tcw but set new tsb */ erp->cpmode = 1; @@ -2302,6 +2317,9 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr) tsb = (struct tsb *) &tcw[1]; *tcw = *((struct tcw *)cqr->cpaddr); tcw->tsb = (long)tsb; + } else if (ccw->cmd_code == DASD_ECKD_CCW_PSF) { + /* PSF cannot be chained from NOOP/TIC */ + erp->cpaddr = cqr->cpaddr; } else { /* initialize request with default TIC to current ERP/CQR */ ccw = erp->cpaddr; @@ -2486,6 +2504,8 @@ dasd_3990_erp_further_erp(struct dasd_ccw_req *erp) erp = dasd_3990_erp_action_1(erp); + } else if (erp->function == dasd_3990_erp_action_1_sec) { + erp = dasd_3990_erp_action_1_sec(erp); } else if (erp->function == dasd_3990_erp_action_5) { /* retries have not been successful */ diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 70a008c0052..cdc6c049c35 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -755,11 +755,11 @@ static void __stop_device_on_lcu(struct dasd_device *device, { /* If pos == device then device is already locked! */ if (pos == device) { - pos->stopped |= DASD_STOPPED_SU; + dasd_device_set_stop_bits(pos, DASD_STOPPED_SU); return; } spin_lock(get_ccwdev_lock(pos->cdev)); - pos->stopped |= DASD_STOPPED_SU; + dasd_device_set_stop_bits(pos, DASD_STOPPED_SU); spin_unlock(get_ccwdev_lock(pos->cdev)); } @@ -793,26 +793,26 @@ static void _unstop_all_devices_on_lcu(struct alias_lcu *lcu) list_for_each_entry(device, &lcu->active_devices, alias_list) { spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - device->stopped &= ~DASD_STOPPED_SU; + dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); } list_for_each_entry(device, &lcu->inactive_devices, alias_list) { spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - device->stopped &= ~DASD_STOPPED_SU; + dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); } list_for_each_entry(pavgroup, &lcu->grouplist, group) { list_for_each_entry(device, &pavgroup->baselist, alias_list) { spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - device->stopped &= ~DASD_STOPPED_SU; + dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); } list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - device->stopped &= ~DASD_STOPPED_SU; + dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); } @@ -836,7 +836,8 @@ static void summary_unit_check_handling_work(struct work_struct *work) /* 2. reset summary unit check */ spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - device->stopped &= ~(DASD_STOPPED_SU | DASD_STOPPED_PENDING); + dasd_device_remove_stop_bits(device, + (DASD_STOPPED_SU | DASD_STOPPED_PENDING)); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); reset_summary_unit_check(lcu, device, suc_data->reason); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index abb2ec836ee..39ffc84712f 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -77,6 +77,11 @@ MODULE_DEVICE_TABLE(ccw, dasd_eckd_ids); static struct ccw_driver dasd_eckd_driver; /* see below */ +#define INIT_CQR_OK 0 +#define INIT_CQR_UNFORMATTED 1 +#define INIT_CQR_ERROR 2 + + /* initial attempt at a probe function. this can be simplified once * the other detection code is gone */ static int @@ -749,8 +754,7 @@ static struct dasd_ccw_req *dasd_eckd_build_rcd_lpm(struct dasd_device *device, cqr->block = NULL; cqr->expires = 10*HZ; cqr->lpm = lpm; - clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); - cqr->retries = 2; + cqr->retries = 256; cqr->buildclk = get_clock(); cqr->status = DASD_CQR_FILLED; return cqr; @@ -949,8 +953,7 @@ static int dasd_eckd_read_features(struct dasd_device *device) cqr->startdev = device; cqr->memdev = device; cqr->block = NULL; - clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); - cqr->retries = 5; + cqr->retries = 256; cqr->expires = 10 * HZ; /* Prepare for Read Subsystem Data */ @@ -1025,6 +1028,7 @@ static struct dasd_ccw_req *dasd_eckd_build_psf_ssc(struct dasd_device *device, cqr->startdev = device; cqr->memdev = device; cqr->block = NULL; + cqr->retries = 256; cqr->expires = 10*HZ; cqr->buildclk = get_clock(); cqr->status = DASD_CQR_FILLED; @@ -1068,6 +1072,7 @@ static int dasd_eckd_validate_server(struct dasd_device *device) else enable_pav = 1; rc = dasd_eckd_psf_ssc(device, enable_pav); + /* may be requested feature is not available on server, * therefore just report error and go ahead */ private = (struct dasd_eckd_private *) device->private; @@ -1265,12 +1270,29 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) cqr->block = NULL; cqr->startdev = device; cqr->memdev = device; - cqr->retries = 0; + cqr->retries = 255; cqr->buildclk = get_clock(); cqr->status = DASD_CQR_FILLED; return cqr; } +/* differentiate between 'no record found' and any other error */ +static int dasd_eckd_analysis_evaluation(struct dasd_ccw_req *init_cqr) +{ + char *sense; + if (init_cqr->status == DASD_CQR_DONE) + return INIT_CQR_OK; + else if (init_cqr->status == DASD_CQR_NEED_ERP || + init_cqr->status == DASD_CQR_FAILED) { + sense = dasd_get_sense(&init_cqr->irb); + if (sense && (sense[1] & SNS1_NO_REC_FOUND)) + return INIT_CQR_UNFORMATTED; + else + return INIT_CQR_ERROR; + } else + return INIT_CQR_ERROR; +} + /* * This is the callback function for the init_analysis cqr. It saves * the status of the initial analysis ccw before it frees it and kicks @@ -1278,21 +1300,20 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) * dasd_eckd_do_analysis again (if the devices has not been marked * for deletion in the meantime). */ -static void -dasd_eckd_analysis_callback(struct dasd_ccw_req *init_cqr, void *data) +static void dasd_eckd_analysis_callback(struct dasd_ccw_req *init_cqr, + void *data) { struct dasd_eckd_private *private; struct dasd_device *device; device = init_cqr->startdev; private = (struct dasd_eckd_private *) device->private; - private->init_cqr_status = init_cqr->status; + private->init_cqr_status = dasd_eckd_analysis_evaluation(init_cqr); dasd_sfree_request(init_cqr, device); dasd_kick_device(device); } -static int -dasd_eckd_start_analysis(struct dasd_block *block) +static int dasd_eckd_start_analysis(struct dasd_block *block) { struct dasd_eckd_private *private; struct dasd_ccw_req *init_cqr; @@ -1304,27 +1325,44 @@ dasd_eckd_start_analysis(struct dasd_block *block) init_cqr->callback = dasd_eckd_analysis_callback; init_cqr->callback_data = NULL; init_cqr->expires = 5*HZ; + /* first try without ERP, so we can later handle unformatted + * devices as special case + */ + clear_bit(DASD_CQR_FLAGS_USE_ERP, &init_cqr->flags); + init_cqr->retries = 0; dasd_add_request_head(init_cqr); return -EAGAIN; } -static int -dasd_eckd_end_analysis(struct dasd_block *block) +static int dasd_eckd_end_analysis(struct dasd_block *block) { struct dasd_device *device; struct dasd_eckd_private *private; struct eckd_count *count_area; unsigned int sb, blk_per_trk; int status, i; + struct dasd_ccw_req *init_cqr; device = block->base; private = (struct dasd_eckd_private *) device->private; status = private->init_cqr_status; private->init_cqr_status = -1; - if (status != DASD_CQR_DONE) { - dev_warn(&device->cdev->dev, - "The DASD is not formatted\n"); + if (status == INIT_CQR_ERROR) { + /* try again, this time with full ERP */ + init_cqr = dasd_eckd_analysis_ccw(device); + dasd_sleep_on(init_cqr); + status = dasd_eckd_analysis_evaluation(init_cqr); + dasd_sfree_request(init_cqr, device); + } + + if (status == INIT_CQR_UNFORMATTED) { + dev_warn(&device->cdev->dev, "The DASD is not formatted\n"); return -EMEDIUMTYPE; + } else if (status == INIT_CQR_ERROR) { + dev_err(&device->cdev->dev, + "Detecting the DASD disk layout failed because " + "of an I/O error\n"); + return -EIO; } private->uses_cdl = 1; @@ -1616,8 +1654,7 @@ dasd_eckd_format_device(struct dasd_device * device, } fcp->startdev = device; fcp->memdev = device; - clear_bit(DASD_CQR_FLAGS_USE_ERP, &fcp->flags); - fcp->retries = 5; /* set retry counter to enable default ERP */ + fcp->retries = 256; fcp->buildclk = get_clock(); fcp->status = DASD_CQR_FILLED; return fcp; @@ -2699,6 +2736,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp) cqr->startdev = device; cqr->memdev = device; cqr->retries = 0; + clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); cqr->expires = 10 * HZ; /* Prepare for Read Subsystem Data */ diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 8afd9fa0087..8c2ea045188 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -595,6 +595,9 @@ int dasd_generic_restore_device(struct ccw_device *); int dasd_generic_read_dev_chars(struct dasd_device *, int, void *, int); char *dasd_get_sense(struct irb *); +void dasd_device_set_stop_bits(struct dasd_device *, int); +void dasd_device_remove_stop_bits(struct dasd_device *, int); + /* externals in dasd_devmap.c */ extern int dasd_max_devindex; extern int dasd_probeonly; diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index f756a1b0c57..478bcdb90b6 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -101,7 +101,7 @@ static int dasd_ioctl_quiesce(struct dasd_block *block) pr_info("%s: The DASD has been put in the quiesce " "state\n", dev_name(&base->cdev->dev)); spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); - base->stopped |= DASD_STOPPED_QUIESCE; + dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE); spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); return 0; } @@ -122,7 +122,7 @@ static int dasd_ioctl_resume(struct dasd_block *block) pr_info("%s: I/O operations have been resumed " "on the DASD\n", dev_name(&base->cdev->dev)); spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); - base->stopped &= ~DASD_STOPPED_QUIESCE; + dasd_device_remove_stop_bits(base, DASD_STOPPED_QUIESCE); spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); dasd_schedule_block_bh(block); -- 2.20.1