[SCSI] iscsi: fix command requeues during iscsi recovery
author Mike Christie <michaelc@cs.wisc.edu>
Fri, 19 May 2006 01:31:42 +0000 (20:31 -0500)
committer James Bottomley <jejb@mulgrave.il.steeleye.com>
Sat, 20 May 2006 14:36:17 +0000 (09:36 -0500)
Do not flush the queues and then block the session. This will cause
commands to needlessly swing around on us. Also remove the goofy
recovery_failed field and replace it with a state value.

And do not start recovery from within the host reset function.
This causes too many problems because open-iscsi was designed to
call out to userspace and then have userspace decide if we should
go into recovery or kill the session.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
drivers/scsi/libiscsi.c
include/scsi/libiscsi.h
include/scsi/scsi_transport_iscsi.h

index 580c0505603cd55c791e004ba947891f773c3097..d810acae45f70d4629bef38faab21fac8bce350f 100644 (file)
@@ -487,6 +487,11 @@ void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err)
        unsigned long flags;
 
        spin_lock_irqsave(&session->lock, flags);
+       if (session->state == ISCSI_STATE_FAILED) {
+               spin_unlock_irqrestore(&session->lock, flags);
+               return;
+       }
+
        if (session->conn_cnt == 1 || session->leadconn == conn)
                session->state = ISCSI_STATE_FAILED;
        spin_unlock_irqrestore(&session->lock, flags);
@@ -612,6 +617,7 @@ enum {
        FAILURE_SESSION_FREED,
        FAILURE_WINDOW_CLOSED,
        FAILURE_SESSION_TERMINATE,
+       FAILURE_SESSION_IN_RECOVERY,
        FAILURE_SESSION_RECOVERY_TIMEOUT,
 };
 
@@ -631,18 +637,30 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 
        spin_lock(&session->lock);
 
-       if (session->state != ISCSI_STATE_LOGGED_IN) {
-               if (session->recovery_failed) {
-                       reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
-                       goto fault;
-               } else if (session->state == ISCSI_STATE_FAILED) {
-                       reason = FAILURE_SESSION_FAILED;
-                       goto reject;
-               } else if (session->state == ISCSI_STATE_TERMINATE) {
-                       reason = FAILURE_SESSION_TERMINATE;
+       /*
+        * ISCSI_STATE_FAILED is a temp. state. The recovery
+        * code will decide what is best to do with command queued
+        * during this time
+        */
+       if (session->state != ISCSI_STATE_LOGGED_IN &&
+           session->state != ISCSI_STATE_FAILED) {
+               /*
+                * to handle the race between when we set the recovery state
+                * and block the session we requeue here (commands could
+                * be entering our queuecommand while a block is starting
+                * up because the block code is not locked)
+                */
+               if (session->state == ISCSI_STATE_IN_RECOVERY) {
+                       reason = FAILURE_SESSION_IN_RECOVERY;
                        goto fault;
                }
-               reason = FAILURE_SESSION_FREED;
+
+               if (session->state == ISCSI_STATE_RECOVERY_FAILED)
+                       reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
+               else if (session->state == ISCSI_STATE_TERMINATE)
+                       reason = FAILURE_SESSION_TERMINATE;
+               else
+                       reason = FAILURE_SESSION_FREED;
                goto fault;
        }
 
@@ -728,8 +746,8 @@ iscsi_conn_send_generic(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                 */
                mtask = conn->login_mtask;
        else {
-               BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
-               BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
+               BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
+               BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
                nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
                if (!__kfifo_get(session->mgmtpool.queue,
@@ -803,7 +821,7 @@ void iscsi_session_recovery_timedout(struct iscsi_cls_session *cls_session)
 
        spin_lock_bh(&session->lock);
        if (session->state != ISCSI_STATE_LOGGED_IN) {
-               session->recovery_failed = 1;
+               session->state = ISCSI_STATE_RECOVERY_FAILED;
                if (conn)
                        wake_up(&conn->ehwait);
        }
@@ -838,20 +856,14 @@ failed:
         * we drop the lock here but the leadconn cannot be destoyed while
         * we are in the scsi eh
         */
-       if (fail_session) {
+       if (fail_session)
                iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-               /*
-                * if userspace cannot respond then we must kick this off
-                * here for it
-                */
-               iscsi_start_session_recovery(session, conn, STOP_CONN_RECOVER);
-       }
 
        debug_scsi("iscsi_eh_host_reset wait for relogin\n");
        wait_event_interruptible(conn->ehwait,
                                 session->state == ISCSI_STATE_TERMINATE ||
                                 session->state == ISCSI_STATE_LOGGED_IN ||
-                                session->recovery_failed);
+                                session->state == ISCSI_STATE_RECOVERY_FAILED);
        if (signal_pending(current))
                flush_signals(current);
 
@@ -940,8 +952,7 @@ static int iscsi_exec_abort_task(struct scsi_cmnd *sc,
        wait_event_interruptible(conn->ehwait,
                                 sc->SCp.phase != session->age ||
                                 session->state != ISCSI_STATE_LOGGED_IN ||
-                                conn->tmabort_state != TMABORT_INITIAL ||
-                                session->recovery_failed);
+                                conn->tmabort_state != TMABORT_INITIAL);
        if (signal_pending(current))
                flush_signals(current);
        del_timer_sync(&conn->tmabort_timer);
@@ -1491,7 +1502,6 @@ int iscsi_conn_start(struct iscsi_cls_conn *cls_conn)
                conn->stop_stage = 0;
                conn->tmabort_state = TMABORT_INITIAL;
                session->age++;
-               session->recovery_failed = 0;
                spin_unlock_bh(&session->lock);
 
                iscsi_unblock_session(session_to_cls(session));
@@ -1566,8 +1576,8 @@ static void fail_all_commands(struct iscsi_conn *conn)
        conn->ctask = NULL;
 }
 
-void iscsi_start_session_recovery(struct iscsi_session *session,
-                                 struct iscsi_conn *conn, int flag)
+static void iscsi_start_session_recovery(struct iscsi_session *session,
+                                        struct iscsi_conn *conn, int flag)
 {
        int old_stop_stage;
 
@@ -1597,19 +1607,10 @@ void iscsi_start_session_recovery(struct iscsi_session *session,
        set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
 
        if (session->conn_cnt == 0 || session->leadconn == conn)
-               session->state = ISCSI_STATE_FAILED;
+               session->state = ISCSI_STATE_IN_RECOVERY;
 
        spin_unlock_bh(&session->lock);
 
-       session->tt->terminate_conn(conn);
-       /*
-        * flush queues.
-        */
-       spin_lock_bh(&session->lock);
-       fail_all_commands(conn);
-       flush_control_queues(session, conn);
-       spin_unlock_bh(&session->lock);
-
        /*
         * for connection level recovery we should not calculate
         * header digest. conn->hdr_size used for optimization
@@ -1619,18 +1620,26 @@ void iscsi_start_session_recovery(struct iscsi_session *session,
        if (flag == STOP_CONN_RECOVER) {
                conn->hdrdgst_en = 0;
                conn->datadgst_en = 0;
-
                /*
                 * if this is called from the eh and and from userspace
                 * then we only need to block once.
                 */
-               if (session->state == ISCSI_STATE_FAILED &&
+               if (session->state == ISCSI_STATE_IN_RECOVERY &&
                    old_stop_stage != STOP_CONN_RECOVER)
                        iscsi_block_session(session_to_cls(session));
        }
+
+       session->tt->terminate_conn(conn);
+       /*
+        * flush queues.
+        */
+       spin_lock_bh(&session->lock);
+       fail_all_commands(conn);
+       flush_control_queues(session, conn);
+       spin_unlock_bh(&session->lock);
+
        mutex_unlock(&conn->xmitmutex);
 }
-EXPORT_SYMBOL_GPL(iscsi_start_session_recovery);
 
 void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 {
index 2dba929a2a05fb850135ecc797610ded104cc1b9..17b28f08b692f833a164a2480eef0b5043318d19 100644 (file)
@@ -210,7 +210,6 @@ struct iscsi_session {
                                                 * - mgmtpool,             *
                                                 * - r2tpool               */
        int                     state;          /* session state           */
-       int                     recovery_failed;
        struct list_head        item;
        int                     conn_cnt;
        int                     age;            /* counts session re-opens */
@@ -241,8 +240,6 @@ iscsi_session_setup(struct iscsi_transport *, struct scsi_transport_template *,
                    int, int, uint32_t, uint32_t *);
 extern void iscsi_session_teardown(struct iscsi_cls_session *);
 extern struct iscsi_session *class_to_transport_session(struct iscsi_cls_session *);
-extern void iscsi_start_session_recovery(struct iscsi_session *,
-                                       struct iscsi_conn *, int);
 extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *);
 
 #define session_to_cls(_sess) \
index c9e9475c6dfff004b3ac811a3ec89915fde4363e..92129b97d31ef62e99b1e8dc9572c0b69e6f4bae 100644 (file)
@@ -173,6 +173,8 @@ struct iscsi_cls_conn {
 #define ISCSI_STATE_LOGGED_IN          2
 #define ISCSI_STATE_FAILED             3
 #define ISCSI_STATE_TERMINATE          4
+#define ISCSI_STATE_IN_RECOVERY                5
+#define ISCSI_STATE_RECOVERY_FAILED    6
 
 struct iscsi_cls_session {
        struct list_head sess_list;             /* item in session_list */