[CIFS] cifs: reconnect unresponsive servers
author Steve French <sfrench@us.ibm.com>
Thu, 20 Jan 2011 18:06:34 +0000 (18:06 +0000)
committer Steve French <sfrench@us.ibm.com>
Thu, 20 Jan 2011 18:06:34 +0000 (18:06 +0000)
If the server isn't responding to echoes, we don't want to leave tasks
hung waiting for it to reply. At that point, we'll want to reconnect
so that soft mounts can return an error to userspace quickly.

If the client hasn't received a reply after a specified number of echo
intervals, assume that the transport is down and attempt to reconnect
the socket.

The number of echo_intervals to wait before attempting to reconnect is
tunable via a module parameter. Setting it to 0 means that the client
will never attempt to reconnect. The default is 5.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
fs/cifs/cifsfs.c
fs/cifs/cifsglob.h
fs/cifs/connect.c

index d9f652a522a6994097d7ec4008abacb0cfc0b65a..99d777a03dd07b634e6174f54be23602092a4f2b 100644 (file)
@@ -77,7 +77,11 @@ unsigned int cifs_max_pending = CIFS_MAX_REQ;
 module_param(cifs_max_pending, int, 0);
 MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. "
                                   "Default: 50 Range: 2 to 256");
-
+unsigned short echo_retries = 5;
+module_param(echo_retries, ushort, 0644);
+MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and "
+                              "reconnecting server. Default: 5. 0 means "
+                              "never reconnect.");
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
index 9c728dd5b146452b1c7b8a7400c44526a3e3d729..7040abc638fa8683f3661bbb6ea1979281548c86 100644 (file)
@@ -804,6 +804,9 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
 GLOBAL_EXTERN unsigned int cifs_min_small;  /* min size of small buf pool */
 GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
 
+/* reconnect after this many failed echo attempts */
+GLOBAL_EXTERN unsigned short echo_retries;
+
 void cifs_oplock_break(struct work_struct *work);
 void cifs_oplock_break_get(struct cifsFileInfo *cfile);
 void cifs_oplock_break_put(struct cifsFileInfo *cfile);
index f38ca084c9d2d2410be8a8e4ea423cb412e03ea1..f5d7b59a355316dc22de8affebff3c07d08f75ae 100644 (file)
@@ -186,6 +186,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
        kfree(server->session_key.response);
        server->session_key.response = NULL;
        server->session_key.len = 0;
+       server->lstrp = jiffies;
        mutex_unlock(&server->srv_mutex);
 
        /* mark submitted MIDs for retry and issue callback */
@@ -420,7 +421,20 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
                smb_msg.msg_control = NULL;
                smb_msg.msg_controllen = 0;
                pdu_length = 4; /* enough to get RFC1001 header */
+
 incomplete_rcv:
+               if (echo_retries > 0 &&
+                   time_after(jiffies, server->lstrp +
+                                       (echo_retries * SMB_ECHO_INTERVAL))) {
+                       cERROR(1, "Server %s has not responded in %d seconds. "
+                                 "Reconnecting...", server->hostname,
+                                 (echo_retries * SMB_ECHO_INTERVAL / HZ));
+                       cifs_reconnect(server);
+                       csocket = server->ssocket;
+                       wake_up(&server->response_q);
+                       continue;
+               }
+
                length =
                    kernel_recvmsg(csocket, &smb_msg,
                                &iov, 1, pdu_length, 0 /* BB other flags? */);
@@ -581,6 +595,8 @@ incomplete_rcv:
                }
 
                mid_entry = NULL;
+               server->lstrp = jiffies;
+
                spin_lock(&GlobalMid_Lock);
                list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
                        mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
@@ -629,10 +645,6 @@ multi_t2_fnd:
 #ifdef CONFIG_CIFS_STATS2
                                mid_entry->when_received = jiffies;
 #endif
-                               /* so we do not time out requests to  server
-                               which is still responding (since server could
-                               be busy but not dead) */
-                               server->lstrp = jiffies;
                                break;
                        }
                        mid_entry = NULL;
@@ -1685,6 +1697,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
                volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
        tcp_ses->session_estab = false;
        tcp_ses->sequence_number = 0;
+       tcp_ses->lstrp = jiffies;
        INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
        INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
        INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);