NFSv4.1: turn off pNFS on ds connection failure
author Andy Adamson <andros@netapp.com>
Tue, 1 Mar 2011 01:34:22 +0000 (01:34 +0000)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 11 Mar 2011 20:38:43 +0000 (15:38 -0500)
If a data server is unavailable, go through MDS.

Mark the deviceid containing the data server as a negative cache entry.
Do not try to connect to any data server on a deviceid marked as a negative
cache entry. Mark any layout that tries to use the marked deviceid as failed.

Inodes with a layout marked as failed will not use the layout for I/O, and will
not perform any more layoutgets.
Inodes without a layout will still do layoutget, but the layout will get
marked as failed immediately.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/nfs/nfs4filelayout.c
fs/nfs/nfs4filelayout.h
fs/nfs/nfs4filelayoutdev.c
fs/nfs/pnfs.c

index a922e75af42eb4573d7f2acf5c10c7ed55994d1d..0040a5ee6208a742406daa181149c7d18999fbfa 100644 (file)
@@ -214,7 +214,9 @@ filelayout_read_pagelist(struct nfs_read_data *data)
        idx = nfs4_fl_calc_ds_index(lseg, j);
        ds = nfs4_fl_prepare_ds(lseg, idx);
        if (!ds) {
-               printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+               /* Either layout fh index faulty, or ds connect failed */
+               set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+               set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
                return PNFS_NOT_ATTEMPTED;
        }
        dprintk("%s USE DS:ip %x %hu\n", __func__,
index 23f1e1e2a0f5bb9fb4e097686c4165e85f8e3023..ee0c907742b529f5bf1ea54cc8b70f9db87f5e2c 100644 (file)
@@ -55,10 +55,14 @@ struct nfs4_pnfs_ds {
        atomic_t                ds_count;
 };
 
+/* nfs4_file_layout_dsaddr flags */
+#define NFS4_DEVICE_ID_NEG_ENTRY       0x00000001
+
 struct nfs4_file_layout_dsaddr {
        struct hlist_node               node;
        struct nfs4_deviceid            deviceid;
        atomic_t                        ref;
+       unsigned long                   flags;
        u32                             stripe_count;
        u8                              *stripe_indices;
        u32                             ds_num;
index f594ca35a99657c2c29d4ed03dd975349a27a367..68143c162e3be30ef824027a1db99de69975acfc 100644 (file)
@@ -606,6 +606,21 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
        return flseg->fh_array[i];
 }
 
+static void
+filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
+                              int err, u32 ds_addr)
+{
+       u32 *p = (u32 *)&dsaddr->deviceid;
+
+       printk(KERN_ERR "NFS: data server %x connection error %d."
+               " Deviceid [%x%x%x%x] marked out of use.\n",
+               ds_addr, err, p[0], p[1], p[2], p[3]);
+
+       spin_lock(&filelayout_deviceid_lock);
+       dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
+       spin_unlock(&filelayout_deviceid_lock);
+}
+
 struct nfs4_pnfs_ds *
 nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 {
@@ -619,13 +634,18 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
        }
 
        if (!ds->ds_clp) {
+               struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
                int err;
 
-               err = nfs4_ds_connect(NFS_SERVER(lseg->pls_layout->plh_inode),
-                                         dsaddr->ds_list[ds_idx]);
+               if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
+                       /* Already tried to connect, don't try again */
+                       dprintk("%s Deviceid marked out of use\n", __func__);
+                       return NULL;
+               }
+               err = nfs4_ds_connect(s, ds);
                if (err) {
-                       printk(KERN_ERR "%s nfs4_ds_connect error %d\n",
-                              __func__, err);
+                       filelayout_mark_devid_negative(dsaddr, err,
+                                                      ntohl(ds->ds_ip_addr));
                        return NULL;
                }
        }
index 1f4c153441a10b019c883045aebc1b3dab7bc3ca..3e545144a0b205fc8cc1325abadf9622dad9e81e 100644 (file)
@@ -739,15 +739,16 @@ pnfs_update_layout(struct inode *ino,
                dprintk("%s matches recall, use MDS\n", __func__);
                goto out_unlock;
        }
-       /* Check to see if the layout for the given range already exists */
-       lseg = pnfs_find_lseg(lo, iomode);
-       if (lseg)
-               goto out_unlock;
 
        /* if LAYOUTGET already failed once we don't try again */
        if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
                goto out_unlock;
 
+       /* Check to see if the layout for the given range already exists */
+       lseg = pnfs_find_lseg(lo, iomode);
+       if (lseg)
+               goto out_unlock;
+
        if (pnfs_layoutgets_blocked(lo, NULL, 0))
                goto out_unlock;
        atomic_inc(&lo->plh_outstanding);