sunrpc: fix sleeping under rcu_read_lock in gss_stringify_acceptor
authorJeff Layton <jlayton@primarydata.com>
Thu, 13 Nov 2014 12:30:46 +0000 (07:30 -0500)
committerTrond Myklebust <trond.myklebust@primarydata.com>
Thu, 13 Nov 2014 18:15:49 +0000 (13:15 -0500)
Bruce reported that he was seeing the following BUG pop:

    BUG: sleeping function called from invalid context at mm/slab.c:2846
    in_atomic(): 0, irqs_disabled(): 0, pid: 4539, name: mount.nfs
    2 locks held by mount.nfs/4539:
    #0:  (nfs_clid_init_mutex){+.+.+.}, at: [<ffffffffa01c0a9a>] nfs4_discover_server_trunking+0x4a/0x2f0 [nfsv4]
    #1:  (rcu_read_lock){......}, at: [<ffffffffa00e3185>] gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss]
    Preemption disabled at:[<ffffffff81a4f082>] printk+0x4d/0x4f

    CPU: 3 PID: 4539 Comm: mount.nfs Not tainted 3.18.0-rc1-00013-g5b095e9 #3393
    Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
    ffff880021499390 ffff8800381476a8 ffffffff81a534cf 0000000000000001
    0000000000000000 ffff8800381476c8 ffffffff81097854 00000000000000d0
    0000000000000018 ffff880038147718 ffffffff8118e4f3 0000000020479f00
    Call Trace:
    [<ffffffff81a534cf>] dump_stack+0x4f/0x7c
    [<ffffffff81097854>] __might_sleep+0x114/0x180
    [<ffffffff8118e4f3>] __kmalloc+0x1a3/0x280
    [<ffffffffa00e31d8>] gss_stringify_acceptor+0x58/0xb0 [auth_rpcgss]
    [<ffffffffa00e3185>] ? gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss]
    [<ffffffffa006b438>] rpcauth_stringify_acceptor+0x18/0x30 [sunrpc]
    [<ffffffffa01b0469>] nfs4_proc_setclientid+0x199/0x380 [nfsv4]
    [<ffffffffa01b04d0>] ? nfs4_proc_setclientid+0x200/0x380 [nfsv4]
    [<ffffffffa01bdf1a>] nfs40_discover_server_trunking+0xda/0x150 [nfsv4]
    [<ffffffffa01bde45>] ? nfs40_discover_server_trunking+0x5/0x150 [nfsv4]
    [<ffffffffa01c0acf>] nfs4_discover_server_trunking+0x7f/0x2f0 [nfsv4]
    [<ffffffffa01c8e24>] nfs4_init_client+0x104/0x2f0 [nfsv4]
    [<ffffffffa01539b4>] nfs_get_client+0x314/0x3f0 [nfs]
    [<ffffffffa0153780>] ? nfs_get_client+0xe0/0x3f0 [nfs]
    [<ffffffffa01c83aa>] nfs4_set_client+0x8a/0x110 [nfsv4]
    [<ffffffffa0069708>] ? __rpc_init_priority_wait_queue+0xa8/0xf0 [sunrpc]
    [<ffffffffa01c9b2f>] nfs4_create_server+0x12f/0x390 [nfsv4]
    [<ffffffffa01c1472>] nfs4_remote_mount+0x32/0x60 [nfsv4]
    [<ffffffff81196489>] mount_fs+0x39/0x1b0
    [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20
    [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150
    [<ffffffffa01c1396>] nfs_do_root_mount+0x86/0xc0 [nfsv4]
    [<ffffffffa01c1784>] nfs4_try_mount+0x44/0xc0 [nfsv4]
    [<ffffffffa01549b7>] ? get_nfs_version+0x27/0x90 [nfs]
    [<ffffffffa0161a2d>] nfs_fs_mount+0x47d/0xd60 [nfs]
    [<ffffffff81a59c5e>] ? mutex_unlock+0xe/0x10
    [<ffffffffa01606a0>] ? nfs_remount+0x430/0x430 [nfs]
    [<ffffffffa01609c0>] ? nfs_clone_super+0x140/0x140 [nfs]
    [<ffffffff81196489>] mount_fs+0x39/0x1b0
    [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20
    [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150
    [<ffffffff811b5830>] do_mount+0x210/0xbe0
    [<ffffffff811b54ca>] ? copy_mount_options+0x3a/0x160
    [<ffffffff811b651f>] SyS_mount+0x6f/0xb0
    [<ffffffff81a5c852>] system_call_fastpath+0x12/0x17

Sleeping under the rcu_read_lock is bad. This patch fixes it by dropping
the rcu_read_lock before doing the allocation and then reacquiring it
and redoing the dereference before doing the copy. If we find that the
string has somehow grown in the meantime, we'll reallocate and try again.

Cc: <stable@vger.kernel.org> # v3.17+
Reported-by: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
net/sunrpc/auth_gss/auth_gss.c

index afb292cd797decf08561492925d87d34d09e485b..53ed8d3f88970a877b2799e6ecb2dc9de6893304 100644 (file)
@@ -1353,6 +1353,7 @@ gss_stringify_acceptor(struct rpc_cred *cred)
        char *string = NULL;
        struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
        struct gss_cl_ctx *ctx;
+       unsigned int len;
        struct xdr_netobj *acceptor;
 
        rcu_read_lock();
@@ -1360,15 +1361,39 @@ gss_stringify_acceptor(struct rpc_cred *cred)
        if (!ctx)
                goto out;
 
-       acceptor = &ctx->gc_acceptor;
+       len = ctx->gc_acceptor.len;
+       rcu_read_unlock();
 
        /* no point if there's no string */
-       if (!acceptor->len)
-               goto out;
-
-       string = kmalloc(acceptor->len + 1, GFP_KERNEL);
+       if (!len)
+               return NULL;
+realloc:
+       string = kmalloc(len + 1, GFP_KERNEL);
        if (!string)
+               return NULL;
+
+       rcu_read_lock();
+       ctx = rcu_dereference(gss_cred->gc_ctx);
+
+       /* did the ctx disappear or was it replaced by one with no acceptor? */
+       if (!ctx || !ctx->gc_acceptor.len) {
+               kfree(string);
+               string = NULL;
                goto out;
+       }
+
+       acceptor = &ctx->gc_acceptor;
+
+       /*
+        * Did we find a new acceptor that's longer than the original? Allocate
+        * a longer buffer and try again.
+        */
+       if (len < acceptor->len) {
+               len = acceptor->len;
+               rcu_read_unlock();
+               kfree(string);
+               goto realloc;
+       }
 
        memcpy(string, acceptor->data, acceptor->len);
        string[acceptor->len] = '\0';