posix_acl: Inode acl caching fixes
author Andreas Gruenbacher <agruenba@redhat.com>
Thu, 24 Mar 2016 13:38:37 +0000 (14:38 +0100)
committer Al Viro <viro@zeniv.linux.org.uk>
Thu, 31 Mar 2016 04:30:15 +0000 (00:30 -0400)
When get_acl() is called for an inode whose ACL is not cached yet, the
get_acl inode operation is called to fetch the ACL from the filesystem.
The inode operation is responsible for updating the cached acl with
set_cached_acl().  This is done without locking at the VFS level, so
another task can call set_cached_acl() or forget_cached_acl() before the
get_acl inode operation gets to calling set_cached_acl(), and then
get_acl's call to set_cached_acl() results in caching an outdated ACL.

Prevent this from happening by setting the cached ACL pointer to a
task-specific sentinel value before calling the get_acl inode operation.
Move the responsibility for updating the cached ACL from the get_acl
inode operations to get_acl().  There, only set the cached ACL if the
sentinel value hasn't changed.

The sentinel values are chosen to have odd values.  Likewise, the value
of ACL_NOT_CACHED is odd.  In contrast, ACL object pointers always have
an even value (ACLs are aligned in memory).  This makes it possible to
distinguish uncached ACL values from ACL objects.

In addition, switch from guarding inode->i_acl and inode->i_default_acl
updates by the inode->i_lock spinlock to using xchg() and cmpxchg().

Filesystems that do not want ACLs returned from their get_acl inode
operations to be cached must call forget_cached_acl() to prevent the VFS
from doing so.

(Patch written by Al Viro and Andreas Gruenbacher.)

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
17 files changed:
fs/9p/acl.c
fs/btrfs/acl.c
fs/ceph/acl.c
fs/ext2/acl.c
fs/ext4/acl.c
fs/f2fs/acl.c
fs/hfsplus/posix_acl.c
fs/inode.c
fs/jffs2/acl.c
fs/jfs/acl.c
fs/namei.c
fs/nfs/nfs3acl.c
fs/ocfs2/dlmglue.c
fs/posix_acl.c
fs/reiserfs/xattr_acl.c
fs/xfs/xfs_acl.c
include/linux/fs.h

index 9da967f383872dab4bc08ed149a230f5f8304f1a..2d94e94b6b591277753e0f7f8dd66663fa0db1ed 100644 (file)
@@ -93,7 +93,7 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
         * instantiating the inode (v9fs_inode_from_fid)
         */
        acl = get_cached_acl(inode, type);
-       BUG_ON(acl == ACL_NOT_CACHED);
+       BUG_ON(is_uncached_acl(acl));
        return acl;
 }
 
index 6d263bb1621cd92c51cc6ebd014f8ad06dcd719f..67a607709d4f7eb18d941cf1a70c7d00a7afba66 100644 (file)
@@ -63,9 +63,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
        }
        kfree(value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index f19708487e2f74c80e55b14ecbadb07bf79b73a9..5457f216e2e5bc4999eb4795cee994b941a57365 100644 (file)
@@ -37,6 +37,8 @@ static inline void ceph_set_cached_acl(struct inode *inode,
        spin_lock(&ci->i_ceph_lock);
        if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
                set_cached_acl(inode, type, acl);
+       else
+               forget_cached_acl(inode, type);
        spin_unlock(&ci->i_ceph_lock);
 }
 
index 27695e6f4e46629a8a888f38bcaf6f0e4ceaf1ec..42f1d1814083c568cff9b6daf181d96da1ac5420 100644 (file)
@@ -172,9 +172,6 @@ ext2_get_acl(struct inode *inode, int type)
                acl = ERR_PTR(retval);
        kfree(value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index 69b1e73026a51f6b8ae0e591d33abb0fdc9169d8..c6601a476c021f52235350666fc82f9424f87464 100644 (file)
@@ -172,9 +172,6 @@ ext4_get_acl(struct inode *inode, int type)
                acl = ERR_PTR(retval);
        kfree(value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index c8f25f7241f06a96c3d99ebf5c82e53a6099ed60..6f1fdda977b396839d7804fc4fcdb4a948895b53 100644 (file)
@@ -190,9 +190,6 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
                acl = ERR_PTR(retval);
        kfree(value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index afb33eda6d7dbae7ea5dab4d19b1edc401db151a..ab7ea2506b4defa4ff126fe19762e9d6157ef893 100644 (file)
@@ -48,9 +48,6 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
 
        hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
 
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
-
        return acl;
 }
 
index 69b8b526c1946c455c8b5ad0c8939542d9c2e82d..4202aac99464079e89609a492fb0c514a0af8a7c 100644 (file)
@@ -238,9 +238,9 @@ void __destroy_inode(struct inode *inode)
        }
 
 #ifdef CONFIG_FS_POSIX_ACL
-       if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
+       if (inode->i_acl && !is_uncached_acl(inode->i_acl))
                posix_acl_release(inode->i_acl);
-       if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
+       if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
                posix_acl_release(inode->i_default_acl);
 #endif
        this_cpu_dec(nr_inodes);
index 2f7a3c09048999f4365c3e5612ffdf18e9c65d54..bc2693d562987a6602b9403599f48d6c481a39be 100644 (file)
@@ -203,8 +203,6 @@ struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
                acl = ERR_PTR(rc);
        }
        kfree(value);
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
        return acl;
 }
 
index ab4882801b24393470f301a4b990f1399b0f87e2..21fa92ba2c191664fb2851da022f76a5e5d62388 100644 (file)
@@ -63,8 +63,6 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type)
                acl = posix_acl_from_xattr(&init_user_ns, value, size);
        }
        kfree(value);
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
        return acl;
 }
 
index 794f81dce76606ec025e3dcf1f182a175eaa0e82..3498d53de26fe4e33aa0620f4cd5fcfb3f208149 100644 (file)
@@ -265,7 +265,7 @@ static int check_acl(struct inode *inode, int mask)
                if (!acl)
                        return -EAGAIN;
                /* no ->get_acl() calls in RCU mode... */
-               if (acl == ACL_NOT_CACHED)
+               if (is_uncached_acl(acl))
                        return -ECHILD;
                return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
        }
index 17c0fa1eccfaad22710fe6e938dc4904cc5b8cdf..720d92f5abfb838875bb1a01630e101e4738581a 100644 (file)
 
 #define NFSDBG_FACILITY        NFSDBG_PROC
 
+/*
+ * nfs3_prepare_get_acl, nfs3_complete_get_acl, nfs3_abort_get_acl: Helpers for
+ * caching get_acl results in a race-free way.  See fs/posix_acl.c:get_acl()
+ * for explanations.
+ */
+static void nfs3_prepare_get_acl(struct posix_acl **p)
+{
+       struct posix_acl *sentinel = uncached_acl_sentinel(current);
+
+       if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) {
+               /* *p was not ACL_NOT_CACHED, so our sentinel was not stored; nothing to do. */
+       }
+}
+
+static void nfs3_complete_get_acl(struct posix_acl **p, struct posix_acl *acl)
+{
+       struct posix_acl *sentinel = uncached_acl_sentinel(current);
+
+       /* Dup the ACL for the cache slot; release it again if *p no longer holds our sentinel. */
+       posix_acl_dup(acl);
+       if (cmpxchg(p, sentinel, acl) != sentinel)
+               posix_acl_release(acl);
+}
+
+static void nfs3_abort_get_acl(struct posix_acl **p)
+{
+       struct posix_acl *sentinel = uncached_acl_sentinel(current);
+
+       /* Reset *p to ACL_NOT_CACHED only if it still holds our sentinel; otherwise a no-op. */
+       cmpxchg(p, sentinel, ACL_NOT_CACHED);
+}
+
 struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
 {
        struct nfs_server *server = NFS_SERVER(inode);
@@ -55,6 +87,11 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
        if (res.fattr == NULL)
                return ERR_PTR(-ENOMEM);
 
+       if (args.mask & NFS_ACL)
+               nfs3_prepare_get_acl(&inode->i_acl);
+       if (args.mask & NFS_DFACL)
+               nfs3_prepare_get_acl(&inode->i_default_acl);
+
        status = rpc_call_sync(server->client_acl, &msg, 0);
        dprintk("NFS reply getacl: %d\n", status);
 
@@ -89,12 +126,12 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
        }
 
        if (res.mask & NFS_ACL)
-               set_cached_acl(inode, ACL_TYPE_ACCESS, res.acl_access);
+               nfs3_complete_get_acl(&inode->i_acl, res.acl_access);
        else
                forget_cached_acl(inode, ACL_TYPE_ACCESS);
 
        if (res.mask & NFS_DFACL)
-               set_cached_acl(inode, ACL_TYPE_DEFAULT, res.acl_default);
+               nfs3_complete_get_acl(&inode->i_default_acl, res.acl_default);
        else
                forget_cached_acl(inode, ACL_TYPE_DEFAULT);
 
@@ -108,6 +145,8 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
        }
 
 getout:
+       nfs3_abort_get_acl(&inode->i_acl);
+       nfs3_abort_get_acl(&inode->i_default_acl);
        posix_acl_release(res.acl_access);
        posix_acl_release(res.acl_default);
        nfs_free_fattr(res.fattr);
index 474e57f834e6caff12eaca03c8ece4d35303d1e0..1eaa9100c8897a143d1409a471c4900546e94dc4 100644 (file)
@@ -54,6 +54,7 @@
 #include "uptodate.h"
 #include "quota.h"
 #include "refcounttree.h"
+#include "acl.h"
 
 #include "buffer_head_io.h"
 
@@ -3623,6 +3624,8 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
                filemap_fdatawait(mapping);
        }
 
+       forget_all_cached_acls(inode);
+
 out:
        return UNBLOCK_CONTINUE;
 }
index 711dd517037689ac03fa1b31085a72ca010ab314..bc6736d6078638f6cf940f68a6a2cf08d7f5814d 100644 (file)
@@ -37,14 +37,18 @@ EXPORT_SYMBOL(acl_by_type);
 struct posix_acl *get_cached_acl(struct inode *inode, int type)
 {
        struct posix_acl **p = acl_by_type(inode, type);
-       struct posix_acl *acl = ACCESS_ONCE(*p);
-       if (acl) {
-               spin_lock(&inode->i_lock);
-               acl = *p;
-               if (acl != ACL_NOT_CACHED)
-                       acl = posix_acl_dup(acl);
-               spin_unlock(&inode->i_lock);
+       struct posix_acl *acl;
+
+       for (;;) {
+               rcu_read_lock();
+               acl = rcu_dereference(*p);
+               if (!acl || is_uncached_acl(acl) ||
+                   atomic_inc_not_zero(&acl->a_refcount))
+                       break;
+               rcu_read_unlock();
+               cpu_relax();
        }
+       rcu_read_unlock();
        return acl;
 }
 EXPORT_SYMBOL(get_cached_acl);
@@ -59,58 +63,72 @@ void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
        struct posix_acl **p = acl_by_type(inode, type);
        struct posix_acl *old;
-       spin_lock(&inode->i_lock);
-       old = *p;
-       rcu_assign_pointer(*p, posix_acl_dup(acl));
-       spin_unlock(&inode->i_lock);
-       if (old != ACL_NOT_CACHED)
+
+       old = xchg(p, posix_acl_dup(acl));
+       if (!is_uncached_acl(old))
                posix_acl_release(old);
 }
 EXPORT_SYMBOL(set_cached_acl);
 
-void forget_cached_acl(struct inode *inode, int type)
+static void __forget_cached_acl(struct posix_acl **p)
 {
-       struct posix_acl **p = acl_by_type(inode, type);
        struct posix_acl *old;
-       spin_lock(&inode->i_lock);
-       old = *p;
-       *p = ACL_NOT_CACHED;
-       spin_unlock(&inode->i_lock);
-       if (old != ACL_NOT_CACHED)
+
+       old = xchg(p, ACL_NOT_CACHED);
+       if (!is_uncached_acl(old))
                posix_acl_release(old);
 }
+
+void forget_cached_acl(struct inode *inode, int type)
+{
+       __forget_cached_acl(acl_by_type(inode, type));
+}
 EXPORT_SYMBOL(forget_cached_acl);
 
 void forget_all_cached_acls(struct inode *inode)
 {
-       struct posix_acl *old_access, *old_default;
-       spin_lock(&inode->i_lock);
-       old_access = inode->i_acl;
-       old_default = inode->i_default_acl;
-       inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
-       spin_unlock(&inode->i_lock);
-       if (old_access != ACL_NOT_CACHED)
-               posix_acl_release(old_access);
-       if (old_default != ACL_NOT_CACHED)
-               posix_acl_release(old_default);
+       __forget_cached_acl(&inode->i_acl);
+       __forget_cached_acl(&inode->i_default_acl);
 }
 EXPORT_SYMBOL(forget_all_cached_acls);
 
 struct posix_acl *get_acl(struct inode *inode, int type)
 {
+       void *sentinel;
+       struct posix_acl **p;
        struct posix_acl *acl;
 
+       /*
+        * The sentinel is used to detect when another operation like
+        * set_cached_acl() or forget_cached_acl() races with get_acl().
+        * It is guaranteed that is_uncached_acl(sentinel) is true.
+        */
+
        acl = get_cached_acl(inode, type);
-       if (acl != ACL_NOT_CACHED)
+       if (!is_uncached_acl(acl))
                return acl;
 
        if (!IS_POSIXACL(inode))
                return NULL;
 
+       sentinel = uncached_acl_sentinel(current);
+       p = acl_by_type(inode, type);
+
        /*
-        * A filesystem can force a ACL callback by just never filling the
-        * ACL cache. But normally you'd fill the cache either at inode
-        * instantiation time, or on the first ->get_acl call.
+        * If the ACL isn't being read yet, set our sentinel.  Otherwise, the
+        * current value of the ACL will not be ACL_NOT_CACHED and so our own
+        * sentinel will not be set; another task will update the cache.  We
+        * could wait for that other task to complete its job, but it's easier
+        * to just call ->get_acl to fetch the ACL ourself.  (This is going to
+        * be an unlikely race.)
+        */
+       if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED)
+               /* fall through */ ;
+
+       /*
+        * Normally, the ACL returned by ->get_acl will be cached.
+        * A filesystem can prevent that by calling
+        * forget_cached_acl(inode, type) in ->get_acl.
         *
         * If the filesystem doesn't have a get_acl() function at all, we'll
         * just create the negative cache entry.
@@ -119,7 +137,24 @@ struct posix_acl *get_acl(struct inode *inode, int type)
                set_cached_acl(inode, type, NULL);
                return NULL;
        }
-       return inode->i_op->get_acl(inode, type);
+       acl = inode->i_op->get_acl(inode, type);
+
+       if (IS_ERR(acl)) {
+               /*
+                * Remove our sentinel so that we don't block future attempts
+                * to cache the ACL.
+                */
+               cmpxchg(p, sentinel, ACL_NOT_CACHED);
+               return acl;
+       }
+
+       /*
+        * Cache the result, but only if our sentinel is still in place.
+        */
+       posix_acl_dup(acl);
+       if (unlikely(cmpxchg(p, sentinel, acl) != sentinel))
+               posix_acl_release(acl);
+       return acl;
 }
 EXPORT_SYMBOL(get_acl);
 
index ec74bbedc8731db5534ff5094cb46a0933c5e3b7..dbed42f755e01ff46518efb35e46a06036b1b52d 100644 (file)
@@ -197,10 +197,8 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 
        size = reiserfs_xattr_get(inode, name, NULL, 0);
        if (size < 0) {
-               if (size == -ENODATA || size == -ENOSYS) {
-                       set_cached_acl(inode, type, NULL);
+               if (size == -ENODATA || size == -ENOSYS)
                        return NULL;
-               }
                return ERR_PTR(size);
        }
 
@@ -220,8 +218,6 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
        } else {
                acl = reiserfs_posix_acl_from_disk(value, retval);
        }
-       if (!IS_ERR(acl))
-               set_cached_acl(inode, type, acl);
 
        kfree(value);
        return acl;
index 2d5df1f23bbcbe47cebc087023d04126230cbe6c..b6e527b8eccb6d2917a9ef8a7938da902ed9b7c8 100644 (file)
@@ -158,22 +158,14 @@ xfs_get_acl(struct inode *inode, int type)
        if (error) {
                /*
                 * If the attribute doesn't exist make sure we have a negative
-                * cache entry, for any other error assume it is transient and
-                * leave the cache entry as ACL_NOT_CACHED.
+                * cache entry, for any other error assume it is transient.
                 */
-               if (error == -ENOATTR)
-                       goto out_update_cache;
-               acl = ERR_PTR(error);
-               goto out;
+               if (error != -ENOATTR)
+                       acl = ERR_PTR(error);
+       } else  {
+               acl = xfs_acl_from_disk(xfs_acl, len,
+                                       XFS_ACL_MAX_ENTRIES(ip->i_mount));
        }
-
-       acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount));
-       if (IS_ERR(acl))
-               goto out;
-
-out_update_cache:
-       set_cached_acl(inode, type, acl);
-out:
        kmem_free(xfs_acl);
        return acl;
 }
index 14a97194b34ba16ba7906ae8c27b1767f266f317..329ed372d7082f07046f440ac9ae16a86731a1b5 100644 (file)
@@ -577,6 +577,18 @@ static inline void mapping_allow_writable(struct address_space *mapping)
 struct posix_acl;
 #define ACL_NOT_CACHED ((void *)(-1))
 
+static inline struct posix_acl *
+uncached_acl_sentinel(struct task_struct *task)
+{
+       return (void *)task + 1;        /* per-task marker; odd (so is_uncached_acl() is true) assuming task is even-aligned */
+}
+
+static inline bool
+is_uncached_acl(struct posix_acl *acl)
+{
+       return (long)acl & 1;   /* low bit set: ACL_NOT_CACHED ((void *)-1) or any other odd-valued pointer */
+}
+
 #define IOP_FASTPERM   0x0001
 #define IOP_LOOKUP     0x0002
 #define IOP_NOFOLLOW   0x0004