fs: fs_struct use seqlock
author Nick Piggin <npiggin@kernel.dk>
Fri, 7 Jan 2011 06:49:53 +0000 (17:49 +1100)
committer Nick Piggin <npiggin@kernel.dk>
Fri, 7 Jan 2011 06:50:27 +0000 (17:50 +1100)
Use a seqlock in the fs_struct to enable us to take an atomic copy of the
complete cwd and root paths. Use this in the RCU lookup path to avoid a
thread-shared spinlock in RCU lookup operations.

Multi-threaded apps may now perform path lookups with scalability matching
multi-process apps. Operations such as stat(2) become very scalable for
multi-threaded workloads.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
fs/fs_struct.c
fs/namei.c
include/linux/fs_struct.h

index ed45a9cf5f3de46ae08e90d581a500bed9eb4dbf..60b8531f41c52c73be80f26931eb62feb6bc812f 100644 (file)
@@ -14,9 +14,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
        struct path old_root;
 
        spin_lock(&fs->lock);
+       write_seqcount_begin(&fs->seq);
        old_root = fs->root;
        fs->root = *path;
        path_get(path);
+       write_seqcount_end(&fs->seq);
        spin_unlock(&fs->lock);
        if (old_root.dentry)
                path_put(&old_root);
@@ -31,9 +33,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
        struct path old_pwd;
 
        spin_lock(&fs->lock);
+       write_seqcount_begin(&fs->seq);
        old_pwd = fs->pwd;
        fs->pwd = *path;
        path_get(path);
+       write_seqcount_end(&fs->seq);
        spin_unlock(&fs->lock);
 
        if (old_pwd.dentry)
@@ -52,6 +56,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
                fs = p->fs;
                if (fs) {
                        spin_lock(&fs->lock);
+                       write_seqcount_begin(&fs->seq);
                        if (fs->root.dentry == old_root->dentry
                            && fs->root.mnt == old_root->mnt) {
                                path_get(new_root);
@@ -64,6 +69,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
                                fs->pwd = *new_root;
                                count++;
                        }
+                       write_seqcount_end(&fs->seq);
                        spin_unlock(&fs->lock);
                }
                task_unlock(p);
@@ -88,8 +94,10 @@ void exit_fs(struct task_struct *tsk)
                int kill;
                task_lock(tsk);
                spin_lock(&fs->lock);
+               write_seqcount_begin(&fs->seq);
                tsk->fs = NULL;
                kill = !--fs->users;
+               write_seqcount_end(&fs->seq);
                spin_unlock(&fs->lock);
                task_unlock(tsk);
                if (kill)
@@ -105,6 +113,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
                fs->users = 1;
                fs->in_exec = 0;
                spin_lock_init(&fs->lock);
+               seqcount_init(&fs->seq);
                fs->umask = old->umask;
                get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
        }
@@ -144,6 +153,7 @@ EXPORT_SYMBOL(current_umask);
 struct fs_struct init_fs = {
        .users          = 1,
        .lock           = __SPIN_LOCK_UNLOCKED(init_fs.lock),
+       .seq            = SEQCNT_ZERO,
        .umask          = 0022,
 };
 
index 8d3f15b3a54167171bd0e2ec06103c60d5099f45..c731b50a618469cb70de76f98f8661ea15110ca3 100644 (file)
@@ -684,9 +684,12 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
 {
        if (!nd->root.mnt) {
                struct fs_struct *fs = current->fs;
-               spin_lock(&fs->lock);
-               nd->root = fs->root;
-               spin_unlock(&fs->lock);
+               unsigned seq;
+
+               do {
+                       seq = read_seqcount_begin(&fs->seq);
+                       nd->root = fs->root;
+               } while (read_seqcount_retry(&fs->seq, seq));
        }
 }
 
@@ -1369,26 +1372,31 @@ static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct n
 
        if (*name=='/') {
                struct fs_struct *fs = current->fs;
+               unsigned seq;
 
                br_read_lock(vfsmount_lock);
                rcu_read_lock();
 
-               spin_lock(&fs->lock);
-               nd->root = fs->root;
-               nd->path = nd->root;
-               nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
-               spin_unlock(&fs->lock);
+               do {
+                       seq = read_seqcount_begin(&fs->seq);
+                       nd->root = fs->root;
+                       nd->path = nd->root;
+                       nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+               } while (read_seqcount_retry(&fs->seq, seq));
 
        } else if (dfd == AT_FDCWD) {
                struct fs_struct *fs = current->fs;
+               unsigned seq;
 
                br_read_lock(vfsmount_lock);
                rcu_read_lock();
 
-               spin_lock(&fs->lock);
-               nd->path = fs->pwd;
-               nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
-               spin_unlock(&fs->lock);
+               do {
+                       seq = read_seqcount_begin(&fs->seq);
+                       nd->path = fs->pwd;
+                       nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+               } while (read_seqcount_retry(&fs->seq, seq));
+
        } else {
                struct dentry *dentry;
 
@@ -1411,7 +1419,7 @@ static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct n
                if (fput_needed)
                        nd->file = file;
 
-               nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+               nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
                br_read_lock(vfsmount_lock);
                rcu_read_lock();
        }
index a42b5bf02f8bcfec3ea20ecef765df3f4d690250..003dc0fd73473a8e03cb23a163d7ae2ee547fa93 100644 (file)
@@ -2,10 +2,13 @@
 #define _LINUX_FS_STRUCT_H
 
 #include <linux/path.h>
+#include <linux/spinlock.h>
+#include <linux/seqlock.h>
 
 struct fs_struct {
        int users;
        spinlock_t lock;
+       seqcount_t seq;
        int umask;
        int in_exec;
        struct path root, pwd;