mnt->mnt_group_id = 0;
}
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_add_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+ this_cpu_add(mnt->mnt_pcp->mnt_count, n);
+#else
+ preempt_disable();
+ mnt->mnt_count += n;
+ preempt_enable();
+#endif
+}
+
+static inline void mnt_set_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+ this_cpu_write(mnt->mnt_pcp->mnt_count, n);
+#else
+ mnt->mnt_count = n;
+#endif
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_inc_count(struct vfsmount *mnt)
+{
+ mnt_add_count(mnt, 1);
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_dec_count(struct vfsmount *mnt)
+{
+ mnt_add_count(mnt, -1);
+}
+
+/*
+ * vfsmount lock must be held for write
+ */
+unsigned int mnt_get_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ unsigned int count = atomic_read(&mnt->mnt_longrefs);
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
+ }
+
+ return count;
+#else
+ return mnt->mnt_count;
+#endif
+}
+
struct vfsmount *alloc_vfsmnt(const char *name)
{
struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
goto out_free_id;
}
- atomic_set(&mnt->mnt_count, 1);
+#ifdef CONFIG_SMP
+ mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
+ if (!mnt->mnt_pcp)
+ goto out_free_devname;
+
+ atomic_set(&mnt->mnt_longrefs, 1);
+#else
+ mnt->mnt_count = 1;
+ mnt->mnt_writers = 0;
+#endif
+
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_mounts);
INIT_LIST_HEAD(&mnt->mnt_slave);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
-#endif
-#ifdef CONFIG_SMP
- mnt->mnt_writers = alloc_percpu(int);
- if (!mnt->mnt_writers)
- goto out_free_devname;
-#else
- mnt->mnt_writers = 0;
#endif
}
return mnt;
static inline void mnt_inc_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
- (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+ this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
mnt->mnt_writers++;
#endif
static inline void mnt_dec_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
- (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+ this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
mnt->mnt_writers--;
#endif
int cpu;
for_each_possible_cpu(cpu) {
- count += *per_cpu_ptr(mnt->mnt_writers, cpu);
+ count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
}
return count;
kfree(mnt->mnt_devname);
mnt_free_id(mnt);
#ifdef CONFIG_SMP
- free_percpu(mnt->mnt_writers);
+ free_percpu(mnt->mnt_pcp);
#endif
kmem_cache_free(mnt_cache, mnt);
}
return NULL;
}
-static inline void __mntput(struct vfsmount *mnt)
+static inline void mntfree(struct vfsmount *mnt)
{
struct super_block *sb = mnt->mnt_sb;
+
/*
* This probably indicates that somebody messed
* up a mnt_want/drop_write() pair. If this
* to make r/w->r/o transitions.
*/
/*
- * atomic_dec_and_lock() used to deal with ->mnt_count decrements
- * provides barriers, so mnt_get_writers() below is safe. AV
+ * The locking used to deal with mnt_count decrement provides barriers,
+ * so mnt_get_writers() below is safe.
*/
WARN_ON(mnt_get_writers(mnt));
fsnotify_vfsmount_delete(mnt);
deactivate_super(sb);
}
-void mntput_no_expire(struct vfsmount *mnt)
-{
-repeat:
- if (atomic_add_unless(&mnt->mnt_count, -1, 1))
- return;
+#ifdef CONFIG_SMP
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+ if (!longrefs) {
+put_again:
+ br_read_lock(vfsmount_lock);
+ if (likely(atomic_read(&mnt->mnt_longrefs))) {
+ mnt_dec_count(mnt);
+ br_read_unlock(vfsmount_lock);
+ return;
+ }
+ br_read_unlock(vfsmount_lock);
+ } else {
+ BUG_ON(!atomic_read(&mnt->mnt_longrefs));
+ if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
+ return;
+ }
+
br_write_lock(vfsmount_lock);
- if (!atomic_dec_and_test(&mnt->mnt_count)) {
+ if (!longrefs)
+ mnt_dec_count(mnt);
+ else
+ atomic_dec(&mnt->mnt_longrefs);
+ if (mnt_get_count(mnt)) {
br_write_unlock(vfsmount_lock);
return;
}
- if (likely(!mnt->mnt_pinned)) {
+ if (unlikely(mnt->mnt_pinned)) {
+ mnt_add_count(mnt, mnt->mnt_pinned + 1);
+ mnt->mnt_pinned = 0;
br_write_unlock(vfsmount_lock);
- __mntput(mnt);
+ acct_auto_close_mnt(mnt);
+ goto put_again;
+ }
+ br_write_unlock(vfsmount_lock);
+ mntfree(mnt);
+}
+#else
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+put_again:
+ mnt_dec_count(mnt);
+ if (likely(mnt_get_count(mnt)))
return;
+ br_write_lock(vfsmount_lock);
+ if (unlikely(mnt->mnt_pinned)) {
+ mnt_add_count(mnt, mnt->mnt_pinned + 1);
+ mnt->mnt_pinned = 0;
+ br_write_unlock(vfsmount_lock);
+ acct_auto_close_mnt(mnt);
+ goto put_again;
}
- atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
- mnt->mnt_pinned = 0;
br_write_unlock(vfsmount_lock);
- acct_auto_close_mnt(mnt);
- goto repeat;
+ mntfree(mnt);
+}
+#endif
+
+static void mntput_no_expire(struct vfsmount *mnt)
+{
+ __mntput(mnt, 0);
+}
+
+void mntput(struct vfsmount *mnt)
+{
+ if (mnt) {
+ /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+ if (unlikely(mnt->mnt_expiry_mark))
+ mnt->mnt_expiry_mark = 0;
+ __mntput(mnt, 0);
+ }
+}
+EXPORT_SYMBOL(mntput);
+
+struct vfsmount *mntget(struct vfsmount *mnt)
+{
+ if (mnt)
+ mnt_inc_count(mnt);
+ return mnt;
+}
+EXPORT_SYMBOL(mntget);
+
+void mntput_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ if (mnt) {
+ /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+ if (unlikely(mnt->mnt_expiry_mark))
+ mnt->mnt_expiry_mark = 0;
+ __mntput(mnt, 1);
+ }
+#else
+ mntput(mnt);
+#endif
}
-EXPORT_SYMBOL(mntput_no_expire);
+EXPORT_SYMBOL(mntput_long);
+
+struct vfsmount *mntget_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ if (mnt)
+ atomic_inc(&mnt->mnt_longrefs);
+ return mnt;
+#else
+ return mntget(mnt);
+#endif
+}
+EXPORT_SYMBOL(mntget_long);
void mnt_pin(struct vfsmount *mnt)
{
mnt->mnt_pinned++;
br_write_unlock(vfsmount_lock);
}
-
EXPORT_SYMBOL(mnt_pin);
void mnt_unpin(struct vfsmount *mnt)
{
br_write_lock(vfsmount_lock);
if (mnt->mnt_pinned) {
- atomic_inc(&mnt->mnt_count);
+ mnt_inc_count(mnt);
mnt->mnt_pinned--;
}
br_write_unlock(vfsmount_lock);
}
-
EXPORT_SYMBOL(mnt_unpin);
static inline void mangle(struct seq_file *m, const char *s)
int minimum_refs = 0;
struct vfsmount *p;
- br_read_lock(vfsmount_lock);
+ /* write lock needed for mnt_get_count */
+ br_write_lock(vfsmount_lock);
for (p = mnt; p; p = next_mnt(p, mnt)) {
- actual_refs += atomic_read(&p->mnt_count);
+ actual_refs += mnt_get_count(p);
minimum_refs += 2;
}
- br_read_unlock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
if (actual_refs > minimum_refs)
return 0;
{
int ret = 1;
down_read(&namespace_sem);
- br_read_lock(vfsmount_lock);
+ br_write_lock(vfsmount_lock);
if (propagate_mount_busy(mnt, 2))
ret = 0;
- br_read_unlock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_read(&namespace_sem);
return ret;
}
dput(dentry);
mntput(m);
}
- mntput(mnt);
+ mntput_long(mnt);
}
}
flags & (MNT_FORCE | MNT_DETACH))
return -EINVAL;
- if (atomic_read(&mnt->mnt_count) != 2)
+ /*
+ * probably don't strictly need the lock here if we examined
+ * all race cases, but it's a slowpath.
+ */
+ br_write_lock(vfsmount_lock);
+ if (mnt_get_count(mnt) != 2) {
+ br_write_lock(vfsmount_lock);
return -EBUSY;
+ }
+ br_write_unlock(vfsmount_lock);
if (!xchg(&mnt->mnt_expiry_mark, 1))
return -EAGAIN;
unlock:
up_write(&namespace_sem);
- mntput(newmnt);
+ mntput_long(newmnt);
return err;
}
if (fs) {
if (p == fs->root.mnt) {
rootmnt = p;
- fs->root.mnt = mntget(q);
+ fs->root.mnt = mntget_long(q);
}
if (p == fs->pwd.mnt) {
pwdmnt = p;
- fs->pwd.mnt = mntget(q);
+ fs->pwd.mnt = mntget_long(q);
}
}
p = next_mnt(p, mnt_ns->root);
up_write(&namespace_sem);
if (rootmnt)
- mntput(rootmnt);
+ mntput_long(rootmnt);
if (pwdmnt)
- mntput(pwdmnt);
+ mntput_long(pwdmnt);
return new_ns;
}
touch_mnt_namespace(current->nsproxy->mnt_ns);
br_write_unlock(vfsmount_lock);
chroot_fs_refs(&root, &new);
+
error = 0;
path_put(&root_parent);
path_put(&parent_path);
mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
if (IS_ERR(mnt))
panic("Can't create rootfs");
+
ns = create_mnt_ns(mnt);
if (IS_ERR(ns))
panic("Can't allocate initial namespace");
#include <linux/list.h>
#include <linux/nodemask.h>
#include <linux/spinlock.h>
+#include <linux/seqlock.h>
#include <asm/atomic.h>
struct super_block;
#define MNT_INTERNAL 0x4000
+struct mnt_pcp {
+ int mnt_count;
+ int mnt_writers;
+};
+
struct vfsmount {
struct list_head mnt_hash;
struct vfsmount *mnt_parent; /* fs we are mounted on */
struct dentry *mnt_mountpoint; /* dentry of mountpoint */
struct dentry *mnt_root; /* root of the mounted tree */
struct super_block *mnt_sb; /* pointer to superblock */
+#ifdef CONFIG_SMP
+ struct mnt_pcp __percpu *mnt_pcp;
+ atomic_t mnt_longrefs;
+#else
+ int mnt_count;
+ int mnt_writers;
+#endif
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
int mnt_flags;
struct mnt_namespace *mnt_ns; /* containing namespace */
int mnt_id; /* mount identifier */
int mnt_group_id; /* peer group identifier */
- /*
- * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
- * to let these frequently modified fields in a separate cache line
- * (so that reads of mnt_flags wont ping-pong on SMP machines)
- */
- atomic_t mnt_count;
int mnt_expiry_mark; /* true if marked for expiry */
int mnt_pinned;
int mnt_ghosts;
-#ifdef CONFIG_SMP
- int __percpu *mnt_writers;
-#else
- int mnt_writers;
-#endif
};
-static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- return mnt->mnt_writers;
-#else
- return &mnt->mnt_writers;
-#endif
-}
-
-static inline struct vfsmount *mntget(struct vfsmount *mnt)
-{
- if (mnt)
- atomic_inc(&mnt->mnt_count);
- return mnt;
-}
-
struct file; /* forward dec */
extern int mnt_want_write(struct vfsmount *mnt);
extern int mnt_want_write_file(struct file *file);
extern int mnt_clone_write(struct vfsmount *mnt);
extern void mnt_drop_write(struct vfsmount *mnt);
-extern void mntput_no_expire(struct vfsmount *mnt);
+extern void mntput(struct vfsmount *mnt);
+extern struct vfsmount *mntget(struct vfsmount *mnt);
+extern void mntput_long(struct vfsmount *mnt);
+extern struct vfsmount *mntget_long(struct vfsmount *mnt);
extern void mnt_pin(struct vfsmount *mnt);
extern void mnt_unpin(struct vfsmount *mnt);
extern int __mnt_is_readonly(struct vfsmount *mnt);
-static inline void mntput(struct vfsmount *mnt)
-{
- if (mnt) {
- mnt->mnt_expiry_mark = 0;
- mntput_no_expire(mnt);
- }
-}
-
extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
const char *name, void *data);