static inline void lock_rcu_walk(void)
{
- br_read_lock(&vfsmount_lock);
rcu_read_lock();
}
static inline void unlock_rcu_walk(void)
{
rcu_read_unlock();
- br_read_unlock(&vfsmount_lock);
}
/**
BUG_ON(!(nd->flags & LOOKUP_RCU));
/*
- * Get a reference to the parent first: we're
- * going to make "path_put(nd->path)" valid in
- * non-RCU context for "terminate_walk()".
- *
- * If this doesn't work, return immediately with
- * RCU walking still active (and then we will do
- * the RCU walk cleanup in terminate_walk()).
+ * After legitimizing the bastards, terminate_walk()
+ * will do the right thing for non-RCU mode, and all our
+ * subsequent exit cases should rcu_read_unlock()
+ * before returning. Do vfsmount first; if dentry
+ * can't be legitimized, just set nd->path.dentry to NULL
+ * and rely on dput(NULL) being a no-op.
*/
- if (!lockref_get_not_dead(&parent->d_lockref))
+ if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
return -ECHILD;
-
- /*
- * After the mntget(), we terminate_walk() will do
- * the right thing for non-RCU mode, and all our
- * subsequent exit cases should unlock_rcu_walk()
- * before returning.
- */
- mntget(nd->path.mnt);
nd->flags &= ~LOOKUP_RCU;
+ if (!lockref_get_not_dead(&parent->d_lockref)) {
+ nd->path.dentry = NULL;
+ unlock_rcu_walk();
+ return -ECHILD;
+ }
+
/*
* For a negative lookup, the lookup sequence point is the parents
* sequence point, and it only needs to revalidate the parent dentry.
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
+ if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
+ unlock_rcu_walk();
+ return -ECHILD;
+ }
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
unlock_rcu_walk();
+ mntput(nd->path.mnt);
return -ECHILD;
}
if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
unlock_rcu_walk();
dput(dentry);
+ mntput(nd->path.mnt);
return -ECHILD;
}
- mntget(nd->path.mnt);
unlock_rcu_walk();
}
struct mount *parent;
struct dentry *mountpoint;
- br_read_lock(&vfsmount_lock);
+ read_seqlock_excl(&mount_lock);
parent = mnt->mnt_parent;
if (parent == mnt) {
- br_read_unlock(&vfsmount_lock);
+ read_sequnlock_excl(&mount_lock);
return 0;
}
mntget(&parent->mnt);
mountpoint = dget(mnt->mnt_mountpoint);
- br_read_unlock(&vfsmount_lock);
+ read_sequnlock_excl(&mount_lock);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
/* Something is mounted on this dentry in another
* namespace and/or whatever was mounted there in this
- * namespace got unmounted before we managed to get the
- * vfsmount_lock */
+ * namespace got unmounted before lookup_mnt() could
+ * get it */
}
/* Handle an automount point */
if (flags & LOOKUP_RCU) {
lock_rcu_walk();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ nd->m_seq = read_seqbegin(&mount_lock);
} else {
path_get(&nd->path);
}
nd->root.mnt = NULL;
+ nd->m_seq = read_seqbegin(&mount_lock);
if (*name=='/') {
if (flags & LOOKUP_RCU) {
lock_rcu_walk();
* It should be taken for write in all cases where the vfsmount
* tree or hash is modified or when a vfsmount structure is modified.
*/
-DEFINE_BRLOCK(vfsmount_lock);
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
kmem_cache_free(mnt_cache, mnt);
}
+/* call under rcu_read_lock */
+bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+{
+ struct mount *mnt;
+ if (read_seqretry(&mount_lock, seq))
+ return false;
+ if (bastard == NULL)
+ return true;
+ mnt = real_mount(bastard);
+ mnt_add_count(mnt, 1);
+ if (likely(!read_seqretry(&mount_lock, seq)))
+ return true;
+ if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
+ mnt_add_count(mnt, -1);
+ return false;
+ }
+ rcu_read_unlock();
+ mntput(bastard);
+ rcu_read_lock();
+ return false;
+}
+
/*
* find the first mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * call under rcu_read_lock()
*/
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
struct list_head *head = mount_hashtable + hash(mnt, dentry);
struct mount *p;
- list_for_each_entry(p, head, mnt_hash)
+ list_for_each_entry_rcu(p, head, mnt_hash)
if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
return p;
return NULL;
/*
* find the last mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * mount_lock must be held.
*/
struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
{
struct vfsmount *lookup_mnt(struct path *path)
{
struct mount *child_mnt;
+ struct vfsmount *m;
+ unsigned seq;
- br_read_lock(&vfsmount_lock);
- child_mnt = __lookup_mnt(path->mnt, path->dentry);
- if (child_mnt) {
- mnt_add_count(child_mnt, 1);
- br_read_unlock(&vfsmount_lock);
- return &child_mnt->mnt;
- } else {
- br_read_unlock(&vfsmount_lock);
- return NULL;
- }
+ rcu_read_lock();
+ do {
+ seq = read_seqbegin(&mount_lock);
+ child_mnt = __lookup_mnt(path->mnt, path->dentry);
+ m = child_mnt ? &child_mnt->mnt : NULL;
+ } while (!legitimize_mnt(m, seq));
+ rcu_read_unlock();
+ return m;
}
static struct mountpoint *new_mountpoint(struct dentry *dentry)
return ERR_PTR(err);
}
+static void delayed_free(struct rcu_head *head)
+{
+ struct mount *mnt = container_of(head, struct mount, mnt_rcu);
+ kfree(mnt->mnt_devname);
+#ifdef CONFIG_SMP
+ free_percpu(mnt->mnt_pcp);
+#endif
+ kmem_cache_free(mnt_cache, mnt);
+}
+
static void mntput_no_expire(struct mount *mnt)
{
put_again:
-#ifdef CONFIG_SMP
- br_read_lock(&vfsmount_lock);
- if (likely(mnt->mnt_ns)) {
- /* shouldn't be the last one */
- mnt_add_count(mnt, -1);
- br_read_unlock(&vfsmount_lock);
+ rcu_read_lock();
+ mnt_add_count(mnt, -1);
+ if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+ rcu_read_unlock();
return;
}
- br_read_unlock(&vfsmount_lock);
-
lock_mount_hash();
- mnt_add_count(mnt, -1);
if (mnt_get_count(mnt)) {
+ rcu_read_unlock();
unlock_mount_hash();
return;
}
-#else
- mnt_add_count(mnt, -1);
- if (likely(mnt_get_count(mnt)))
- return;
- lock_mount_hash();
-#endif
if (unlikely(mnt->mnt_pinned)) {
mnt_add_count(mnt, mnt->mnt_pinned + 1);
mnt->mnt_pinned = 0;
+ rcu_read_unlock();
unlock_mount_hash();
acct_auto_close_mnt(&mnt->mnt);
goto put_again;
}
+ if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
+ rcu_read_unlock();
+ unlock_mount_hash();
+ return;
+ }
+ mnt->mnt.mnt_flags |= MNT_DOOMED;
+ rcu_read_unlock();
list_del(&mnt->mnt_instance);
unlock_mount_hash();
fsnotify_vfsmount_delete(&mnt->mnt);
dput(mnt->mnt.mnt_root);
deactivate_super(mnt->mnt.mnt_sb);
- free_vfsmnt(mnt);
+ mnt_free_id(mnt);
+ call_rcu(&mnt->mnt_rcu, delayed_free);
}
void mntput(struct vfsmount *mnt)
list_splice_init(&unmounted, &head);
up_write(&namespace_sem);
+ synchronize_rcu();
+
while (!list_empty(&head)) {
mnt = list_first_entry(&head, struct mount, mnt_hash);
list_del_init(&mnt->mnt_hash);
}
/*
- * vfsmount lock must be held for write
+ * mount_lock must be held
* namespace_sem must be held for write
+ * how = 0 => just this tree, don't propagate
+ * how = 1 => propagate; we know that nobody else has reference to any victims
+ * how = 2 => lazy umount
*/
-void umount_tree(struct mount *mnt, int propagate)
+void umount_tree(struct mount *mnt, int how)
{
LIST_HEAD(tmp_list);
struct mount *p;
for (p = mnt; p; p = next_mnt(p, mnt))
list_move(&p->mnt_hash, &tmp_list);
- if (propagate)
+ if (how)
propagate_umount(&tmp_list);
list_for_each_entry(p, &tmp_list, mnt_hash) {
list_del_init(&p->mnt_list);
__touch_mnt_namespace(p->mnt_ns);
p->mnt_ns = NULL;
+ if (how < 2)
+ p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
list_del_init(&p->mnt_child);
if (mnt_has_parent(p)) {
put_mountpoint(p->mnt_mp);
lock_mount_hash();
event++;
- if (!(flags & MNT_DETACH))
- shrink_submounts(mnt);
-
- retval = -EBUSY;
- if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
+ if (flags & MNT_DETACH) {
if (!list_empty(&mnt->mnt_list))
- umount_tree(mnt, 1);
+ umount_tree(mnt, 2);
retval = 0;
+ } else {
+ shrink_submounts(mnt);
+ retval = -EBUSY;
+ if (!propagate_mount_busy(mnt, 2)) {
+ if (!list_empty(&mnt->mnt_list))
+ umount_tree(mnt, 1);
+ retval = 0;
+ }
}
unlock_mount_hash();
namespace_unlock();
struct mount *parent;
int err;
- mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
+ mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
mp = lock_mount(path);
if (IS_ERR(mp))
* process a list of expirable mountpoints with the intent of discarding any
* submounts of a specific parent mountpoint
*
- * vfsmount_lock must be held for write
+ * mount_lock must be held for write
*/
static void shrink_submounts(struct mount *mnt)
{
/*
* Return true if path is reachable from root
*
- * namespace_sem or vfsmount_lock is held
+ * namespace_sem or mount_lock is held
*/
bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
const struct path *root)
int path_is_under(struct path *path1, struct path *path2)
{
int res;
- br_read_lock(&vfsmount_lock);
+ read_seqlock_excl(&mount_lock);
res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
- br_read_unlock(&vfsmount_lock);
+ read_sequnlock_excl(&mount_lock);
return res;
}
EXPORT_SYMBOL(path_is_under);
for (u = 0; u < HASH_SIZE; u++)
INIT_LIST_HEAD(&mountpoint_hashtable[u]);
- br_lock_init(&vfsmount_lock);
-
err = sysfs_init();
if (err)
printk(KERN_WARNING "%s: sysfs_init error: %d\n",
{
/* release long term mount so mount point can be released */
if (!IS_ERR_OR_NULL(mnt)) {
- lock_mount_hash();
real_mount(mnt)->mnt_ns = NULL;
- unlock_mount_hash();
+ synchronize_rcu(); /* yecchhh... */
mntput(mnt);
}
}