mm: convert i_mmap_mutex to rwsem

author Davidlohr Bueso <dave@stgolabs.net>

Sat, 13 Dec 2014 00:54:24 +0000 (16:54 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 13 Dec 2014 20:42:45 +0000 (12:42 -0800)
author Davidlohr Bueso <dave@stgolabs.net>
Sat, 13 Dec 2014 00:54:24 +0000 (16:54 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 13 Dec 2014 20:42:45 +0000 (12:42 -0800)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index a082709aa42748869fe08fafb43f319f081537fe..5eba47f593f8f888855c6b83428455f8b46af670 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -472,12 +472,12 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
  }
  
  /*
- * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never
+ * Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never
   * be taken from reclaim -- unlike regular filesystems. This needs an
   * annotation because huge_pmd_share() does an allocation under
- * i_mmap_mutex.
+ * i_mmap_rwsem.
   */
-static struct lock_class_key hugetlbfs_i_mmap_mutex_key;
+static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
  
  static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                                         struct inode *dir,
@@ -495,8 +495,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                 struct hugetlbfs_inode_info *info;
                 inode->i_ino = get_next_ino();
                 inode_init_owner(inode, dir, mode);
-               lockdep_set_class(&inode->i_mapping->i_mmap_mutex,
-                               &hugetlbfs_i_mmap_mutex_key);
+               lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
+                               &hugetlbfs_i_mmap_rwsem_key);
                 inode->i_mapping->a_ops = &hugetlbfs_aops;
                 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/inode.c b/fs/inode.c

index 2ed95f7caa4f874ea97dbc7ebc698a4d0fc04674..ad60555b4768fa012dfeeba0661de6f7fc9addec 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -346,7 +346,7 @@ void address_space_init_once(struct address_space *mapping)
         memset(mapping, 0, sizeof(*mapping));
         INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
         spin_lock_init(&mapping->tree_lock);
-       mutex_init(&mapping->i_mmap_mutex);
+       init_rwsem(&mapping->i_mmap_rwsem);
         INIT_LIST_HEAD(&mapping->private_list);
         spin_lock_init(&mapping->private_lock);
         mapping->i_mmap = RB_ROOT;
diff --git a/include/linux/fs.h b/include/linux/fs.h

index bd0a1b2f3c0212a58440f82c4413bfbd76607302..6abcd0b72ae001583f9072ca2be010fafadc0cf3 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -18,6 +18,7 @@
  #include <linux/pid.h>
  #include <linux/bug.h>
  #include <linux/mutex.h>
+#include <linux/rwsem.h>
  #include <linux/capability.h>
  #include <linux/semaphore.h>
  #include <linux/fiemap.h>
@@ -401,7 +402,7 @@ struct address_space {
         atomic_t                i_mmap_writable;/* count VM_SHARED mappings */
         struct rb_root          i_mmap;         /* tree of private and shared mappings */
         struct list_head        i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
-       struct mutex            i_mmap_mutex;   /* protect tree, count, list */
+       struct rw_semaphore     i_mmap_rwsem;   /* protect tree, count, list */
         /* Protected by tree_lock together with the radix tree */
         unsigned long           nrpages;        /* number of total pages */
         unsigned long           nrshadows;      /* number of shadow entries */
@@ -469,12 +470,12 @@ int mapping_tagged(struct address_space *mapping, int tag);
  
  static inline void i_mmap_lock_write(struct address_space *mapping)
  {
-       mutex_lock(&mapping->i_mmap_mutex);
+       down_write(&mapping->i_mmap_rwsem);
  }
  
  static inline void i_mmap_unlock_write(struct address_space *mapping)
  {
-       mutex_unlock(&mapping->i_mmap_mutex);
+       up_write(&mapping->i_mmap_rwsem);
  }
  
  /*
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h

index 88787bb4b3b93fe08d322c2a54a183486427a2af..ab8564b03468ed7e493b17a89d14f9ab79d3f59e 100644 (file)
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -154,7 +154,7 @@ struct mmu_notifier_ops {
   * Therefore notifier chains can only be traversed when either
   *
   * 1. mmap_sem is held.
- * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem).
+ * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
   * 3. No other concurrent thread can access the list (release)
   */
  struct mmu_notifier {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c

index aac81bf9df096247ea763a738465c9ccc0ce743c..1901dbfa7ce0d25039ef3c9209926b8733732ed4 100644 (file)
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -731,7 +731,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
  
                 if (!prev && !more) {
                         /*
-                        * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
+                        * Needs GFP_NOWAIT to avoid i_mmap_rwsem recursion through
                          * reclaim. This is optimistic, no harm done if it fails.
                          */
                         prev = kmalloc(sizeof(struct map_info),
diff --git a/mm/filemap.c b/mm/filemap.c

index 14b4642279f1260e6bbd6a6b73b0518ee377acd1..e8905bc3cbd7c546dd9c1e586b61abac9d38cc8f 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -62,16 +62,16 @@
  /*
   * Lock ordering:
   *
- *  ->i_mmap_mutex             (truncate_pagecache)
+ *  ->i_mmap_rwsem             (truncate_pagecache)
   *    ->private_lock           (__free_pte->__set_page_dirty_buffers)
   *      ->swap_lock            (exclusive_swap_page, others)
   *        ->mapping->tree_lock
   *
   *  ->i_mutex
- *    ->i_mmap_mutex           (truncate->unmap_mapping_range)
+ *    ->i_mmap_rwsem           (truncate->unmap_mapping_range)
   *
   *  ->mmap_sem
- *    ->i_mmap_mutex
+ *    ->i_mmap_rwsem
   *      ->page_table_lock or pte_lock  (various, mainly in memory.c)
   *        ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock)
   *
@@ -85,7 +85,7 @@
   *    sb_lock                  (fs/fs-writeback.c)
   *    ->mapping->tree_lock     (__sync_single_inode)
   *
- *  ->i_mmap_mutex
+ *  ->i_mmap_rwsem
   *    ->anon_vma.lock          (vma_adjust)
   *
   *  ->anon_vma.lock
@@ -105,7 +105,7 @@
   *    ->inode->i_lock          (zap_pte_range->set_page_dirty)
   *    ->private_lock           (zap_pte_range->__set_page_dirty_buffers)
   *
- * ->i_mmap_mutex
+ * ->i_mmap_rwsem
   *   ->tasklist_lock            (memory_failure, collect_procs_ao)
   */
  
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index ffe19304cc093728e54721b88f5e0c6dbba3dc3a..989cb032eaf512913b46c2ee2aa8e7014649aa7d 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2726,9 +2726,9 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
          * on its way out.  We're lucky that the flag has such an appropriate
          * name, and can in fact be safely cleared here. We could clear it
          * before the __unmap_hugepage_range above, but all that's necessary
-        * is to clear it before releasing the i_mmap_mutex. This works
+        * is to clear it before releasing the i_mmap_rwsem. This works
          * because in the context this is called, the VMA is about to be
-        * destroyed and the i_mmap_mutex is held.
+        * destroyed and the i_mmap_rwsem is held.
          */
         vma->vm_flags &= ~VM_MAYSHARE;
  }
@@ -3370,9 +3370,9 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                 spin_unlock(ptl);
         }
         /*
-        * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
+        * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
          * may have cleared our pud entry and done put_page on the page table:
-        * once we release i_mmap_mutex, another task can do the final put_page
+        * once we release i_mmap_rwsem, another task can do the final put_page
          * and that page table be reused and filled with junk.
          */
         flush_tlb_range(vma, start, end);
@@ -3525,7 +3525,7 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
   * and returns the corresponding pte. While this is not necessary for the
   * !shared pmd case because we can allocate the pmd later as well, it makes the
   * code much cleaner. pmd allocation is essential for the shared case because
- * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * pud has to be populated inside the same i_mmap_rwsem section - otherwise
   * racing tasks could either miss the sharing (see huge_pte_offset) or select a
   * bad pmd for sharing.
   */
diff --git a/mm/mmap.c b/mm/mmap.c

index ecd6ecf487780281dc9d74ce8056b0d295f94e67..0d84b2f86f3b8fb65f8bd9ff52dea247fe2f2781 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -232,7 +232,7 @@ error:
  }
  
  /*
- * Requires inode->i_mapping->i_mmap_mutex
+ * Requires inode->i_mapping->i_mmap_rwsem
   */
  static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                 struct file *file, struct address_space *mapping)
@@ -2791,7 +2791,7 @@ void exit_mmap(struct mm_struct *mm)
  
  /* Insert vm structure into process list sorted by address
   * and into the inode's i_mmap tree.  If vm_file is non-NULL
- * then i_mmap_mutex is taken here.
+ * then i_mmap_rwsem is taken here.
   */
  int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
  {
@@ -3086,7 +3086,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
                  */
                 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                         BUG();
-               mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
+               down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
         }
  }
  
@@ -3113,7 +3113,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
   * vma in this mm is backed by the same anon_vma or address_space.
   *
   * We can take all the locks in random order because the VM code
- * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
+ * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never
   * takes more than one of them in a row. Secondly we're protected
   * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
   *
diff --git a/mm/mremap.c b/mm/mremap.c

index 426b448d64479facef691b9b872beff9e04e8fda..84aa36f9f30886358d048aa3240eae36b9e72d0e 100644 (file)
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -99,7 +99,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
         spinlock_t *old_ptl, *new_ptl;
  
         /*
-        * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
+        * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
          * locks to ensure that rmap will always observe either the old or the
          * new ptes. This is the easiest way to avoid races with
          * truncate_pagecache(), page migration, etc...
diff --git a/mm/rmap.c b/mm/rmap.c

index bea03f6bec61a9283fc8ef1fbbda6223be574613..18247f89f1a884598971b4c6d3a9591eb11ee84c 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,7 +23,7 @@
   * inode->i_mutex      (while writing or truncating, not reading or faulting)
   *   mm->mmap_sem
   *     page->flags PG_locked (lock_page)
- *       mapping->i_mmap_mutex
+ *       mapping->i_mmap_rwsem
   *         anon_vma->rwsem
   *           mm->page_table_lock or pte_lock
   *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
@@ -1260,7 +1260,7 @@ out_mlock:
         /*
          * We need mmap_sem locking, Otherwise VM_LOCKED check makes
          * unstable result and race. Plus, We can't wait here because
-        * we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
+        * we now hold anon_vma->rwsem or mapping->i_mmap_rwsem.
          * if trylock failed, the page remain in evictable lru and later
          * vmscan could retry to move the page to unevictable lru if the
          * page is actually mlocked.
@@ -1684,7 +1684,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
          * The page lock not only makes sure that page->mapping cannot
          * suddenly be NULLified by truncation, it makes sure that the
          * structure at mapping cannot be freed and reused yet,
-        * so we can safely take mapping->i_mmap_mutex.
+        * so we can safely take mapping->i_mmap_rwsem.
          */
         VM_BUG_ON_PAGE(!PageLocked(page), page);
author	Davidlohr Bueso <dave@stgolabs.net>
	Sat, 13 Dec 2014 00:54:24 +0000 (16:54 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 13 Dec 2014 20:42:45 +0000 (12:42 -0800)
fs/hugetlbfs/inode.c		patch \| blob \| blame \| history
fs/inode.c		patch \| blob \| blame \| history
include/linux/fs.h		patch \| blob \| blame \| history
include/linux/mmu_notifier.h		patch \| blob \| blame \| history
kernel/events/uprobes.c		patch \| blob \| blame \| history
mm/filemap.c		patch \| blob \| blame \| history
mm/hugetlb.c		patch \| blob \| blame \| history
mm/mmap.c		patch \| blob \| blame \| history
mm/mremap.c		patch \| blob \| blame \| history
mm/rmap.c		patch \| blob \| blame \| history