Merge tag 'v3.10.56' into update
[GitHub/mt8127/android_kernel_alcatel_ttab.git]
diff --git a/mm/shmem.c b/mm/shmem.c
index 5e6a8422658b832921196ca4908e7cc6148eb84a..e490ad476f01381e6b210a9130f96a62ec7409bd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -80,11 +80,12 @@ static struct vfsmount *shm_mnt;
 #define SHORT_SYMLINK_LEN 128
 
 /*
- * shmem_fallocate and shmem_writepage communicate via inode->i_private
- * (with i_mutex making sure that it has only one user at a time):
- * we would prefer not to enlarge the shmem inode just for that.
+ * shmem_fallocate communicates with shmem_fault or shmem_writepage via
+ * inode->i_private (with i_mutex making sure that it has only one user at
+ * a time): we would prefer not to enlarge the shmem inode just for that.
  */
 struct shmem_falloc {
+       wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
        pgoff_t start;          /* start of range currently being fallocated */
        pgoff_t next;           /* the next page offset to be fallocated */
        pgoff_t nr_falloced;    /* how many new pages have been fallocated */
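The new waitq field doubles as a mode flag for the structure hung off
inode->i_private: non-NULL marks a hole punch in flight, NULL an ordinary
preallocation. A minimal sketch of how the two readers patched below
(shmem_writepage and shmem_fault) tell the cases apart, always under
inode->i_lock; the skeleton is illustrative, only the names come from this
patch:

    struct shmem_falloc *shmem_falloc;

    spin_lock(&inode->i_lock);
    shmem_falloc = inode->i_private;
    if (shmem_falloc) {
            if (shmem_falloc->waitq) {
                    /* hole punch in flight: faults in [start, next) wait */
            } else {
                    /* preallocation in flight: writepage may bump nr_unswapped */
            }
    }
    spin_unlock(&inode->i_lock);
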
@@ -533,22 +534,19 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                return;
 
        index = start;
-       for ( ; ; ) {
+       while (index < end) {
                cond_resched();
                pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
                                min(end - index, (pgoff_t)PAGEVEC_SIZE),
                                                        pvec.pages, indices);
                if (!pvec.nr) {
-                       if (index == start || unfalloc)
+                       /* If all gone or hole-punch or unfalloc, we're done */
+                       if (index == start || end != -1)
                                break;
+                       /* But if truncating, restart to make sure all gone */
                        index = start;
                        continue;
                }
-               if ((index == start || unfalloc) && indices[0] >= end) {
-                       shmem_deswap_pagevec(&pvec);
-                       pagevec_release(&pvec);
-                       break;
-               }
                mem_cgroup_uncharge_start();
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
@@ -560,8 +558,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        if (radix_tree_exceptional_entry(page)) {
                                if (unfalloc)
                                        continue;
-                               nr_swaps_freed += !shmem_free_swap(mapping,
-                                                               index, page);
+                               if (shmem_free_swap(mapping, index, page)) {
+                                       /* Swap was replaced by page: retry */
+                                       index--;
+                                       break;
+                               }
+                               nr_swaps_freed++;
                                continue;
                        }
 
@@ -570,6 +572,11 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                                if (page->mapping == mapping) {
                                        VM_BUG_ON(PageWriteback(page));
                                        truncate_inode_page(mapping, page);
+                               } else {
+                                       /* Page was replaced by swap: retry */
+                                       unlock_page(page);
+                                       index--;
+                                       break;
                                }
                        }
                        unlock_page(page);
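Both "retry" branches above lean on the same idiom: shmem_undo_range()
finishes each pass with index++, so stepping index back before break makes
the outer while (index < end) loop look up the same offset again instead of
skipping an entry that flipped between page and swap underneath us. Skeleton
of the idiom (the predicate name is illustrative, not from the patch):

    while (index < end) {
            /* gang lookup fills pvec.pages[]/indices[] from index onward */
            for (i = 0; i < pagevec_count(&pvec); i++) {
                    index = indices[i];
                    if (entry_changed_under_us) {   /* page<->swap race hit */
                            index--;        /* cancel the index++ below */
                            break;          /* outer loop retries this slot */
                    }
                    /* ... free the swap entry or truncate the page ... */
            }
            index++;
    }
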
@@ -826,6 +833,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
                        spin_lock(&inode->i_lock);
                        shmem_falloc = inode->i_private;
                        if (shmem_falloc &&
+                           !shmem_falloc->waitq &&
                            index >= shmem_falloc->start &&
                            index < shmem_falloc->next)
                                shmem_falloc->nr_unswapped++;
@@ -840,7 +848,11 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
                SetPageUptodate(page);
        }
 
+#ifndef CONFIG_MEMCG
        swap = get_swap_page();
+#else
+       swap = get_swap_page_by_memcg(page);
+#endif
        if (!swap.val)
                goto redirty;
 
@@ -1300,6 +1312,64 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        int error;
        int ret = VM_FAULT_LOCKED;
 
+       /*
+        * Trinity finds that probing a hole which tmpfs is punching can
+        * prevent the hole-punch from ever completing: which in turn
+        * locks writers out with its hold on i_mutex.  So refrain from
+        * faulting pages into the hole while it's being punched.  Although
+        * shmem_undo_range() does remove the additions, it may be unable to
+        * keep up, as each new page needs its own unmap_mapping_range() call,
+        * and the i_mmap tree grows ever slower to scan if new vmas are added.
+        *
+        * It does not matter if we sometimes reach this check just before the
+        * hole-punch begins, so that one fault then races with the punch:
+        * we just need to make racing faults a rare case.
+        *
+        * The implementation below would be much simpler if we just used a
+        * standard mutex or completion: but we cannot take i_mutex in fault,
+        * and bloating every shmem inode for this unlikely case would be sad.
+        */
+       if (unlikely(inode->i_private)) {
+               struct shmem_falloc *shmem_falloc;
+
+               spin_lock(&inode->i_lock);
+               shmem_falloc = inode->i_private;
+               if (shmem_falloc &&
+                   shmem_falloc->waitq &&
+                   vmf->pgoff >= shmem_falloc->start &&
+                   vmf->pgoff < shmem_falloc->next) {
+                       wait_queue_head_t *shmem_falloc_waitq;
+                       DEFINE_WAIT(shmem_fault_wait);
+
+                       ret = VM_FAULT_NOPAGE;
+                       if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
+                          !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+                               /* It's polite to up mmap_sem if we can */
+                               up_read(&vma->vm_mm->mmap_sem);
+                               ret = VM_FAULT_RETRY;
+                       }
+
+                       shmem_falloc_waitq = shmem_falloc->waitq;
+                       prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       spin_unlock(&inode->i_lock);
+                       schedule();
+
+                       /*
+                        * shmem_falloc_waitq points into the shmem_fallocate()
+                        * stack of the hole-punching task: shmem_falloc_waitq
+                        * is usually invalid by the time we reach here, but
+                        * finish_wait() does not dereference it in that case;
+                        * though i_lock needed lest racing with wake_up_all().
+                        */
+                       spin_lock(&inode->i_lock);
+                       finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
+                       spin_unlock(&inode->i_lock);
+                       return ret;
+               }
+               spin_unlock(&inode->i_lock);
+       }
+
        error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
        if (error)
                return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
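The flag dance above follows the core fault-retry contract rather than
anything shmem-specific. A hypothetical helper (not in the patch) making the
decision explicit:

    /* Pick the return value for a fault that must sleep.  Dropping
     * mmap_sem lets the hole-puncher's writers make progress; the arch
     * fault path retakes it when it sees VM_FAULT_RETRY. */
    static int fault_wait_retval(struct vm_fault *vmf, struct mm_struct *mm)
    {
            if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
               !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
                    up_read(&mm->mmap_sem); /* don't touch the vma after this */
                    return VM_FAULT_RETRY;
            }
            return VM_FAULT_NOPAGE;         /* keep mmap_sem; just re-fault */
    }
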
@@ -1362,7 +1432,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 }
 
 static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
-                                    umode_t mode, dev_t dev, unsigned long flags)
+                                    umode_t mode, dev_t dev, unsigned long flags, int atomic_copy)
 {
        struct inode *inode;
        struct shmem_inode_info *info;
@@ -1373,6 +1443,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 
        inode = new_inode(sb);
        if (inode) {
+               /* We don't let shmem use __GFP_SLOWHIGHMEM */
+               mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE);
                inode->i_ino = get_next_ino();
                inode_init_owner(inode, dir, mode);
                inode->i_blocks = 0;
@@ -1383,6 +1455,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
                memset(info, 0, (char *)inode - (char *)info);
                spin_lock_init(&info->lock);
                info->flags = flags & VM_NORESERVE;
+               if (atomic_copy)
+                       inode->i_flags |= S_ATOMIC_COPY;
                INIT_LIST_HEAD(&info->swaplist);
                simple_xattrs_init(&info->xattrs);
                cache_no_acl(inode);
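Forcing the mask to GFP_HIGHUSER_MOVABLE keeps shmem's page-cache
allocations in movable highmem; __GFP_SLOWHIGHMEM looks like a vendor gfp
flag that this prevents from being inherited. The mask is picked back up by
later allocations, roughly as in shmem_getpage_gfp():

    gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
    page = shmem_alloc_page(gfp, info, index);  /* honours the mask */
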
@@ -1815,18 +1889,33 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
        pgoff_t start, index, end;
        int error;
 
-       mutex_lock(&inode->i_mutex);
+       /* Use a trylock to avoid a nested lock */
+       if (!mutex_trylock(&inode->i_mutex))
+               return -EAGAIN;
 
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                struct address_space *mapping = file->f_mapping;
                loff_t unmap_start = round_up(offset, PAGE_SIZE);
                loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
+               DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
+
+               shmem_falloc.waitq = &shmem_falloc_waitq;
+               shmem_falloc.start = unmap_start >> PAGE_SHIFT;
+               shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
+               spin_lock(&inode->i_lock);
+               inode->i_private = &shmem_falloc;
+               spin_unlock(&inode->i_lock);
 
                if ((u64)unmap_end > (u64)unmap_start)
                        unmap_mapping_range(mapping, unmap_start,
                                            1 + unmap_end - unmap_start, 0);
                shmem_truncate_range(inode, offset, offset + len - 1);
                /* No need to unmap again: hole-punching leaves COWed pages */
+
+               spin_lock(&inode->i_lock);
+               inode->i_private = NULL;
+               wake_up_all(&shmem_falloc_waitq);
+               spin_unlock(&inode->i_lock);
                error = 0;
                goto out;
        }
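Distilled, the fault/fallocate handshake is an on-stack wait queue published
through a shared pointer and torn down under a spinlock, so neither a late
waiter nor a late waker can touch a dead stack frame. A generic sketch of
the pattern (names illustrative, not shmem API):

    /* Waker: owns the waitq on its stack for the slow operation */
    DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
    shared->wq = &wq;
    /* ... the long-running work (here: unmap + truncate) ... */
    spin_lock(&lock);
    shared->wq = NULL;          /* unpublish, then wake, both under lock */
    wake_up_all(&wq);           /* dequeues every waiter's wait entry */
    spin_unlock(&lock);

    /* Waiter: may run on long after the waker's frame is gone */
    spin_lock(&lock);
    if (shared->wq) {
            wait_queue_head_t *waitq = shared->wq;
            DEFINE_WAIT(wait);

            prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
            spin_unlock(&lock);
            schedule();
            spin_lock(&lock);           /* serializes with wake_up_all() */
            finish_wait(waitq, &wait);  /* entry already dequeued, so the
                                           dead waitq is never dereferenced */
    }
    spin_unlock(&lock);
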
@@ -1844,6 +1933,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                goto out;
        }
 
+       shmem_falloc.waitq = NULL;
        shmem_falloc.start = start;
        shmem_falloc.next  = start;
        shmem_falloc.nr_falloced = 0;
@@ -1937,7 +2027,7 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
        struct inode *inode;
        int error = -ENOSPC;
 
-       inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
+       inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE, 0);
        if (inode) {
                error = security_inode_init_security(inode, dir,
                                                     &dentry->d_name,
@@ -2048,8 +2138,10 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct
 
        if (new_dentry->d_inode) {
                (void) shmem_unlink(new_dir, new_dentry);
-               if (they_are_dirs)
+               if (they_are_dirs) {
+                       drop_nlink(new_dentry->d_inode);
                        drop_nlink(old_dir);
+               }
        } else if (they_are_dirs) {
                drop_nlink(old_dir);
                inc_nlink(new_dir);
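The added drop_nlink() covers the victim directory's remaining link count.
Worked example, assuming empty directories and rename("A", "B") replacing B
with A:

    /*
     * victim B : i_nlink 2 (its name + ".")
     *            -> 1 via shmem_unlink(new_dir, new_dentry)
     *            -> 0 via the added drop_nlink(new_dentry->d_inode)
     * old_dir  : loses A's ".."              -> drop_nlink(old_dir)
     * new_dir  : -B's "..", +A's ".."        -> net unchanged
     */
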
@@ -2076,7 +2168,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
        if (len > PAGE_CACHE_SIZE)
                return -ENAMETOOLONG;
 
-       inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
+       inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE, 0);
        if (!inode)
                return -ENOSPC;
 
@@ -2618,7 +2710,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_flags |= MS_POSIXACL;
 #endif
 
-       inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
+       inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE, 0);
        if (!inode)
                goto failed;
        inode->i_uid = sbinfo->uid;
@@ -2871,7 +2963,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
 #define shmem_vm_ops                           generic_file_vm_ops
 #define shmem_file_operations                  ramfs_file_operations
-#define shmem_get_inode(sb, dir, mode, dev, flags)     ramfs_get_inode(sb, dir, mode, dev)
+#define shmem_get_inode(sb, dir, mode, dev, flags, atomic_copy)        ramfs_get_inode(sb, dir, mode, dev)
 #define shmem_acct_size(flags, size)           0
 #define shmem_unacct_size(flags, size)         do {} while (0)
 
@@ -2879,14 +2971,8 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
 /* common code */
 
-static char *shmem_dname(struct dentry *dentry, char *buffer, int buflen)
-{
-       return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
-                               dentry->d_name.name);
-}
-
 static struct dentry_operations anon_ops = {
-       .d_dname = shmem_dname
+       .d_dname = simple_dname
 };
 
 /**
@@ -2894,8 +2980,10 @@ static struct dentry_operations anon_ops = {
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
  * @size: size to be set for the file
  * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
+ * @atomic_copy: whether to copy the area atomically when hibernating
  */
-struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags,
+               int atomic_copy)
 {
        struct file *res;
        struct inode *inode;
@@ -2924,7 +3012,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
        path.mnt = mntget(shm_mnt);
 
        res = ERR_PTR(-ENOSPC);
-       inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
+       inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags, atomic_copy);
        if (!inode)
                goto put_dentry;
 
@@ -2950,6 +3038,14 @@ put_memory:
 }
 EXPORT_SYMBOL_GPL(shmem_file_setup);
 
+void shmem_set_file(struct vm_area_struct *vma, struct file *file)
+{
+       if (vma->vm_file)
+               fput(vma->vm_file);
+       vma->vm_file = file;
+       vma->vm_ops = &shmem_vm_ops;
+}
+
 /**
  * shmem_zero_setup - setup a shared anonymous mapping
  * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
@@ -2959,14 +3055,11 @@ int shmem_zero_setup(struct vm_area_struct *vma)
        struct file *file;
        loff_t size = vma->vm_end - vma->vm_start;
 
-       file = shmem_file_setup("dev/zero", size, vma->vm_flags);
+       file = shmem_file_setup("dev/zero", size, vma->vm_flags, 0);
        if (IS_ERR(file))
                return PTR_ERR(file);
 
-       if (vma->vm_file)
-               fput(vma->vm_file);
-       vma->vm_file = file;
-       vma->vm_ops = &shmem_vm_ops;
+       shmem_set_file(vma, file);
        return 0;
 }
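
shmem_set_file() transfers ownership of the file reference into the vma,
dropping any previous vm_file, as shmem_zero_setup() now demonstrates;
exporting it this way is presumably for out-of-tree callers such as
Android's ashmem driver. A hypothetical driver mmap handler (names
illustrative):

    static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
    {
            loff_t size = vma->vm_end - vma->vm_start;
            struct file *shm;

            shm = shmem_file_setup("dev/mydrv", size, vma->vm_flags, 0);
            if (IS_ERR(shm))
                    return PTR_ERR(shm);
            shmem_set_file(vma, shm);   /* fputs old vm_file, takes our ref */
            return 0;
    }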