dax: remove dax_pmd_fault()
authorRoss Zwisler <ross.zwisler@linux.intel.com>
Tue, 8 Nov 2016 00:32:35 +0000 (11:32 +1100)
committerDave Chinner <david@fromorbit.com>
Tue, 8 Nov 2016 00:32:35 +0000 (11:32 +1100)
dax_pmd_fault() is the old struct buffer_head + get_block_t based 2 MiB DAX
fault handler.  This fault handler has been disabled for several kernel
releases, and support for PMDs will be reintroduced using the struct iomap
interface instead.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dave Chinner <david@fromorbit.com>
fs/dax.c
include/linux/dax.h

index 72387023545e34688b60d0bff4efbbe8ad334cfa..3d0b1032c555463bf6853dae118ba5b5646dfadc 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -915,219 +915,6 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 }
 EXPORT_SYMBOL_GPL(dax_fault);
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
-/*
- * The 'colour' (ie low bits) within a PMD of a page offset.  This comes up
- * more often than one might expect in the below function.
- */
-#define PG_PMD_COLOUR  ((PMD_SIZE >> PAGE_SHIFT) - 1)
-
-static void __dax_dbg(struct buffer_head *bh, unsigned long address,
-               const char *reason, const char *fn)
-{
-       if (bh) {
-               char bname[BDEVNAME_SIZE];
-               bdevname(bh->b_bdev, bname);
-               pr_debug("%s: %s addr: %lx dev %s state %lx start %lld "
-                       "length %zd fallback: %s\n", fn, current->comm,
-                       address, bname, bh->b_state, (u64)bh->b_blocknr,
-                       bh->b_size, reason);
-       } else {
-               pr_debug("%s: %s addr: %lx fallback: %s\n", fn,
-                       current->comm, address, reason);
-       }
-}
-
-#define dax_pmd_dbg(bh, address, reason)       __dax_dbg(bh, address, reason, "dax_pmd")
-
-/**
- * dax_pmd_fault - handle a PMD fault on a DAX file
- * @vma: The virtual memory area where the fault occurred
- * @vmf: The description of the fault
- * @get_block: The filesystem method used to translate file offsets to blocks
- *
- * When a page fault occurs, filesystems may call this helper in their
- * pmd_fault handler for DAX files.
- */
-int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
-               pmd_t *pmd, unsigned int flags, get_block_t get_block)
-{
-       struct file *file = vma->vm_file;
-       struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
-       struct buffer_head bh;
-       unsigned blkbits = inode->i_blkbits;
-       unsigned long pmd_addr = address & PMD_MASK;
-       bool write = flags & FAULT_FLAG_WRITE;
-       struct block_device *bdev;
-       pgoff_t size, pgoff;
-       sector_t block;
-       int result = 0;
-       bool alloc = false;
-
-       /* dax pmd mappings require pfn_t_devmap() */
-       if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
-               return VM_FAULT_FALLBACK;
-
-       /* Fall back to PTEs if we're going to COW */
-       if (write && !(vma->vm_flags & VM_SHARED)) {
-               split_huge_pmd(vma, pmd, address);
-               dax_pmd_dbg(NULL, address, "cow write");
-               return VM_FAULT_FALLBACK;
-       }
-       /* If the PMD would extend outside the VMA */
-       if (pmd_addr < vma->vm_start) {
-               dax_pmd_dbg(NULL, address, "vma start unaligned");
-               return VM_FAULT_FALLBACK;
-       }
-       if ((pmd_addr + PMD_SIZE) > vma->vm_end) {
-               dax_pmd_dbg(NULL, address, "vma end unaligned");
-               return VM_FAULT_FALLBACK;
-       }
-
-       pgoff = linear_page_index(vma, pmd_addr);
-       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (pgoff >= size)
-               return VM_FAULT_SIGBUS;
-       /* If the PMD would cover blocks out of the file */
-       if ((pgoff | PG_PMD_COLOUR) >= size) {
-               dax_pmd_dbg(NULL, address,
-                               "offset + huge page size > file size");
-               return VM_FAULT_FALLBACK;
-       }
-
-       memset(&bh, 0, sizeof(bh));
-       bh.b_bdev = inode->i_sb->s_bdev;
-       block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
-
-       bh.b_size = PMD_SIZE;
-
-       if (get_block(inode, block, &bh, 0) != 0)
-               return VM_FAULT_SIGBUS;
-
-       if (!buffer_mapped(&bh) && write) {
-               if (get_block(inode, block, &bh, 1) != 0)
-                       return VM_FAULT_SIGBUS;
-               alloc = true;
-               WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-       }
-
-       bdev = bh.b_bdev;
-
-       if (bh.b_size < PMD_SIZE) {
-               dax_pmd_dbg(&bh, address, "allocated block too small");
-               return VM_FAULT_FALLBACK;
-       }
-
-       /*
-        * If we allocated new storage, make sure no process has any
-        * zero pages covering this hole
-        */
-       if (alloc) {
-               loff_t lstart = pgoff << PAGE_SHIFT;
-               loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
-
-               truncate_pagecache_range(inode, lstart, lend);
-       }
-
-       if (!write && !buffer_mapped(&bh)) {
-               spinlock_t *ptl;
-               pmd_t entry;
-               struct page *zero_page = mm_get_huge_zero_page(vma->vm_mm);
-
-               if (unlikely(!zero_page)) {
-                       dax_pmd_dbg(&bh, address, "no zero page");
-                       goto fallback;
-               }
-
-               ptl = pmd_lock(vma->vm_mm, pmd);
-               if (!pmd_none(*pmd)) {
-                       spin_unlock(ptl);
-                       dax_pmd_dbg(&bh, address, "pmd already present");
-                       goto fallback;
-               }
-
-               dev_dbg(part_to_dev(bdev->bd_part),
-                               "%s: %s addr: %lx pfn: <zero> sect: %llx\n",
-                               __func__, current->comm, address,
-                               (unsigned long long) to_sector(&bh, inode));
-
-               entry = mk_pmd(zero_page, vma->vm_page_prot);
-               entry = pmd_mkhuge(entry);
-               set_pmd_at(vma->vm_mm, pmd_addr, pmd, entry);
-               result = VM_FAULT_NOPAGE;
-               spin_unlock(ptl);
-       } else {
-               struct blk_dax_ctl dax = {
-                       .sector = to_sector(&bh, inode),
-                       .size = PMD_SIZE,
-               };
-               long length = dax_map_atomic(bdev, &dax);
-
-               if (length < 0) {
-                       dax_pmd_dbg(&bh, address, "dax-error fallback");
-                       goto fallback;
-               }
-               if (length < PMD_SIZE) {
-                       dax_pmd_dbg(&bh, address, "dax-length too small");
-                       dax_unmap_atomic(bdev, &dax);
-                       goto fallback;
-               }
-               if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) {
-                       dax_pmd_dbg(&bh, address, "pfn unaligned");
-                       dax_unmap_atomic(bdev, &dax);
-                       goto fallback;
-               }
-
-               if (!pfn_t_devmap(dax.pfn)) {
-                       dax_unmap_atomic(bdev, &dax);
-                       dax_pmd_dbg(&bh, address, "pfn not in memmap");
-                       goto fallback;
-               }
-               dax_unmap_atomic(bdev, &dax);
-
-               /*
-                * For PTE faults we insert a radix tree entry for reads, and
-                * leave it clean.  Then on the first write we dirty the radix
-                * tree entry via the dax_pfn_mkwrite() path.  This sequence
-                * allows the dax_pfn_mkwrite() call to be simpler and avoid a
-                * call into get_block() to translate the pgoff to a sector in
-                * order to be able to create a new radix tree entry.
-                *
-                * The PMD path doesn't have an equivalent to
-                * dax_pfn_mkwrite(), though, so for a read followed by a
-                * write we traverse all the way through dax_pmd_fault()
-                * twice.  This means we can just skip inserting a radix tree
-                * entry completely on the initial read and just wait until
-                * the write to insert a dirty entry.
-                */
-               if (write) {
-                       /*
-                        * We should insert radix-tree entry and dirty it here.
-                        * For now this is broken...
-                        */
-               }
-
-               dev_dbg(part_to_dev(bdev->bd_part),
-                               "%s: %s addr: %lx pfn: %lx sect: %llx\n",
-                               __func__, current->comm, address,
-                               pfn_t_to_pfn(dax.pfn),
-                               (unsigned long long) dax.sector);
-               result |= vmf_insert_pfn_pmd(vma, address, pmd,
-                               dax.pfn, write);
-       }
-
- out:
-       return result;
-
- fallback:
-       count_vm_event(THP_FAULT_FALLBACK);
-       result = VM_FAULT_FALLBACK;
-       goto out;
-}
-EXPORT_SYMBOL_GPL(dax_pmd_fault);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
 /**
  * dax_pfn_mkwrite - handle first write to DAX page
  * @vma: The virtual memory area where the fault occurred
index a41a747d6112010036dcbdc7108f5e6529be2e6c..0f74866edae62c284dc894fa51a8013b5adc9643 100644 (file)
@@ -48,16 +48,12 @@ static inline int __dax_zero_page_range(struct block_device *bdev,
 }
 #endif
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
-int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
-                               unsigned int flags, get_block_t);
-#else
 static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
                                pmd_t *pmd, unsigned int flags, get_block_t gb)
 {
        return VM_FAULT_FALLBACK;
 }
-#endif
+
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
 #define dax_mkwrite(vma, vmf, gb)      dax_fault(vma, vmf, gb)