ret == -ENOIOCTLCMD;
}
+#ifdef CONFIG_FS_DAX
+bool blkdev_dax_capable(struct block_device *bdev)
+{
+ struct gendisk *disk = bdev->bd_disk;
+
+ if (!disk->fops->direct_access)
+ return false;
+
+ /*
+ * If the partition is not aligned on a page boundary, we can't
+ * do dax I/O to it.
+ */
+ if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
+ || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+ return false;
+
+ return true;
+}
+
+static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
+{
+ unsigned long arg;
+ int rc = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ if (get_user(arg, (int __user *)(argp)))
+ return -EFAULT;
+ arg = !!arg;
+ if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
+ return 0;
+
+ if (arg)
+ arg = S_DAX;
+
+ if (arg && !blkdev_dax_capable(bdev))
+ return -ENOTTY;
+
+ mutex_lock(&bdev->bd_inode->i_mutex);
+ if (bdev->bd_map_count == 0)
+ inode_set_flags(bdev->bd_inode, arg, S_DAX);
+ else
+ rc = -EBUSY;
+ mutex_unlock(&bdev->bd_inode->i_mutex);
+ return rc;
+}
+#else
+static int blkdev_daxset(struct block_device *bdev, int arg)
+{
+ if (arg)
+ return -ENOTTY;
+ return 0;
+}
+#endif
+
static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
unsigned cmd, unsigned long arg)
{
case BLKTRACESETUP:
case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, argp);
+ case BLKDAXSET:
+ return blkdev_daxset(bdev, arg);
+ case BLKDAXGET:
+ return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
+ break;
case IOC_PR_REGISTER:
return blkdev_pr_register(bdev, argp);
case IOC_PR_RESERVE:
}
}
- if (!ret)
+ if (!ret) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+ if (!blkdev_dax_capable(bdev))
+ bdev->bd_inode->i_flags &= ~S_DAX;
+ }
/*
* If the device is invalidated, rescan partition
else if (ret == -ENOMEDIUM)
invalidate_partitions(disk, bdev);
}
+
if (ret)
goto out_clear;
} else {
goto out_clear;
}
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
- /*
- * If the partition is not aligned on a page
- * boundary, we can't do dax I/O to it.
- */
- if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
- (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+ if (!blkdev_dax_capable(bdev))
bdev->bd_inode->i_flags &= ~S_DAX;
}
} else {
.is_dirty_writeback = buffer_check_dirty_writeback,
};
+#ifdef CONFIG_FS_DAX
+/*
+ * In the raw block case we do not need to contend with truncation nor
+ * unwritten file extents. Without those concerns there is no need for
+ * additional locking beyond the mmap_sem context that these routines
+ * are already executing under.
+ *
+ * Note, there is no protection if the block device is dynamically
+ * resized (partition grow/shrink) during a fault. A stable block device
+ * size is already not enforced in the blkdev_direct_IO path.
+ *
+ * For DAX, it is the responsibility of the block device driver to
+ * ensure the whole-disk device size is stable while requests are in
+ * flight.
+ *
+ * Finally, unlike the filemap_page_mkwrite() case there is no
+ * filesystem superblock to sync against freezing. We still include a
+ * pfn_mkwrite callback for dax drivers to receive write fault
+ * notifications.
+ */
+static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return __dax_fault(vma, vmf, blkdev_get_block, NULL);
+}
+
+static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, unsigned int flags)
+{
+ return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
+}
+
+static void blkdev_vm_open(struct vm_area_struct *vma)
+{
+ struct inode *bd_inode = bdev_file_inode(vma->vm_file);
+ struct block_device *bdev = I_BDEV(bd_inode);
+
+ mutex_lock(&bd_inode->i_mutex);
+ bdev->bd_map_count++;
+ mutex_unlock(&bd_inode->i_mutex);
+}
+
+static void blkdev_vm_close(struct vm_area_struct *vma)
+{
+ struct inode *bd_inode = bdev_file_inode(vma->vm_file);
+ struct block_device *bdev = I_BDEV(bd_inode);
+
+ mutex_lock(&bd_inode->i_mutex);
+ bdev->bd_map_count--;
+ mutex_unlock(&bd_inode->i_mutex);
+}
+
+static const struct vm_operations_struct blkdev_dax_vm_ops = {
+ .open = blkdev_vm_open,
+ .close = blkdev_vm_close,
+ .fault = blkdev_dax_fault,
+ .pmd_fault = blkdev_dax_pmd_fault,
+ .pfn_mkwrite = blkdev_dax_fault,
+};
+
+static const struct vm_operations_struct blkdev_default_vm_ops = {
+ .open = blkdev_vm_open,
+ .close = blkdev_vm_close,
+ .fault = filemap_fault,
+ .map_pages = filemap_map_pages,
+};
+
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *bd_inode = bdev_file_inode(file);
+ struct block_device *bdev = I_BDEV(bd_inode);
+
+ file_accessed(file);
+ mutex_lock(&bd_inode->i_mutex);
+ bdev->bd_map_count++;
+ if (IS_DAX(bd_inode)) {
+ vma->vm_ops = &blkdev_dax_vm_ops;
+ vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+ } else {
+ vma->vm_ops = &blkdev_default_vm_ops;
+ }
+ mutex_unlock(&bd_inode->i_mutex);
+
+ return 0;
+}
+#else
+#define blkdev_mmap generic_file_mmap
+#endif
+
const struct file_operations def_blk_fops = {
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
- .mmap = generic_file_mmap,
+ .mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT