NVMe: Metadata handling through submit io ioctl
author Keith Busch <keith.busch@intel.com>
Tue, 7 Apr 2015 22:57:19 +0000 (16:57 -0600)
committer Jens Axboe <axboe@fb.com>
Wed, 8 Apr 2015 01:11:06 +0000 (19:11 -0600)
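This adds support for the extended metadata formats through the submit
IO ioctl, and simplifies the existing path used when the namespace has a
separate metadata buffer. With an extended format the metadata length is
added to the data transfer length and no separate metadata buffer is
set up; with separate metadata the user buffer is now copied to and from
a DMA-coherent buffer using copy_from_user()/copy_to_user() instead of
being mapped and copied one scatterlist entry at a time.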

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
drivers/block/nvme-core.c
include/linux/nvme.h

index 973c895bd7afdcaecabf95814ed41a16a0c436ae..e919de48ff25d558f70684445a853b809c113e82 100644 (file)
@@ -1745,25 +1745,31 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        struct nvme_dev *dev = ns->dev;
        struct nvme_user_io io;
        struct nvme_command c;
-       unsigned length, meta_len;
-       int status, i;
-       struct nvme_iod *iod, *meta_iod = NULL;
-       dma_addr_t meta_dma_addr;
-       void *meta, *uninitialized_var(meta_mem);
+       unsigned length, meta_len, prp_len;
+       int status, write;
+       struct nvme_iod *iod;
+       dma_addr_t meta_dma = 0;
+       void *meta = NULL;
 
        if (copy_from_user(&io, uio, sizeof(io)))
                return -EFAULT;
        length = (io.nblocks + 1) << ns->lba_shift;
        meta_len = (io.nblocks + 1) * ns->ms;
 
-       if (meta_len && ((io.metadata & 3) || !io.metadata))
+       if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
                return -EINVAL;
+       else if (meta_len && ns->ext) {
+               length += meta_len;
+               meta_len = 0;
+       }
+
+       write = io.opcode & 1;
 
        switch (io.opcode) {
        case nvme_cmd_write:
        case nvme_cmd_read:
        case nvme_cmd_compare:
-               iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
+               iod = nvme_map_user_pages(dev, write, io.addr, length);
                break;
        default:
                return -EINVAL;
@@ -1772,6 +1778,27 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        if (IS_ERR(iod))
                return PTR_ERR(iod);
 
+       prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
+       if (length != prp_len) {
+               status = -ENOMEM;
+               goto unmap;
+       }
+       if (meta_len) {
+               meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
+                                               &meta_dma, GFP_KERNEL);
+               if (!meta) {
+                       status = -ENOMEM;
+                       goto unmap;
+               }
+               if (write) {
+                       if (copy_from_user(meta, (void __user *)io.metadata,
+                                                               meta_len)) {
+                               status = -EFAULT;
+                               goto unmap;
+                       }
+               }
+       }
+
        memset(&c, 0, sizeof(c));
        c.rw.opcode = io.opcode;
        c.rw.flags = io.flags;
@@ -1783,75 +1810,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        c.rw.reftag = cpu_to_le32(io.reftag);
        c.rw.apptag = cpu_to_le16(io.apptag);
        c.rw.appmask = cpu_to_le16(io.appmask);
-
-       if (meta_len) {
-               meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
-                                                               meta_len);
-               if (IS_ERR(meta_iod)) {
-                       status = PTR_ERR(meta_iod);
-                       meta_iod = NULL;
-                       goto unmap;
-               }
-
-               meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
-                                               &meta_dma_addr, GFP_KERNEL);
-               if (!meta_mem) {
-                       status = -ENOMEM;
-                       goto unmap;
-               }
-
-               if (io.opcode & 1) {
-                       int meta_offset = 0;
-
-                       for (i = 0; i < meta_iod->nents; i++) {
-                               meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
-                                               meta_iod->sg[i].offset;
-                               memcpy(meta_mem + meta_offset, meta,
-                                               meta_iod->sg[i].length);
-                               kunmap_atomic(meta);
-                               meta_offset += meta_iod->sg[i].length;
-                       }
-               }
-
-               c.rw.metadata = cpu_to_le64(meta_dma_addr);
-       }
-
-       length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
        c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
        c.rw.prp2 = cpu_to_le64(iod->first_dma);
-
-       if (length != (io.nblocks + 1) << ns->lba_shift)
-               status = -ENOMEM;
-       else
-               status = nvme_submit_io_cmd(dev, ns, &c, NULL);
-
-       if (meta_len) {
-               if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
-                       int meta_offset = 0;
-
-                       for (i = 0; i < meta_iod->nents; i++) {
-                               meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
-                                               meta_iod->sg[i].offset;
-                               memcpy(meta, meta_mem + meta_offset,
-                                               meta_iod->sg[i].length);
-                               kunmap_atomic(meta);
-                               meta_offset += meta_iod->sg[i].length;
-                       }
-               }
-
-               dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem,
-                                                               meta_dma_addr);
-       }
-
+       c.rw.metadata = cpu_to_le64(meta_dma);
+       status = nvme_submit_io_cmd(dev, ns, &c, NULL);
  unmap:
-       nvme_unmap_user_pages(dev, io.opcode & 1, iod);
+       nvme_unmap_user_pages(dev, write, iod);
        nvme_free_iod(dev, iod);
-
-       if (meta_iod) {
-               nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod);
-               nvme_free_iod(dev, meta_iod);
+       if (meta) {
+               if (status == NVME_SC_SUCCESS && !write) {
+                       if (copy_to_user((void __user *)io.metadata, meta,
+                                                               meta_len))
+                               status = -EFAULT;
+               }
+               dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
        }
-
        return status;
 }
 
@@ -2014,7 +1987,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
        struct nvme_dev *dev = ns->dev;
        struct nvme_id_ns *id;
        dma_addr_t dma_addr;
-       int lbaf, pi_type, old_ms;
+       u8 lbaf, pi_type;
+       u16 old_ms;
        unsigned short bs;
 
        id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
@@ -2035,6 +2009,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
        lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
        ns->lba_shift = id->lbaf[lbaf].ds;
        ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+       ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
 
        /*
         * If identify namespace failed, use default 512 byte block size so
@@ -2051,14 +2026,14 @@ static int nvme_revalidate_disk(struct gendisk *disk)
        if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
                                ns->ms != old_ms ||
                                bs != queue_logical_block_size(disk->queue) ||
-                               (ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT)))
+                               (ns->ms && ns->ext)))
                blk_integrity_unregister(disk);
 
        ns->pi_type = pi_type;
        blk_queue_logical_block_size(ns->queue, bs);
 
        if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) &&
-                               !(id->flbas & NVME_NS_FLBAS_META_EXT))
+                                                               !ns->ext)
                nvme_init_integrity(ns);
 
        if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
index 0adad4a5419b7cbd7560422de51f9abc97a755a0..8dbd05e70f095b1bd7246677c695693b47e3f5fe 100644 (file)
@@ -117,8 +117,9 @@ struct nvme_ns {
 
        unsigned ns_id;
        int lba_shift;
-       int ms;
-       int pi_type;
+       u16 ms;
+       bool ext;
+       u8 pi_type;
        u64 mode_select_num_blocks;
        u32 mode_select_block_len;
 };