libata: fix ATAPI DMA alignment issues
author Jeff Garzik <jgarzik@pobox.com>
Wed, 5 Oct 2005 11:13:30 +0000 (07:13 -0400)
committer Jeff Garzik <jgarzik@pobox.com>
Wed, 5 Oct 2005 11:13:30 +0000 (07:13 -0400)
ATAPI DMA transfers need to be padded to the next 4-byte boundary if misaligned.

Original work by me, many fixes from Tejun Heo.

drivers/scsi/ahci.c
drivers/scsi/libata-core.c
drivers/scsi/libata-scsi.c
drivers/scsi/sata_qstor.c
drivers/scsi/sata_sx4.c
include/linux/libata.h

index c2c8fa828e24e9d227aaf93a5efc4367dab40872..6e4bb36f8d7cd79165b96cf694d56bc61a1a1f18 100644 (file)
@@ -314,8 +314,15 @@ static int ahci_port_start(struct ata_port *ap)
                return -ENOMEM;
        memset(pp, 0, sizeof(*pp));
 
+       ap->pad = dma_alloc_coherent(dev, ATA_DMA_PAD_BUF_SZ, &ap->pad_dma, GFP_KERNEL);
+       if (!ap->pad) {
+               kfree(pp);
+               return -ENOMEM;
+       }
+
        mem = dma_alloc_coherent(dev, AHCI_PORT_PRIV_DMA_SZ, &mem_dma, GFP_KERNEL);
        if (!mem) {
+               dma_free_coherent(dev, ATA_DMA_PAD_BUF_SZ, ap->pad, ap->pad_dma);
                kfree(pp);
                return -ENOMEM;
        }
@@ -391,6 +398,7 @@ static void ahci_port_stop(struct ata_port *ap)
        ap->private_data = NULL;
        dma_free_coherent(dev, AHCI_PORT_PRIV_DMA_SZ,
                          pp->cmd_slot, pp->cmd_slot_dma);
+       dma_free_coherent(dev, ATA_DMA_PAD_BUF_SZ, ap->pad, ap->pad_dma);
        kfree(pp);
 }
 
@@ -476,23 +484,23 @@ static void ahci_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
 static void ahci_fill_sg(struct ata_queued_cmd *qc)
 {
        struct ahci_port_priv *pp = qc->ap->private_data;
-       unsigned int i;
+       struct scatterlist *sg;
+       struct ahci_sg *ahci_sg;
 
        VPRINTK("ENTER\n");
 
        /*
         * Next, the S/G list.
         */
-       for (i = 0; i < qc->n_elem; i++) {
-               u32 sg_len;
-               dma_addr_t addr;
-
-               addr = sg_dma_address(&qc->sg[i]);
-               sg_len = sg_dma_len(&qc->sg[i]);
-
-               pp->cmd_tbl_sg[i].addr = cpu_to_le32(addr & 0xffffffff);
-               pp->cmd_tbl_sg[i].addr_hi = cpu_to_le32((addr >> 16) >> 16);
-               pp->cmd_tbl_sg[i].flags_size = cpu_to_le32(sg_len - 1);
+       ahci_sg = pp->cmd_tbl_sg;
+       ata_for_each_sg(sg, qc) {
+               dma_addr_t addr = sg_dma_address(sg);
+               u32 sg_len = sg_dma_len(sg);
+
+               ahci_sg->addr = cpu_to_le32(addr & 0xffffffff);
+               ahci_sg->addr_hi = cpu_to_le32((addr >> 16) >> 16);
+               ahci_sg->flags_size = cpu_to_le32(sg_len - 1);
+               ahci_sg++;
        }
 }
 
index e5b01997117a9965249cb734b7c00f43b7d9bbd8..943b44c3c16fc196b3cc75988d45c39ad61bcc3e 100644 (file)
@@ -2156,8 +2156,9 @@ static void ata_dev_set_xfermode(struct ata_port *ap, struct ata_device *dev)
 static void ata_sg_clean(struct ata_queued_cmd *qc)
 {
        struct ata_port *ap = qc->ap;
-       struct scatterlist *sg = qc->sg;
+       struct scatterlist *sg = qc->__sg;
        int dir = qc->dma_dir;
+       void *pad_buf = NULL;
 
        assert(qc->flags & ATA_QCFLAG_DMAMAP);
        assert(sg != NULL);
@@ -2167,14 +2168,35 @@ static void ata_sg_clean(struct ata_queued_cmd *qc)
 
        DPRINTK("unmapping %u sg elements\n", qc->n_elem);
 
-       if (qc->flags & ATA_QCFLAG_SG)
+       /* if we padded the buffer out to 32-bit bound, and data
+        * xfer direction is from-device, we must copy from the
+        * pad buffer back into the supplied buffer
+        */
+       if (qc->pad_len && !(qc->tf.flags & ATA_TFLAG_WRITE))
+               pad_buf = ap->pad + (qc->tag * ATA_DMA_PAD_SZ);
+
+       if (qc->flags & ATA_QCFLAG_SG) {
                dma_unmap_sg(ap->host_set->dev, sg, qc->n_elem, dir);
-       else
+               /* restore last sg */
+               sg[qc->orig_n_elem - 1].length += qc->pad_len;
+               if (pad_buf) {
+                       struct scatterlist *psg = &qc->pad_sgent;
+                       void *addr = kmap_atomic(psg->page, KM_IRQ0);
+                       memcpy(addr + psg->offset, pad_buf, qc->pad_len);
+                       kunmap_atomic(psg->page, KM_IRQ0);
+               }
+       } else {
                dma_unmap_single(ap->host_set->dev, sg_dma_address(&sg[0]),
                                 sg_dma_len(&sg[0]), dir);
+               /* restore sg */
+               sg->length += qc->pad_len;
+               if (pad_buf)
+                       memcpy(qc->buf_virt + sg->length - qc->pad_len,
+                              pad_buf, qc->pad_len);
+       }
 
        qc->flags &= ~ATA_QCFLAG_DMAMAP;
-       qc->sg = NULL;
+       qc->__sg = NULL;
 }
 
 /**
@@ -2190,15 +2212,15 @@ static void ata_sg_clean(struct ata_queued_cmd *qc)
  */
 static void ata_fill_sg(struct ata_queued_cmd *qc)
 {
-       struct scatterlist *sg = qc->sg;
        struct ata_port *ap = qc->ap;
-       unsigned int idx, nelem;
+       struct scatterlist *sg;
+       unsigned int idx;
 
-       assert(sg != NULL);
+       assert(qc->__sg != NULL);
        assert(qc->n_elem > 0);
 
        idx = 0;
-       for (nelem = qc->n_elem; nelem; nelem--,sg++) {
+       ata_for_each_sg(sg, qc) {
                u32 addr, offset;
                u32 sg_len, len;
 
@@ -2289,11 +2311,12 @@ void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen)
        qc->flags |= ATA_QCFLAG_SINGLE;
 
        memset(&qc->sgent, 0, sizeof(qc->sgent));
-       qc->sg = &qc->sgent;
+       qc->__sg = &qc->sgent;
        qc->n_elem = 1;
+       qc->orig_n_elem = 1;
        qc->buf_virt = buf;
 
-       sg = qc->sg;
+       sg = qc->__sg;
        sg->page = virt_to_page(buf);
        sg->offset = (unsigned long) buf & ~PAGE_MASK;
        sg->length = buflen;
@@ -2317,8 +2340,9 @@ void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
                 unsigned int n_elem)
 {
        qc->flags |= ATA_QCFLAG_SG;
-       qc->sg = sg;
+       qc->__sg = sg;
        qc->n_elem = n_elem;
+       qc->orig_n_elem = n_elem;
 }
 
 /**
@@ -2338,9 +2362,32 @@ static int ata_sg_setup_one(struct ata_queued_cmd *qc)
 {
        struct ata_port *ap = qc->ap;
        int dir = qc->dma_dir;
-       struct scatterlist *sg = qc->sg;
+       struct scatterlist *sg = qc->__sg;
        dma_addr_t dma_address;
 
+       /* we must lengthen transfers to end on a 32-bit boundary */
+       qc->pad_len = sg->length & 3;
+       if (qc->pad_len) {
+               void *pad_buf = ap->pad + (qc->tag * ATA_DMA_PAD_SZ);
+               struct scatterlist *psg = &qc->pad_sgent;
+
+               assert(qc->dev->class == ATA_DEV_ATAPI);
+
+               memset(pad_buf, 0, ATA_DMA_PAD_SZ);
+
+               if (qc->tf.flags & ATA_TFLAG_WRITE)
+                       memcpy(pad_buf, qc->buf_virt + sg->length - qc->pad_len,
+                              qc->pad_len);
+
+               sg_dma_address(psg) = ap->pad_dma + (qc->tag * ATA_DMA_PAD_SZ);
+               sg_dma_len(psg) = ATA_DMA_PAD_SZ;
+               /* trim sg */
+               sg->length -= qc->pad_len;
+
+               DPRINTK("padding done, sg->length=%u pad_len=%u\n",
+                       sg->length, qc->pad_len);
+       }
+
        dma_address = dma_map_single(ap->host_set->dev, qc->buf_virt,
                                     sg->length, dir);
        if (dma_mapping_error(dma_address))
@@ -2372,12 +2419,47 @@ static int ata_sg_setup_one(struct ata_queued_cmd *qc)
 static int ata_sg_setup(struct ata_queued_cmd *qc)
 {
        struct ata_port *ap = qc->ap;
-       struct scatterlist *sg = qc->sg;
+       struct scatterlist *sg = qc->__sg;
+       struct scatterlist *lsg = &sg[qc->n_elem - 1];
        int n_elem, dir;
 
        VPRINTK("ENTER, ata%u\n", ap->id);
        assert(qc->flags & ATA_QCFLAG_SG);
 
+       /* we must lengthen transfers to end on a 32-bit boundary */
+       qc->pad_len = lsg->length & 3;
+       if (qc->pad_len) {
+               void *pad_buf = ap->pad + (qc->tag * ATA_DMA_PAD_SZ);
+               struct scatterlist *psg = &qc->pad_sgent;
+               unsigned int offset;
+
+               assert(qc->dev->class == ATA_DEV_ATAPI);
+
+               memset(pad_buf, 0, ATA_DMA_PAD_SZ);
+
+               /*
+                * psg->page/offset are used to copy to-be-written
+                * data in this function or read data in ata_sg_clean.
+                */
+               offset = lsg->offset + lsg->length - qc->pad_len;
+               psg->page = nth_page(lsg->page, offset >> PAGE_SHIFT);
+               psg->offset = offset_in_page(offset);
+
+               if (qc->tf.flags & ATA_TFLAG_WRITE) {
+                       void *addr = kmap_atomic(psg->page, KM_IRQ0);
+                       memcpy(pad_buf, addr + psg->offset, qc->pad_len);
+                       kunmap_atomic(psg->page, KM_IRQ0);
+               }
+
+               sg_dma_address(psg) = ap->pad_dma + (qc->tag * ATA_DMA_PAD_SZ);
+               sg_dma_len(psg) = ATA_DMA_PAD_SZ;
+               /* trim last sg */
+               lsg->length -= qc->pad_len;
+
+               DPRINTK("padding done, sg[%d].length=%u pad_len=%u\n",
+                       qc->n_elem - 1, lsg->length, qc->pad_len);
+       }
+
        dir = qc->dma_dir;
        n_elem = dma_map_sg(ap->host_set->dev, sg, qc->n_elem, dir);
        if (n_elem < 1)
@@ -2655,7 +2737,7 @@ static void ata_data_xfer(struct ata_port *ap, unsigned char *buf,
 static void ata_pio_sector(struct ata_queued_cmd *qc)
 {
        int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
-       struct scatterlist *sg = qc->sg;
+       struct scatterlist *sg = qc->__sg;
        struct ata_port *ap = qc->ap;
        struct page *page;
        unsigned int offset;
@@ -2705,7 +2787,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
 {
        int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
-       struct scatterlist *sg = qc->sg;
+       struct scatterlist *sg = qc->__sg;
        struct ata_port *ap = qc->ap;
        struct page *page;
        unsigned char *buf;
@@ -2738,7 +2820,7 @@ next_sg:
                return;
        }
 
-       sg = &qc->sg[qc->cursg];
+       sg = &qc->__sg[qc->cursg];
 
        page = sg->page;
        offset = sg->offset + qc->cursg_ofs;
@@ -3145,7 +3227,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
 
        qc = ata_qc_new(ap);
        if (qc) {
-               qc->sg = NULL;
+               qc->__sg = NULL;
                qc->flags = 0;
                qc->scsicmd = NULL;
                qc->ap = ap;
@@ -3837,6 +3919,12 @@ int ata_port_start (struct ata_port *ap)
        if (!ap->prd)
                return -ENOMEM;
 
+       ap->pad = dma_alloc_coherent(dev, ATA_DMA_PAD_BUF_SZ, &ap->pad_dma, GFP_KERNEL);
+       if (!ap->pad) {
+               dma_free_coherent(dev, ATA_PRD_TBL_SZ, ap->prd, ap->prd_dma);
+               return -ENOMEM;
+       }
+
        DPRINTK("prd alloc, virt %p, dma %llx\n", ap->prd, (unsigned long long) ap->prd_dma);
 
        return 0;
@@ -3859,6 +3947,7 @@ void ata_port_stop (struct ata_port *ap)
        struct device *dev = ap->host_set->dev;
 
        dma_free_coherent(dev, ATA_PRD_TBL_SZ, ap->prd, ap->prd_dma);
+       dma_free_coherent(dev, ATA_DMA_PAD_BUF_SZ, ap->pad, ap->pad_dma);
 }
 
 void ata_host_stop (struct ata_host_set *host_set)
index 104fd9a63e734ddc752f68de92215a208242a0d1..ee3f1050fb5f640d9db1fa7321c06bdf5b6f2792 100644 (file)
@@ -150,10 +150,10 @@ struct ata_queued_cmd *ata_scsi_qc_new(struct ata_port *ap,
                qc->scsidone = done;
 
                if (cmd->use_sg) {
-                       qc->sg = (struct scatterlist *) cmd->request_buffer;
+                       qc->__sg = (struct scatterlist *) cmd->request_buffer;
                        qc->n_elem = cmd->use_sg;
                } else {
-                       qc->sg = &qc->sgent;
+                       qc->__sg = &qc->sgent;
                        qc->n_elem = 1;
                }
        } else {
@@ -364,6 +364,16 @@ int ata_scsi_slave_config(struct scsi_device *sdev)
                         */
                        blk_queue_max_sectors(sdev->request_queue, 2048);
                }
+
+               /*
+                * SATA DMA transfers must be multiples of 4 byte, so
+                * we need to pad ATAPI transfers using an extra sg.
+                * Decrement max hw segments accordingly.
+                */
+               if (dev->class == ATA_DEV_ATAPI) {
+                       request_queue_t *q = sdev->request_queue;
+                       blk_queue_max_hw_segments(q, q->max_hw_segments - 1);
+               }
        }
 
        return 0;       /* scsi layer doesn't check return value, sigh */
index ffcdeb68641cf5a2493abf7be5b68b1da32f329c..69a9b1cf6f9cb56fed4742ae27dce2e14b0ea5a6 100644 (file)
@@ -268,16 +268,17 @@ static void qs_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val)
 
 static void qs_fill_sg(struct ata_queued_cmd *qc)
 {
-       struct scatterlist *sg = qc->sg;
+       struct scatterlist *sg;
        struct ata_port *ap = qc->ap;
        struct qs_port_priv *pp = ap->private_data;
        unsigned int nelem;
        u8 *prd = pp->pkt + QS_CPB_BYTES;
 
-       assert(sg != NULL);
+       assert(qc->__sg != NULL);
        assert(qc->n_elem > 0);
 
-       for (nelem = 0; nelem < qc->n_elem; nelem++,sg++) {
+       nelem = 0;
+       ata_for_each_sg(sg, qc) {
                u64 addr;
                u32 len;
 
@@ -291,6 +292,7 @@ static void qs_fill_sg(struct ata_queued_cmd *qc)
 
                VPRINTK("PRD[%u] = (0x%llX, 0x%X)\n", nelem,
                                        (unsigned long long)addr, len);
+               nelem++;
        }
 }
 
index 540a851911723b5329e67f6f63981acba6975339..79fdbbab513e87b433def9b5761bb671b24bd971 100644 (file)
@@ -449,14 +449,14 @@ static inline void pdc20621_host_pkt(struct ata_taskfile *tf, u8 *buf,
 
 static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
 {
-       struct scatterlist *sg = qc->sg;
+       struct scatterlist *sg;
        struct ata_port *ap = qc->ap;
        struct pdc_port_priv *pp = ap->private_data;
        void __iomem *mmio = ap->host_set->mmio_base;
        struct pdc_host_priv *hpriv = ap->host_set->private_data;
        void __iomem *dimm_mmio = hpriv->dimm_mmio;
        unsigned int portno = ap->port_no;
-       unsigned int i, last, idx, total_len = 0, sgt_len;
+       unsigned int i, idx, total_len = 0, sgt_len;
        u32 *buf = (u32 *) &pp->dimm_buf[PDC_DIMM_HEADER_SZ];
 
        assert(qc->flags & ATA_QCFLAG_DMAMAP);
@@ -469,12 +469,11 @@ static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
        /*
         * Build S/G table
         */
-       last = qc->n_elem;
        idx = 0;
-       for (i = 0; i < last; i++) {
-               buf[idx++] = cpu_to_le32(sg_dma_address(&sg[i]));
-               buf[idx++] = cpu_to_le32(sg_dma_len(&sg[i]));
-               total_len += sg_dma_len(&sg[i]);
+       ata_for_each_sg(sg, qc) {
+               buf[idx++] = cpu_to_le32(sg_dma_address(sg));
+               buf[idx++] = cpu_to_le32(sg_dma_len(sg));
+               total_len += sg_dma_len(sg);
        }
        buf[idx - 1] |= cpu_to_le32(ATA_PRD_EOT);
        sgt_len = idx * 4;
index ceee1fc42c600d0aa0d045edb37db8c253fec457..3ab67622ef933220746d1a362bb9e7217e1f25d0 100644 (file)
@@ -154,6 +154,10 @@ enum {
        ATA_SHIFT_UDMA          = 0,
        ATA_SHIFT_MWDMA         = 8,
        ATA_SHIFT_PIO           = 11,
+
+       /* size of buffer to pad xfers ending on unaligned boundaries */
+       ATA_DMA_PAD_SZ          = 4,
+       ATA_DMA_PAD_BUF_SZ      = ATA_DMA_PAD_SZ * ATA_MAX_QUEUE,
 };
 
 enum pio_task_states {
@@ -237,9 +241,12 @@ struct ata_queued_cmd {
        unsigned long           flags;          /* ATA_QCFLAG_xxx */
        unsigned int            tag;
        unsigned int            n_elem;
+       unsigned int            orig_n_elem;
 
        int                     dma_dir;
 
+       unsigned int            pad_len;
+
        unsigned int            nsect;
        unsigned int            cursect;
 
@@ -250,9 +257,11 @@ struct ata_queued_cmd {
        unsigned int            cursg_ofs;
 
        struct scatterlist      sgent;
+       struct scatterlist      pad_sgent;
        void                    *buf_virt;
 
-       struct scatterlist      *sg;
+       /* DO NOT iterate over __sg manually, use ata_for_each_sg() */
+       struct scatterlist      *__sg;
 
        ata_qc_cb_t             complete_fn;
 
@@ -295,6 +304,9 @@ struct ata_port {
        struct ata_prd          *prd;    /* our SG list */
        dma_addr_t              prd_dma; /* and its DMA mapping */
 
+       void                    *pad;   /* array of DMA pad buffers */
+       dma_addr_t              pad_dma;
+
        struct ata_ioports      ioaddr; /* ATA cmd/ctl/dma register blocks */
 
        u8                      ctl;    /* cache of ATA control register */
@@ -458,6 +470,19 @@ extern int pci_test_config_bits(struct pci_dev *pdev, struct pci_bits *bits);
 #endif /* CONFIG_PCI */
 
 
+static inline struct scatterlist *
+ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc)
+{
+       if (sg == &qc->pad_sgent)
+               return NULL;
+       if (++sg - qc->__sg < qc->n_elem)
+               return sg;
+       return qc->pad_len ? &qc->pad_sgent : NULL;
+}
+
+#define ata_for_each_sg(sg, qc) \
+       for (sg = qc->__sg; sg; sg = ata_qc_next_sg(sg, qc))
+
 static inline unsigned int ata_tag_valid(unsigned int tag)
 {
        return (tag < ATA_MAX_QUEUE) ? 1 : 0;