x86, AMD IOMMU: remove unnecessary set_bit_string
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 47e80b5814bf6bd5e6113a141650e62a3ee3fb44..329b2c3f2fed8302b142eb29d7861efee9a8c306 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -25,6 +25,7 @@
 #include <asm/proto.h>
 #include <asm/gart.h>
 #include <asm/amd_iommu_types.h>
+#include <asm/amd_iommu.h>
 
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
@@ -40,6 +41,11 @@ struct command {
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
                             struct unity_map_entry *e);
 
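+/*
+ * Returns non-zero if the IOMMU caches non-present page table
+ * entries. In that case newly written mappings have to be flushed
+ * before the device uses them.
+ */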
+static int iommu_has_npcache(struct amd_iommu *iommu)
+{
+       return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
+}
+
 static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
 {
        u32 tail, head;
@@ -295,7 +301,6 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
                                0, boundary_size, 0);
 
        if (likely(address != -1)) {
-               set_bit_string(dom->bitmap, address, pages);
                dom->next_bit = address + pages;
                address <<= PAGE_SHIFT;
        } else
@@ -536,3 +541,416 @@ static int get_device_resources(struct device *dev,
        return 1;
 }
 
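+/*
+ * Generic mapping function for one page. It maps the physical page at
+ * 'paddr' to the I/O virtual address 'address' in the dma_ops domain
+ * and encodes the DMA direction in the IOMMU_PTE_IR/IW permission
+ * bits of the page table entry.
+ */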
+static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
+                                    struct dma_ops_domain *dom,
+                                    unsigned long address,
+                                    phys_addr_t paddr,
+                                    int direction)
+{
+       u64 *pte, __pte;
+
+       WARN_ON(address > dom->aperture_size);
+
+       paddr &= PAGE_MASK;
+
+       pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
+       pte += IOMMU_PTE_L0_INDEX(address);
+
+       __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
+
+       if (direction == DMA_TO_DEVICE)
+               __pte |= IOMMU_PTE_IR;
+       else if (direction == DMA_FROM_DEVICE)
+               __pte |= IOMMU_PTE_IW;
+       else if (direction == DMA_BIDIRECTIONAL)
+               __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
+
+       WARN_ON(*pte);
+
+       *pte = __pte;
+
+       return (dma_addr_t)address;
+}
+
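+/*
+ * The generic unmapping function for one page: it clears the page
+ * table entry for the given I/O virtual address in the dma_ops
+ * domain.
+ */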
+static void dma_ops_domain_unmap(struct amd_iommu *iommu,
+                                struct dma_ops_domain *dom,
+                                unsigned long address)
+{
+       u64 *pte;
+
+       if (address >= dom->aperture_size)
+               return;
+
+       WARN_ON((address & 0xfffULL) || (address > dom->aperture_size));
+
+       pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
+       pte += IOMMU_PTE_L0_INDEX(address);
+
+       WARN_ON(!*pte);
+
+       *pte = 0ULL;
+}
+
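+/*
+ * Maps a buffer of arbitrary size: allocates a contiguous range of
+ * I/O virtual addresses and maps the buffer into it page by page.
+ * Must be called with the domain lock held.
+ */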
+static dma_addr_t __map_single(struct device *dev,
+                              struct amd_iommu *iommu,
+                              struct dma_ops_domain *dma_dom,
+                              phys_addr_t paddr,
+                              size_t size,
+                              int dir)
+{
+       dma_addr_t offset = paddr & ~PAGE_MASK;
+       dma_addr_t address, start;
+       unsigned int pages;
+       int i;
+
+       pages = to_pages(paddr, size);
+       paddr &= PAGE_MASK;
+
+       address = dma_ops_alloc_addresses(dev, dma_dom, pages);
+       if (unlikely(address == bad_dma_address))
+               goto out;
+
+       start = address;
+       for (i = 0; i < pages; ++i) {
+               dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+               paddr += PAGE_SIZE;
+               start += PAGE_SIZE;
+       }
+       address += offset;
+
+out:
+       return address;
+}
+
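+/*
+ * The reverse of __map_single: unmaps the buffer page by page and
+ * releases the I/O virtual address range. Must be called with the
+ * domain lock held.
+ */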
+static void __unmap_single(struct amd_iommu *iommu,
+                          struct dma_ops_domain *dma_dom,
+                          dma_addr_t dma_addr,
+                          size_t size,
+                          int dir)
+{
+       dma_addr_t i, start;
+       unsigned int pages;
+
+       if ((dma_addr == bad_dma_address) ||
+           (dma_addr + size > dma_dom->aperture_size))
+               return;
+
+       pages = to_pages(dma_addr, size);
+       dma_addr &= PAGE_MASK;
+       start = dma_addr;
+
+       for (i = 0; i < pages; ++i) {
+               dma_ops_domain_unmap(iommu, dma_dom, start);
+               start += PAGE_SIZE;
+       }
+
+       dma_ops_free_addresses(dma_dom, dma_addr, pages);
+}
+
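+/*
+ * The map_single entry point for dma_ops. Devices not handled by the
+ * IOMMU get their physical address back unchanged.
+ */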
+static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
+                            size_t size, int dir)
+{
+       unsigned long flags;
+       struct amd_iommu *iommu;
+       struct protection_domain *domain;
+       u16 devid;
+       dma_addr_t addr;
+
+       get_device_resources(dev, &iommu, &domain, &devid);
+
+       if (iommu == NULL || domain == NULL)
+               return (dma_addr_t)paddr;
+
+       spin_lock_irqsave(&domain->lock, flags);
+       addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
+       if (addr == bad_dma_address)
+               goto out;
+
+       if (iommu_has_npcache(iommu))
+               iommu_flush_pages(iommu, domain->id, addr, size);
+
+       if (iommu->need_sync)
+               iommu_completion_wait(iommu);
+
+out:
+       spin_unlock_irqrestore(&domain->lock, flags);
+
+       return addr;
+}
+
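+/*
+ * The unmap_single entry point for dma_ops: tears the mapping down
+ * and flushes the IOTLB for the unmapped range.
+ */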
+static void unmap_single(struct device *dev, dma_addr_t dma_addr,
+                        size_t size, int dir)
+{
+       unsigned long flags;
+       struct amd_iommu *iommu;
+       struct protection_domain *domain;
+       u16 devid;
+
+       if (!get_device_resources(dev, &iommu, &domain, &devid))
+               return;
+
+       spin_lock_irqsave(&domain->lock, flags);
+
+       __unmap_single(iommu, domain->priv, dma_addr, size, dir);
+
+       iommu_flush_pages(iommu, domain->id, dma_addr, size);
+
+       if (iommu->need_sync)
+               iommu_completion_wait(iommu);
+
+       spin_unlock_irqrestore(&domain->lock, flags);
+}
+
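+/*
+ * Maps a scatter-gather list 1:1 to its physical addresses. Used for
+ * devices not handled by the IOMMU.
+ */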
+static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
+                          int nelems, int dir)
+{
+       struct scatterlist *s;
+       int i;
+
+       for_each_sg(sglist, s, nelems, i) {
+               s->dma_address = (dma_addr_t)sg_phys(s);
+               s->dma_length  = s->length;
+       }
+
+       return nelems;
+}
+
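+/*
+ * The map_sg entry point for dma_ops: maps every scatterlist entry
+ * separately and unwinds all mappings if one of them fails.
+ */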
+static int map_sg(struct device *dev, struct scatterlist *sglist,
+                 int nelems, int dir)
+{
+       unsigned long flags;
+       struct amd_iommu *iommu;
+       struct protection_domain *domain;
+       u16 devid;
+       int i;
+       struct scatterlist *s;
+       phys_addr_t paddr;
+       int mapped_elems = 0;
+
+       get_device_resources(dev, &iommu, &domain, &devid);
+
+       if (!iommu || !domain)
+               return map_sg_no_iommu(dev, sglist, nelems, dir);
+
+       spin_lock_irqsave(&domain->lock, flags);
+
+       for_each_sg(sglist, s, nelems, i) {
+               paddr = sg_phys(s);
+
+               s->dma_address = __map_single(dev, iommu, domain->priv,
+                                             paddr, s->length, dir);
+
+               if (s->dma_address) {
+                       s->dma_length = s->length;
+                       mapped_elems++;
+               } else
+                       goto unmap;
+               if (iommu_has_npcache(iommu))
+                       iommu_flush_pages(iommu, domain->id, s->dma_address,
+                                         s->dma_length);
+       }
+
+       if (iommu->need_sync)
+               iommu_completion_wait(iommu);
+
+out:
+       spin_unlock_irqrestore(&domain->lock, flags);
+
+       return mapped_elems;
+unmap:
+       for_each_sg(sglist, s, mapped_elems, i) {
+               if (s->dma_address)
+                       __unmap_single(iommu, domain->priv, s->dma_address,
+                                      s->dma_length, dir);
+               s->dma_address = s->dma_length = 0;
+       }
+
+       mapped_elems = 0;
+
+       goto out;
+}
+
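+/*
+ * The unmap_sg entry point for dma_ops: unmaps and flushes every
+ * entry that map_sg mapped before.
+ */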
+static void unmap_sg(struct device *dev, struct scatterlist *sglist,
+                    int nelems, int dir)
+{
+       unsigned long flags;
+       struct amd_iommu *iommu;
+       struct protection_domain *domain;
+       struct scatterlist *s;
+       u16 devid;
+       int i;
+
+       if (!get_device_resources(dev, &iommu, &domain, &devid))
+               return;
+
+       spin_lock_irqsave(&domain->lock, flags);
+
+       for_each_sg(sglist, s, nelems, i) {
+               __unmap_single(iommu, domain->priv, s->dma_address,
+                              s->dma_length, dir);
+               iommu_flush_pages(iommu, domain->id, s->dma_address,
+                                 s->dma_length);
+               s->dma_address = s->dma_length = 0;
+       }
+
+       if (iommu->need_sync)
+               iommu_completion_wait(iommu);
+
+       spin_unlock_irqrestore(&domain->lock, flags);
+}
+
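+/*
+ * The alloc_coherent entry point for dma_ops: allocates zeroed pages
+ * and, if the device is behind an IOMMU, maps them into its dma_ops
+ * domain.
+ */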
+static void *alloc_coherent(struct device *dev, size_t size,
+                           dma_addr_t *dma_addr, gfp_t flag)
+{
+       unsigned long flags;
+       void *virt_addr;
+       struct amd_iommu *iommu;
+       struct protection_domain *domain;
+       u16 devid;
+       phys_addr_t paddr;
+
+       virt_addr = (void *)__get_free_pages(flag, get_order(size));
+       if (!virt_addr)
+               return NULL;
+
+       memset(virt_addr, 0, size);
+       paddr = virt_to_phys(virt_addr);
+
+       get_device_resources(dev, &iommu, &domain, &devid);
+
+       if (!iommu || !domain) {
+               *dma_addr = (dma_addr_t)paddr;
+               return virt_addr;
+       }
+
+       spin_lock_irqsave(&domain->lock, flags);
+
+       *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
+                                size, DMA_BIDIRECTIONAL);
+
+       if (*dma_addr == bad_dma_address) {
+               free_pages((unsigned long)virt_addr, get_order(size));
+               virt_addr = NULL;
+               goto out;
+       }
+
+       if (iommu_has_npcache(iommu))
+               iommu_flush_pages(iommu, domain->id, *dma_addr, size);
+
+       if (iommu->need_sync)
+               iommu_completion_wait(iommu);
+
+out:
+       spin_unlock_irqrestore(&domain->lock, flags);
+
+       return virt_addr;
+}
+
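+/*
+ * The free_coherent entry point for dma_ops: unmaps the buffer and
+ * frees the pages allocated by alloc_coherent.
+ */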
+static void free_coherent(struct device *dev, size_t size,
+                         void *virt_addr, dma_addr_t dma_addr)
+{
+       unsigned long flags;
+       struct amd_iommu *iommu;
+       struct protection_domain *domain;
+       u16 devid;
+
+       get_device_resources(dev, &iommu, &domain, &devid);
+
+       if (!iommu || !domain)
+               goto free_mem;
+
+       spin_lock_irqsave(&domain->lock, flags);
+
+       __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
+       iommu_flush_pages(iommu, domain->id, dma_addr, size);
+
+       if (iommu->need_sync)
+               iommu_completion_wait(iommu);
+
+       spin_unlock_irqrestore(&domain->lock, flags);
+
+free_mem:
+       free_pages((unsigned long)virt_addr, get_order(size));
+}
+
+/*
+ * If the driver core informed the DMA layer when a driver grabs a
+ * device, we would not need to preallocate the protection domains
+ * anymore. For now we have to.
+ */
+void prealloc_protection_domains(void)
+{
+       struct pci_dev *dev = NULL;
+       struct dma_ops_domain *dma_dom;
+       struct amd_iommu *iommu;
+       int order = amd_iommu_aperture_order;
+       u16 devid;
+
+       while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+               devid = (dev->bus->number << 8) | dev->devfn;
+               if (devid >= amd_iommu_last_bdf)
+                       continue;
+               devid = amd_iommu_alias_table[devid];
+               if (domain_for_device(devid))
+                       continue;
+               iommu = amd_iommu_rlookup_table[devid];
+               if (!iommu)
+                       continue;
+               dma_dom = dma_ops_domain_alloc(iommu, order);
+               if (!dma_dom)
+                       continue;
+               init_unity_mappings_for_device(dma_dom, devid);
+               set_device_domain(iommu, &dma_dom->domain, devid);
+               printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
+                      dma_dom->domain.id);
+               print_devid(devid, 1);
+       }
+}
+
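+/*
+ * The function table exposing this driver to the dma_ops interface.
+ */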
+static struct dma_mapping_ops amd_iommu_dma_ops = {
+       .alloc_coherent = alloc_coherent,
+       .free_coherent = free_coherent,
+       .map_single = map_single,
+       .unmap_single = unmap_single,
+       .map_sg = map_sg,
+       .unmap_sg = unmap_sg,
+};
+
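+/*
+ * Initializes the dma_ops layer of the driver: allocates a default
+ * protection domain for every IOMMU in the system, sets up the unity
+ * mappings, optionally preallocates per-device domains and installs
+ * amd_iommu_dma_ops as the system-wide dma_ops.
+ */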
+int __init amd_iommu_init_dma_ops(void)
+{
+       struct amd_iommu *iommu;
+       int order = amd_iommu_aperture_order;
+       int ret;
+
+       list_for_each_entry(iommu, &amd_iommu_list, list) {
+               iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+               if (iommu->default_dom == NULL)
+                       return -ENOMEM;
+               ret = iommu_init_unity_mappings(iommu);
+               if (ret)
+                       goto free_domains;
+       }
+
+       if (amd_iommu_isolate)
+               prealloc_protection_domains();
+
+       iommu_detected = 1;
+       force_iommu = 1;
+       bad_dma_address = 0;
+#ifdef CONFIG_GART_IOMMU
+       gart_iommu_aperture_disabled = 1;
+       gart_iommu_aperture = 0;
+#endif
+
+       dma_ops = &amd_iommu_dma_ops;
+
+       return 0;
+
+free_domains:
+
+       list_for_each_entry(iommu, &amd_iommu_list, list) {
+               if (iommu->default_dom)
+                       dma_ops_domain_free(iommu->default_dom);
+       }
+
+       return ret;
+}