/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
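/*
 * For example, with 4KiB pages (PAGE_SHIFT == 12) IOVA_PFN() simply drops
 * the page offset: IOVA_PFN(0xfee00000) == 0xfee00.  DMA_32BIT_PFN is thus
 * the highest page frame number reachable through a 32-bit DMA mask.
 */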
/*
 * 12-63: Context Ptr (12 - (haw-1))
 */
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}

static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}

static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root) ? phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}
/*
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

#define context_present(c) ((c).lo & 1)
#define context_fault_disable(c) (((c).lo >> 1) & 1)
#define context_translation_type(c) (((c).lo >> 2) & 3)
#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
#define context_address_width(c) ((c).hi & 7)
#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))

#define context_set_present(c) do {(c).lo |= 1;} while (0)
#define context_set_fault_enable(c) \
	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
#define context_set_translation_type(c, val) \
	do { \
		(c).lo &= (((u64)-1) << 4) | 3; \
		(c).lo |= ((val) & 3) << 2; \
	} while (0)
#define CONTEXT_TT_MULTI_LEVEL 0
#define context_set_address_root(c, val) \
	do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
#define context_set_domain_id(c, val) \
	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
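/*
 * Taken together, these helpers compose a context entry much as
 * domain_context_mapping_one() does further down: set the domain id and
 * address width in the high qword, point the low qword at the domain's
 * page-table root, pick multi-level translation, then mark the entry
 * present.  A minimal sketch (illustrative only):
 *
 *	struct context_entry ce = { 0, 0 };
 *	context_set_domain_id(ce, domain->id);
 *	context_set_address_width(ce, domain->agaw);
 *	context_set_address_root(ce, virt_to_phys(domain->pgd));
 *	context_set_translation_type(ce, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(ce);
 *	context_set_present(ce);
 */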
/*
 * 12-63: Host physical address
 */
#define dma_clear_pte(p)	do {(p).val = 0;} while (0)

#define dma_set_pte_readable(p)	do {(p).val |= DMA_PTE_READ;} while (0)
#define dma_set_pte_writable(p)	do {(p).val |= DMA_PTE_WRITE;} while (0)
#define dma_set_pte_prot(p, prot) \
		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
#define dma_set_pte_addr(p, addr) do {\
		(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)
struct dmar_domain {
	int	id;			/* domain id */
	struct intel_iommu *iommu;	/* back pointer to owning iommu */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	spinlock_t	mapping_lock;	/* page table lock */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
	int		flags;
};
/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* it's NULL for PCIE-to-PCI bridge */
	struct dmar_domain *domain; /* pointer to domain */
};
static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

static void domain_remove_dev_info(struct dmar_domain *domain);

int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
static int __init intel_iommu_setup(char *str)
{
	while (*str) {
		if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
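/*
 * Example (kernel command line): the parser above takes a comma-separated
 * list of tokens, so something like
 *
 *	intel_iommu=igfx_off,strict
 *
 * disables the graphics 1:1 mapping and forces synchronous (unbatched)
 * IOTLB flushes, while "intel_iommu=off" disables DMA remapping entirely.
 */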
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}
static inline void *alloc_pgtable_page(void)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}
static inline void *alloc_domain_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
/* Gets context entry for a given bus and devfn */
static struct context_entry *device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)alloc_pgtable_page();
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}
static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn],
			sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry)
		goto out;
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}
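/*
 * Worked example of the helpers above (illustrative): a 48-bit guest
 * address width gives width_to_agaw(48) == 2, i.e. agaw_to_level(2) == 4
 * page-table levels, and each level decodes LEVEL_STRIDE == 9 bits of the
 * address; level 1 covers bits 12-20 (level_to_offset_bits(1) == 12) and
 * level 4 covers bits 39-47.  A 30-bit width is the 2-level, agaw == 0 case.
 */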
static struct dma_pte *addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;
	unsigned long flags;

	BUG_ON(!domain->pgd);

	addr &= (((u64)1) << addr_width) - 1;
	parent = domain->pgd;

	spin_lock_irqsave(&domain->mapping_lock, flags);
	while (level > 0) {
		void *tmp_page;

		offset = address_level_offset(addr, level);
		pte = &parent[offset];
		if (level == 1)
			break;

		if (!dma_pte_present(*pte)) {
			tmp_page = alloc_pgtable_page();
			if (!tmp_page) {
				spin_unlock_irqrestore(&domain->mapping_lock,
					flags);
				return NULL;
			}
			__iommu_flush_cache(domain->iommu, tmp_page,
					PAGE_SIZE);
			dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
			/*
			 * higher-level tables always set r/w; the last-level
			 * page table controls read/write
			 */
			dma_set_pte_readable(*pte);
			dma_set_pte_writable(*pte);
			__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
		}
		parent = phys_to_virt(dma_pte_addr(*pte));
		level--;
	}

	spin_unlock_irqrestore(&domain->mapping_lock, flags);
	return pte;
}
/* return address's pte at specific level */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
		int level)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = address_level_offset(addr, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(*pte))
			break;
		parent = phys_to_virt(dma_pte_addr(*pte));
		total--;
	}
	return NULL;
}
/* clear one page's page table */
static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
{
	struct dma_pte *pte = NULL;

	/* get last level pte */
	pte = dma_addr_level_pte(domain, addr, 1);

	if (pte) {
		dma_clear_pte(*pte);
		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
	}
}
/* clear the last level ptes; an IOTLB flush should follow */
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;
	/* in case it's a partial page */
	start = PAGE_ALIGN(start);
	end &= PAGE_MASK;

	/* we don't need lock here, nobody else touches the iova range */
	while (start < end) {
		dma_pte_clear_one(domain, start);
		start += VTD_PAGE_SIZE;
	}
}
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
	u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	u64 tmp;

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;

	/* we don't need lock here, nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start, level);
		if (tmp >= end || (tmp + level_size(level) > end))
			return;

		while (tmp < end) {
			pte = dma_addr_level_pte(domain, tmp, level);
			if (pte) {
				free_pgtable_page(
					phys_to_virt(dma_pte_addr(*pte)));
				dma_clear_pte(*pte);
				__iommu_flush_cache(domain->iommu,
						pte, sizeof(*pte));
			}
			tmp += level_size(level);
		}
		level++;
	}
	/* free pgd */
	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page();
	if (!root)
		return -ENOMEM;

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 cmd, sts;
	unsigned long flag;

	addr = iommu->root_entry;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	cmd = iommu->gcmd | DMA_GCMD_SRTP;
	writel(cmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_RTPS), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!cap_rwbf(iommu->cap))
		return;
	val = iommu->gcmd | DMA_GCMD_WBF;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(val, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
			readl, (!(val & DMA_GSTS_WBFS)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* return value determines whether we need a write buffer flush */
static int __iommu_flush_context(struct intel_iommu *iommu,
	u16 did, u16 source_id, u8 function_mask, u64 type,
	int non_present_entry_flush)
{
	u64 val = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entries we do nothing; if hardware does cache
	 * non-present entries, we flush entries of domain 0 (the domain id
	 * is used to cache any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* flushing a context entry will implicitly flush the write buffer */
	return 0;
}
/* return value determines whether we need a write buffer flush */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int size_order, u64 type,
	int non_present_entry_flush)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entries we do nothing; if hardware does cache
	 * non-present entries, we flush entries of domain 0 (the domain id
	 * is used to cache any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
	/*
	 * Read drain is probably only there to be extra safe; it looks
	 * like we can ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;

	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
	/* flushing an iotlb entry will implicitly flush the write buffer */
	return 0;
}
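/*
 * These two register-based routines are the fallback invalidation path:
 * init_dmars() below wires them into iommu->flush.flush_context and
 * iommu->flush.flush_iotlb only when dmar_enable_qi() fails, i.e. when
 * Queued Invalidation is unavailable; otherwise qi_flush_context and
 * qi_flush_iotlb are used instead.
 */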
static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int pages, int non_present_entry_flush)
{
	unsigned int mask;

	BUG_ON(addr & (~VTD_PAGE_MASK));

	/* Fallback to domain selective flush if no PSI support */
	if (!cap_pgsel_inv(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH,
						non_present_entry_flush);

	/*
	 * PSI requires the page count to be a power of two (2^x), with the
	 * base address naturally aligned to that size
	 */
	mask = ilog2(__roundup_pow_of_two(pages));
	/* Fallback to domain selective flush if size is too big */
	if (mask > cap_max_amask_val(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
			DMA_TLB_DSI_FLUSH, non_present_entry_flush);

	return iommu->flush.flush_iotlb(iommu, did, addr, mask,
					DMA_TLB_PSI_FLUSH,
					non_present_entry_flush);
}
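/*
 * For example (illustrative): unmapping 3 pages gives
 * __roundup_pow_of_two(3) == 4 and mask == 2, so the hardware invalidates
 * a naturally aligned 4-page (16KiB) region containing the request; if the
 * rounded size exceeds cap_max_amask_val(), the code above falls back to a
 * domain-selective flush instead.
 */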
798 static void iommu_disable_protect_mem_regions(struct intel_iommu
*iommu
)
803 spin_lock_irqsave(&iommu
->register_lock
, flags
);
804 pmen
= readl(iommu
->reg
+ DMAR_PMEN_REG
);
805 pmen
&= ~DMA_PMEN_EPM
;
806 writel(pmen
, iommu
->reg
+ DMAR_PMEN_REG
);
808 /* wait for the protected region status bit to clear */
809 IOMMU_WAIT_OP(iommu
, DMAR_PMEN_REG
,
810 readl
, !(pmen
& DMA_PMEN_PRS
), pmen
);
812 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
815 static int iommu_enable_translation(struct intel_iommu
*iommu
)
820 spin_lock_irqsave(&iommu
->register_lock
, flags
);
821 writel(iommu
->gcmd
|DMA_GCMD_TE
, iommu
->reg
+ DMAR_GCMD_REG
);
823 /* Make sure hardware completes it */
824 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
825 readl
, (sts
& DMA_GSTS_TES
), sts
);
827 iommu
->gcmd
|= DMA_GCMD_TE
;
828 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
832 static int iommu_disable_translation(struct intel_iommu
*iommu
)
837 spin_lock_irqsave(&iommu
->register_lock
, flag
);
838 iommu
->gcmd
&= ~DMA_GCMD_TE
;
839 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
841 /* Make sure hardware completes it */
842 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
843 readl
, (!(sts
& DMA_GSTS_TES
)), sts
);
845 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
849 /* iommu interrupt handling. Most of it is MSI-like. */
851 static const char *fault_reason_strings
[] =
854 "Present bit in root entry is clear",
855 "Present bit in context entry is clear",
856 "Invalid context entry",
857 "Access beyond MGAW",
858 "PTE Write access is not set",
859 "PTE Read access is not set",
860 "Next page table ptr is invalid",
861 "Root table address invalid",
862 "Context table ptr is invalid",
863 "non-zero reserved fields in RTP",
864 "non-zero reserved fields in CTP",
865 "non-zero reserved fields in PTE",
867 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
869 const char *dmar_get_fault_reason(u8 fault_reason
)
871 if (fault_reason
> MAX_FAULT_REASON_IDX
)
874 return fault_reason_strings
[fault_reason
];
877 void dmar_msi_unmask(unsigned int irq
)
879 struct intel_iommu
*iommu
= get_irq_data(irq
);
883 spin_lock_irqsave(&iommu
->register_lock
, flag
);
884 writel(0, iommu
->reg
+ DMAR_FECTL_REG
);
885 /* Read a reg to force flush the post write */
886 readl(iommu
->reg
+ DMAR_FECTL_REG
);
887 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
890 void dmar_msi_mask(unsigned int irq
)
893 struct intel_iommu
*iommu
= get_irq_data(irq
);
896 spin_lock_irqsave(&iommu
->register_lock
, flag
);
897 writel(DMA_FECTL_IM
, iommu
->reg
+ DMAR_FECTL_REG
);
898 /* Read a reg to force flush the post write */
899 readl(iommu
->reg
+ DMAR_FECTL_REG
);
900 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
903 void dmar_msi_write(int irq
, struct msi_msg
*msg
)
905 struct intel_iommu
*iommu
= get_irq_data(irq
);
908 spin_lock_irqsave(&iommu
->register_lock
, flag
);
909 writel(msg
->data
, iommu
->reg
+ DMAR_FEDATA_REG
);
910 writel(msg
->address_lo
, iommu
->reg
+ DMAR_FEADDR_REG
);
911 writel(msg
->address_hi
, iommu
->reg
+ DMAR_FEUADDR_REG
);
912 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
915 void dmar_msi_read(int irq
, struct msi_msg
*msg
)
917 struct intel_iommu
*iommu
= get_irq_data(irq
);
920 spin_lock_irqsave(&iommu
->register_lock
, flag
);
921 msg
->data
= readl(iommu
->reg
+ DMAR_FEDATA_REG
);
922 msg
->address_lo
= readl(iommu
->reg
+ DMAR_FEADDR_REG
);
923 msg
->address_hi
= readl(iommu
->reg
+ DMAR_FEUADDR_REG
);
924 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
927 static int iommu_page_fault_do_one(struct intel_iommu
*iommu
, int type
,
928 u8 fault_reason
, u16 source_id
, unsigned long long addr
)
932 reason
= dmar_get_fault_reason(fault_reason
);
935 "DMAR:[%s] Request device [%02x:%02x.%d] "
937 "DMAR:[fault reason %02d] %s\n",
938 (type
? "DMA Read" : "DMA Write"),
939 (source_id
>> 8), PCI_SLOT(source_id
& 0xFF),
940 PCI_FUNC(source_id
& 0xFF), addr
, fault_reason
, reason
);
944 #define PRIMARY_FAULT_REG_LEN (16)
945 static irqreturn_t
iommu_page_fault(int irq
, void *dev_id
)
947 struct intel_iommu
*iommu
= dev_id
;
948 int reg
, fault_index
;
952 spin_lock_irqsave(&iommu
->register_lock
, flag
);
953 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
955 /* TBD: ignore advanced fault log currently */
956 if (!(fault_status
& DMA_FSTS_PPF
))
959 fault_index
= dma_fsts_fault_record_index(fault_status
);
960 reg
= cap_fault_reg_offset(iommu
->cap
);
968 /* highest 32 bits */
969 data
= readl(iommu
->reg
+ reg
+
970 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
971 if (!(data
& DMA_FRCD_F
))
974 fault_reason
= dma_frcd_fault_reason(data
);
975 type
= dma_frcd_type(data
);
977 data
= readl(iommu
->reg
+ reg
+
978 fault_index
* PRIMARY_FAULT_REG_LEN
+ 8);
979 source_id
= dma_frcd_source_id(data
);
981 guest_addr
= dmar_readq(iommu
->reg
+ reg
+
982 fault_index
* PRIMARY_FAULT_REG_LEN
);
983 guest_addr
= dma_frcd_page_addr(guest_addr
);
984 /* clear the fault */
985 writel(DMA_FRCD_F
, iommu
->reg
+ reg
+
986 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
988 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
990 iommu_page_fault_do_one(iommu
, type
, fault_reason
,
991 source_id
, guest_addr
);
994 if (fault_index
> cap_num_fault_regs(iommu
->cap
))
996 spin_lock_irqsave(&iommu
->register_lock
, flag
);
999 /* clear primary fault overflow */
1000 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
1001 if (fault_status
& DMA_FSTS_PFO
)
1002 writel(DMA_FSTS_PFO
, iommu
->reg
+ DMAR_FSTS_REG
);
1004 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1008 int dmar_set_interrupt(struct intel_iommu
*iommu
)
1014 printk(KERN_ERR
"IOMMU: no free vectors\n");
1018 set_irq_data(irq
, iommu
);
1021 ret
= arch_setup_dmar_msi(irq
);
1023 set_irq_data(irq
, NULL
);
1029 /* Force fault register is cleared */
1030 iommu_page_fault(irq
, iommu
);
1032 ret
= request_irq(irq
, iommu_page_fault
, 0, iommu
->name
, iommu
);
1034 printk(KERN_ERR
"IOMMU: can't request irq\n");
1038 static int iommu_init_domains(struct intel_iommu
*iommu
)
1040 unsigned long ndomains
;
1041 unsigned long nlongs
;
1043 ndomains
= cap_ndoms(iommu
->cap
);
1044 pr_debug("Number of Domains supportd <%ld>\n", ndomains
);
1045 nlongs
= BITS_TO_LONGS(ndomains
);
1047 /* TBD: there might be 64K domains,
1048 * consider other allocation for future chip
1050 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
1051 if (!iommu
->domain_ids
) {
1052 printk(KERN_ERR
"Allocating domain id array failed\n");
1055 iommu
->domains
= kcalloc(ndomains
, sizeof(struct dmar_domain
*),
1057 if (!iommu
->domains
) {
1058 printk(KERN_ERR
"Allocating domain array failed\n");
1059 kfree(iommu
->domain_ids
);
1063 spin_lock_init(&iommu
->lock
);
1066 * if Caching mode is set, then invalid translations are tagged
1067 * with domainid 0. Hence we need to pre-allocate it.
1069 if (cap_caching_mode(iommu
->cap
))
1070 set_bit(0, iommu
->domain_ids
);
1075 static void domain_exit(struct dmar_domain
*domain
);
1077 void free_dmar_iommu(struct intel_iommu
*iommu
)
1079 struct dmar_domain
*domain
;
1082 i
= find_first_bit(iommu
->domain_ids
, cap_ndoms(iommu
->cap
));
1083 for (; i
< cap_ndoms(iommu
->cap
); ) {
1084 domain
= iommu
->domains
[i
];
1085 clear_bit(i
, iommu
->domain_ids
);
1086 domain_exit(domain
);
1087 i
= find_next_bit(iommu
->domain_ids
,
1088 cap_ndoms(iommu
->cap
), i
+1);
1091 if (iommu
->gcmd
& DMA_GCMD_TE
)
1092 iommu_disable_translation(iommu
);
1095 set_irq_data(iommu
->irq
, NULL
);
1096 /* This will mask the irq */
1097 free_irq(iommu
->irq
, iommu
);
1098 destroy_irq(iommu
->irq
);
1101 kfree(iommu
->domains
);
1102 kfree(iommu
->domain_ids
);
1104 /* free context mapping */
1105 free_context_table(iommu
);
1108 static struct dmar_domain
* iommu_alloc_domain(struct intel_iommu
*iommu
)
1111 unsigned long ndomains
;
1112 struct dmar_domain
*domain
;
1113 unsigned long flags
;
1115 domain
= alloc_domain_mem();
1119 ndomains
= cap_ndoms(iommu
->cap
);
1121 spin_lock_irqsave(&iommu
->lock
, flags
);
1122 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1123 if (num
>= ndomains
) {
1124 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1125 free_domain_mem(domain
);
1126 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1130 set_bit(num
, iommu
->domain_ids
);
1132 domain
->iommu
= iommu
;
1133 iommu
->domains
[num
] = domain
;
1134 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1139 static void iommu_free_domain(struct dmar_domain
*domain
)
1141 unsigned long flags
;
1143 spin_lock_irqsave(&domain
->iommu
->lock
, flags
);
1144 clear_bit(domain
->id
, domain
->iommu
->domain_ids
);
1145 spin_unlock_irqrestore(&domain
->iommu
->lock
, flags
);
1148 static struct iova_domain reserved_iova_list
;
1149 static struct lock_class_key reserved_alloc_key
;
1150 static struct lock_class_key reserved_rbtree_key
;
1152 static void dmar_init_reserved_ranges(void)
1154 struct pci_dev
*pdev
= NULL
;
1159 init_iova_domain(&reserved_iova_list
, DMA_32BIT_PFN
);
1161 lockdep_set_class(&reserved_iova_list
.iova_alloc_lock
,
1162 &reserved_alloc_key
);
1163 lockdep_set_class(&reserved_iova_list
.iova_rbtree_lock
,
1164 &reserved_rbtree_key
);
1166 /* IOAPIC ranges shouldn't be accessed by DMA */
1167 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1168 IOVA_PFN(IOAPIC_RANGE_END
));
1170 printk(KERN_ERR
"Reserve IOAPIC range failed\n");
1172 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1173 for_each_pci_dev(pdev
) {
1176 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1177 r
= &pdev
->resource
[i
];
1178 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1182 size
= r
->end
- addr
;
1183 size
= PAGE_ALIGN(size
);
1184 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(addr
),
1185 IOVA_PFN(size
+ addr
) - 1);
1187 printk(KERN_ERR
"Reserve iova failed\n");
static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
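/*
 * Example (illustrative): a requested guest width of 48 bits already sits
 * on a level boundary ((48 - 12) % 9 == 0) and is kept as-is, giving
 * width_to_agaw(48) == 2, i.e. a 4-level page table.  A 40-bit request is
 * rounded up to the next boundary, 48 bits, since adjusted widths can only
 * fall on the 30/39/48/57-bit steps (capped at 64).
 */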
1212 static int domain_init(struct dmar_domain
*domain
, int guest_width
)
1214 struct intel_iommu
*iommu
;
1215 int adjust_width
, agaw
;
1216 unsigned long sagaw
;
1218 init_iova_domain(&domain
->iovad
, DMA_32BIT_PFN
);
1219 spin_lock_init(&domain
->mapping_lock
);
1221 domain_reserve_special_ranges(domain
);
1223 /* calculate AGAW */
1224 iommu
= domain
->iommu
;
1225 if (guest_width
> cap_mgaw(iommu
->cap
))
1226 guest_width
= cap_mgaw(iommu
->cap
);
1227 domain
->gaw
= guest_width
;
1228 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1229 agaw
= width_to_agaw(adjust_width
);
1230 sagaw
= cap_sagaw(iommu
->cap
);
1231 if (!test_bit(agaw
, &sagaw
)) {
1232 /* hardware doesn't support it, choose a bigger one */
1233 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw
);
1234 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1238 domain
->agaw
= agaw
;
1239 INIT_LIST_HEAD(&domain
->devices
);
1241 /* always allocate the top pgd */
1242 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page();
1245 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE
);
1249 static void domain_exit(struct dmar_domain
*domain
)
1253 /* Domain 0 is reserved, so don't process it */
1257 domain_remove_dev_info(domain
);
1259 put_iova_domain(&domain
->iovad
);
1260 end
= DOMAIN_MAX_ADDR(domain
->gaw
);
1261 end
= end
& (~PAGE_MASK
);
1264 dma_pte_clear_range(domain
, 0, end
);
1266 /* free page tables */
1267 dma_pte_free_pagetable(domain
, 0, end
);
1269 iommu_free_domain(domain
);
1270 free_domain_mem(domain
);
1273 static int domain_context_mapping_one(struct dmar_domain
*domain
,
1276 struct context_entry
*context
;
1277 struct intel_iommu
*iommu
= domain
->iommu
;
1278 unsigned long flags
;
1280 pr_debug("Set context mapping for %02x:%02x.%d\n",
1281 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
1282 BUG_ON(!domain
->pgd
);
1283 context
= device_to_context_entry(iommu
, bus
, devfn
);
1286 spin_lock_irqsave(&iommu
->lock
, flags
);
1287 if (context_present(*context
)) {
1288 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1292 context_set_domain_id(*context
, domain
->id
);
1293 context_set_address_width(*context
, domain
->agaw
);
1294 context_set_address_root(*context
, virt_to_phys(domain
->pgd
));
1295 context_set_translation_type(*context
, CONTEXT_TT_MULTI_LEVEL
);
1296 context_set_fault_enable(*context
);
1297 context_set_present(*context
);
1298 __iommu_flush_cache(iommu
, context
, sizeof(*context
));
1300 /* it's a non-present to present mapping */
1301 if (iommu
->flush
.flush_context(iommu
, domain
->id
,
1302 (((u16
)bus
) << 8) | devfn
, DMA_CCMD_MASK_NOBIT
,
1303 DMA_CCMD_DEVICE_INVL
, 1))
1304 iommu_flush_write_buffer(iommu
);
1306 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_DSI_FLUSH
, 0);
1308 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1313 domain_context_mapping(struct dmar_domain
*domain
, struct pci_dev
*pdev
)
1316 struct pci_dev
*tmp
, *parent
;
1318 ret
= domain_context_mapping_one(domain
, pdev
->bus
->number
,
1323 /* dependent device mapping */
1324 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1327 /* Secondary interface's bus number and devfn 0 */
1328 parent
= pdev
->bus
->self
;
1329 while (parent
!= tmp
) {
1330 ret
= domain_context_mapping_one(domain
, parent
->bus
->number
,
1334 parent
= parent
->bus
->self
;
1336 if (tmp
->is_pcie
) /* this is a PCIE-to-PCI bridge */
1337 return domain_context_mapping_one(domain
,
1338 tmp
->subordinate
->number
, 0);
1339 else /* this is a legacy PCI bridge */
1340 return domain_context_mapping_one(domain
,
1341 tmp
->bus
->number
, tmp
->devfn
);
1344 static int domain_context_mapped(struct dmar_domain
*domain
,
1345 struct pci_dev
*pdev
)
1348 struct pci_dev
*tmp
, *parent
;
1350 ret
= device_context_mapped(domain
->iommu
,
1351 pdev
->bus
->number
, pdev
->devfn
);
1354 /* dependent device mapping */
1355 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1358 /* Secondary interface's bus number and devfn 0 */
1359 parent
= pdev
->bus
->self
;
1360 while (parent
!= tmp
) {
1361 ret
= device_context_mapped(domain
->iommu
, parent
->bus
->number
,
1365 parent
= parent
->bus
->self
;
1368 return device_context_mapped(domain
->iommu
,
1369 tmp
->subordinate
->number
, 0);
1371 return device_context_mapped(domain
->iommu
,
1372 tmp
->bus
->number
, tmp
->devfn
);
static int
domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
			u64 hpa, size_t size, int prot)
{
	u64 start_pfn, end_pfn;
	struct dma_pte *pte;
	int index;
	int addr_width = agaw_to_width(domain->agaw);

	hpa &= (((u64)1) << addr_width) - 1;

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;
	iova &= PAGE_MASK;
	start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
	end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
	index = 0;
	while (start_pfn < end_pfn) {
		pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
		if (!pte)
			return -ENOMEM;
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		BUG_ON(dma_pte_addr(*pte));
		dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
		dma_set_pte_prot(*pte, prot);
		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
		start_pfn++;
		index++;
	}
	return 0;
}
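/*
 * domain_page_mapping() is the low-level mapping primitive used both by
 * the RMRR / identity-map setup below and by the streaming DMA path: for
 * instance iommu_prepare_identity_map() maps a region 1:1 with
 * domain_page_mapping(domain, base, base, size, DMA_PTE_READ|DMA_PTE_WRITE),
 * while __intel_map_single() maps a freshly allocated IOVA range onto the
 * buffer's physical pages with the protection derived from the DMA
 * direction.
 */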
1409 static void detach_domain_for_dev(struct dmar_domain
*domain
, u8 bus
, u8 devfn
)
1411 clear_context_table(domain
->iommu
, bus
, devfn
);
1412 domain
->iommu
->flush
.flush_context(domain
->iommu
, 0, 0, 0,
1413 DMA_CCMD_GLOBAL_INVL
, 0);
1414 domain
->iommu
->flush
.flush_iotlb(domain
->iommu
, 0, 0, 0,
1415 DMA_TLB_GLOBAL_FLUSH
, 0);
1418 static void domain_remove_dev_info(struct dmar_domain
*domain
)
1420 struct device_domain_info
*info
;
1421 unsigned long flags
;
1423 spin_lock_irqsave(&device_domain_lock
, flags
);
1424 while (!list_empty(&domain
->devices
)) {
1425 info
= list_entry(domain
->devices
.next
,
1426 struct device_domain_info
, link
);
1427 list_del(&info
->link
);
1428 list_del(&info
->global
);
1430 info
->dev
->dev
.archdata
.iommu
= NULL
;
1431 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1433 detach_domain_for_dev(info
->domain
, info
->bus
, info
->devfn
);
1434 free_devinfo_mem(info
);
1436 spin_lock_irqsave(&device_domain_lock
, flags
);
1438 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1443 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1445 static struct dmar_domain
*
1446 find_domain(struct pci_dev
*pdev
)
1448 struct device_domain_info
*info
;
1450 /* No lock here, assumes no domain exit in normal case */
1451 info
= pdev
->dev
.archdata
.iommu
;
1453 return info
->domain
;
1457 /* domain is initialized */
1458 static struct dmar_domain
*get_domain_for_dev(struct pci_dev
*pdev
, int gaw
)
1460 struct dmar_domain
*domain
, *found
= NULL
;
1461 struct intel_iommu
*iommu
;
1462 struct dmar_drhd_unit
*drhd
;
1463 struct device_domain_info
*info
, *tmp
;
1464 struct pci_dev
*dev_tmp
;
1465 unsigned long flags
;
1466 int bus
= 0, devfn
= 0;
1468 domain
= find_domain(pdev
);
1472 dev_tmp
= pci_find_upstream_pcie_bridge(pdev
);
1474 if (dev_tmp
->is_pcie
) {
1475 bus
= dev_tmp
->subordinate
->number
;
1478 bus
= dev_tmp
->bus
->number
;
1479 devfn
= dev_tmp
->devfn
;
1481 spin_lock_irqsave(&device_domain_lock
, flags
);
1482 list_for_each_entry(info
, &device_domain_list
, global
) {
1483 if (info
->bus
== bus
&& info
->devfn
== devfn
) {
1484 found
= info
->domain
;
1488 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1489 /* pcie-pci bridge already has a domain, use it */
1496 /* Allocate new domain for the device */
1497 drhd
= dmar_find_matched_drhd_unit(pdev
);
1499 printk(KERN_ERR
"IOMMU: can't find DMAR for device %s\n",
1503 iommu
= drhd
->iommu
;
1505 domain
= iommu_alloc_domain(iommu
);
1509 if (domain_init(domain
, gaw
)) {
1510 domain_exit(domain
);
1514 /* register pcie-to-pci device */
1516 info
= alloc_devinfo_mem();
1518 domain_exit(domain
);
1522 info
->devfn
= devfn
;
1524 info
->domain
= domain
;
1525 /* This domain is shared by devices under p2p bridge */
1526 domain
->flags
|= DOMAIN_FLAG_MULTIPLE_DEVICES
;
1528 /* pcie-to-pci bridge already has a domain, use it */
1530 spin_lock_irqsave(&device_domain_lock
, flags
);
1531 list_for_each_entry(tmp
, &device_domain_list
, global
) {
1532 if (tmp
->bus
== bus
&& tmp
->devfn
== devfn
) {
1533 found
= tmp
->domain
;
1538 free_devinfo_mem(info
);
1539 domain_exit(domain
);
1542 list_add(&info
->link
, &domain
->devices
);
1543 list_add(&info
->global
, &device_domain_list
);
1545 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1549 info
= alloc_devinfo_mem();
1552 info
->bus
= pdev
->bus
->number
;
1553 info
->devfn
= pdev
->devfn
;
1555 info
->domain
= domain
;
1556 spin_lock_irqsave(&device_domain_lock
, flags
);
1557 /* somebody else was faster and already set it */
1558 found
= find_domain(pdev
);
1559 if (found
!= NULL
) {
1560 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1561 if (found
!= domain
) {
1562 domain_exit(domain
);
1565 free_devinfo_mem(info
);
1568 list_add(&info
->link
, &domain
->devices
);
1569 list_add(&info
->global
, &device_domain_list
);
1570 pdev
->dev
.archdata
.iommu
= info
;
1571 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1574 /* recheck it here, maybe others set it */
1575 return find_domain(pdev
);
1578 static int iommu_prepare_identity_map(struct pci_dev
*pdev
,
1579 unsigned long long start
,
1580 unsigned long long end
)
1582 struct dmar_domain
*domain
;
1584 unsigned long long base
;
1588 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1589 pci_name(pdev
), start
, end
);
1590 /* page table init */
1591 domain
= get_domain_for_dev(pdev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1595 /* The address might not be aligned */
1596 base
= start
& PAGE_MASK
;
1598 size
= PAGE_ALIGN(size
);
1599 if (!reserve_iova(&domain
->iovad
, IOVA_PFN(base
),
1600 IOVA_PFN(base
+ size
) - 1)) {
1601 printk(KERN_ERR
"IOMMU: reserve iova failed\n");
1606 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1607 size
, base
, pci_name(pdev
));
1609 * The RMRR range might overlap with the physical memory range,
1612 dma_pte_clear_range(domain
, base
, base
+ size
);
1614 ret
= domain_page_mapping(domain
, base
, base
, size
,
1615 DMA_PTE_READ
|DMA_PTE_WRITE
);
1619 /* context entry init */
1620 ret
= domain_context_mapping(domain
, pdev
);
1624 domain_exit(domain
);
1629 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit
*rmrr
,
1630 struct pci_dev
*pdev
)
1632 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1634 return iommu_prepare_identity_map(pdev
, rmrr
->base_address
,
1635 rmrr
->end_address
+ 1);
1638 #ifdef CONFIG_DMAR_GFX_WA
1639 struct iommu_prepare_data
{
1640 struct pci_dev
*pdev
;
1644 static int __init
iommu_prepare_work_fn(unsigned long start_pfn
,
1645 unsigned long end_pfn
, void *datax
)
1647 struct iommu_prepare_data
*data
;
1649 data
= (struct iommu_prepare_data
*)datax
;
1651 data
->ret
= iommu_prepare_identity_map(data
->pdev
,
1652 start_pfn
<<PAGE_SHIFT
, end_pfn
<<PAGE_SHIFT
);
1657 static int __init
iommu_prepare_with_active_regions(struct pci_dev
*pdev
)
1660 struct iommu_prepare_data data
;
1665 for_each_online_node(nid
) {
1666 work_with_active_regions(nid
, iommu_prepare_work_fn
, &data
);
1673 static void __init
iommu_prepare_gfx_mapping(void)
1675 struct pci_dev
*pdev
= NULL
;
1678 for_each_pci_dev(pdev
) {
1679 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
||
1680 !IS_GFX_DEVICE(pdev
))
1682 printk(KERN_INFO
"IOMMU: gfx device %s 1-1 mapping\n",
1684 ret
= iommu_prepare_with_active_regions(pdev
);
1686 printk(KERN_ERR
"IOMMU: mapping reserved region failed\n");
1691 #ifdef CONFIG_DMAR_FLOPPY_WA
1692 static inline void iommu_prepare_isa(void)
1694 struct pci_dev
*pdev
;
1697 pdev
= pci_get_class(PCI_CLASS_BRIDGE_ISA
<< 8, NULL
);
1701 printk(KERN_INFO
"IOMMU: Prepare 0-16M unity mapping for LPC\n");
1702 ret
= iommu_prepare_identity_map(pdev
, 0, 16*1024*1024);
1705 printk("IOMMU: Failed to create 0-64M identity map, "
1706 "floppy might not work\n");
1710 static inline void iommu_prepare_isa(void)
1714 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1716 static int __init
init_dmars(void)
1718 struct dmar_drhd_unit
*drhd
;
1719 struct dmar_rmrr_unit
*rmrr
;
1720 struct pci_dev
*pdev
;
1721 struct intel_iommu
*iommu
;
1722 int i
, ret
, unit
= 0;
1727 * initialize and program root entry to not present
1730 for_each_drhd_unit(drhd
) {
1733 * lock not needed as this is only incremented in the single
1734 * threaded kernel __init code path; all other accesses are reads
1739 deferred_flush
= kzalloc(g_num_of_iommus
*
1740 sizeof(struct deferred_flush_tables
), GFP_KERNEL
);
1741 if (!deferred_flush
) {
1746 for_each_drhd_unit(drhd
) {
1750 iommu
= drhd
->iommu
;
1752 ret
= iommu_init_domains(iommu
);
1758 * we could share the same root & context tables
1759 * among all IOMMUs. Need to split it later.
1761 ret
= iommu_alloc_root_entry(iommu
);
1763 printk(KERN_ERR
"IOMMU: allocate root entry failed\n");
1768 for_each_drhd_unit(drhd
) {
1772 iommu
= drhd
->iommu
;
1773 if (dmar_enable_qi(iommu
)) {
1775 * Queued Invalidate not enabled, use Register Based
1778 iommu
->flush
.flush_context
= __iommu_flush_context
;
1779 iommu
->flush
.flush_iotlb
= __iommu_flush_iotlb
;
1780 printk(KERN_INFO
"IOMMU 0x%Lx: using Register based "
1782 (unsigned long long)drhd
->reg_base_addr
);
1784 iommu
->flush
.flush_context
= qi_flush_context
;
1785 iommu
->flush
.flush_iotlb
= qi_flush_iotlb
;
1786 printk(KERN_INFO
"IOMMU 0x%Lx: using Queued "
1788 (unsigned long long)drhd
->reg_base_addr
);
1794 * for each dev attached to rmrr
1796 * locate drhd for dev, alloc domain for dev
1797 * allocate free domain
1798 * allocate page table entries for rmrr
1799 * if context not allocated for bus
1800 * allocate and init context
1801 * set present in root table for this bus
1802 * init context with domain, translation etc
1806 for_each_rmrr_units(rmrr
) {
1807 for (i
= 0; i
< rmrr
->devices_cnt
; i
++) {
1808 pdev
= rmrr
->devices
[i
];
1809 /* some BIOSes list nonexistent devices in the DMAR table */
1812 ret
= iommu_prepare_rmrr_dev(rmrr
, pdev
);
1815 "IOMMU: mapping reserved region failed\n");
1819 iommu_prepare_gfx_mapping();
1821 iommu_prepare_isa();
1826 * global invalidate context cache
1827 * global invalidate iotlb
1828 * enable translation
1830 for_each_drhd_unit(drhd
) {
1833 iommu
= drhd
->iommu
;
1834 sprintf (iommu
->name
, "dmar%d", unit
++);
1836 iommu_flush_write_buffer(iommu
);
1838 ret
= dmar_set_interrupt(iommu
);
1842 iommu_set_root_entry(iommu
);
1844 iommu
->flush
.flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
,
1846 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
,
1848 iommu_disable_protect_mem_regions(iommu
);
1850 ret
= iommu_enable_translation(iommu
);
1857 for_each_drhd_unit(drhd
) {
1860 iommu
= drhd
->iommu
;
static inline u64 aligned_size(u64 host_addr, size_t size)
{
	u64 addr;
	addr = (host_addr & (~PAGE_MASK)) + size;
	return PAGE_ALIGN(addr);
}
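/*
 * For example (illustrative): a 0x100-byte buffer starting at physical
 * address 0x1fff has a page offset of 0xfff, so aligned_size(0x1fff, 0x100)
 * == PAGE_ALIGN(0x10ff) == 0x2000 with 4KiB pages, i.e. the mapping must
 * cover the two pages the buffer straddles.
 */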
1874 iommu_alloc_iova(struct dmar_domain
*domain
, size_t size
, u64 end
)
1878 /* Make sure it's in range */
1879 end
= min_t(u64
, DOMAIN_MAX_ADDR(domain
->gaw
), end
);
1880 if (!size
|| (IOVA_START_ADDR
+ size
> end
))
1883 piova
= alloc_iova(&domain
->iovad
,
1884 size
>> PAGE_SHIFT
, IOVA_PFN(end
), 1);
1888 static struct iova
*
1889 __intel_alloc_iova(struct device
*dev
, struct dmar_domain
*domain
,
1890 size_t size
, u64 dma_mask
)
1892 struct pci_dev
*pdev
= to_pci_dev(dev
);
1893 struct iova
*iova
= NULL
;
1895 if (dma_mask
<= DMA_32BIT_MASK
|| dmar_forcedac
)
1896 iova
= iommu_alloc_iova(domain
, size
, dma_mask
);
1899 * First try to allocate an io virtual address in
1900 * DMA_32BIT_MASK and if that fails then try allocating
1903 iova
= iommu_alloc_iova(domain
, size
, DMA_32BIT_MASK
);
1905 iova
= iommu_alloc_iova(domain
, size
, dma_mask
);
1909 printk(KERN_ERR
"Allocating iova for %s failed", pci_name(pdev
));
1916 static struct dmar_domain
*
1917 get_valid_domain_for_dev(struct pci_dev
*pdev
)
1919 struct dmar_domain
*domain
;
1922 domain
= get_domain_for_dev(pdev
,
1923 DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1926 "Allocating domain for %s failed", pci_name(pdev
));
1930 /* make sure context mapping is ok */
1931 if (unlikely(!domain_context_mapped(domain
, pdev
))) {
1932 ret
= domain_context_mapping(domain
, pdev
);
1935 "Domain context map for %s failed",
1944 static dma_addr_t
__intel_map_single(struct device
*hwdev
, phys_addr_t paddr
,
1945 size_t size
, int dir
, u64 dma_mask
)
1947 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
1948 struct dmar_domain
*domain
;
1949 phys_addr_t start_paddr
;
1954 BUG_ON(dir
== DMA_NONE
);
1955 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1958 domain
= get_valid_domain_for_dev(pdev
);
1962 size
= aligned_size((u64
)paddr
, size
);
1964 iova
= __intel_alloc_iova(hwdev
, domain
, size
, pdev
->dma_mask
);
1968 start_paddr
= (phys_addr_t
)iova
->pfn_lo
<< PAGE_SHIFT
;
1971 * Check if DMAR supports zero-length reads on write only
1974 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
1975 !cap_zlr(domain
->iommu
->cap
))
1976 prot
|= DMA_PTE_READ
;
1977 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
1978 prot
|= DMA_PTE_WRITE
;
1980 * [paddr, paddr + size) might cover a partial page, so we should map the whole
1981 * page. Note: if two parts of one page are separately mapped, we
1982 * might have two guest_addr mapping to the same host paddr, but this
1983 * is not a big problem
1985 ret
= domain_page_mapping(domain
, start_paddr
,
1986 ((u64
)paddr
) & PAGE_MASK
, size
, prot
);
1990 /* it's a non-present to present mapping */
1991 ret
= iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
1992 start_paddr
, size
>> VTD_PAGE_SHIFT
, 1);
1994 iommu_flush_write_buffer(domain
->iommu
);
1996 return start_paddr
+ ((u64
)paddr
& (~PAGE_MASK
));
2000 __free_iova(&domain
->iovad
, iova
);
2001 printk(KERN_ERR
"Device %s request: %lx@%llx dir %d --- failed\n",
2002 pci_name(pdev
), size
, (unsigned long long)paddr
, dir
);
2006 dma_addr_t
intel_map_single(struct device
*hwdev
, phys_addr_t paddr
,
2007 size_t size
, int dir
)
2009 return __intel_map_single(hwdev
, paddr
, size
, dir
,
2010 to_pci_dev(hwdev
)->dma_mask
);
static void flush_unmaps(void)
{
	int i, j;

	timer_on = 0;

	/* just flush them all */
	for (i = 0; i < g_num_of_iommus; i++) {
		if (deferred_flush[i].next) {
			struct intel_iommu *iommu =
				deferred_flush[i].domain[0]->iommu;

			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
						 DMA_TLB_GLOBAL_FLUSH, 0);
			for (j = 0; j < deferred_flush[i].next; j++) {
				__free_iova(&deferred_flush[i].domain[j]->iovad,
						deferred_flush[i].iova[j]);
			}
			deferred_flush[i].next = 0;
		}
	}

	list_size = 0;
}

static void flush_unmaps_timeout(unsigned long data)
{
	unsigned long flags;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	flush_unmaps();
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}

static void add_unmap(struct dmar_domain *dom, struct iova *iova)
{
	unsigned long flags;
	int next, iommu_id;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	if (list_size == HIGH_WATER_MARK)
		flush_unmaps();

	iommu_id = dom->iommu->seq_id;

	next = deferred_flush[iommu_id].next;
	deferred_flush[iommu_id].domain[next] = dom;
	deferred_flush[iommu_id].iova[next] = iova;
	deferred_flush[iommu_id].next++;

	if (!timer_on) {
		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
		timer_on = 1;
	}
	list_size++;
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
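/*
 * Design note: intel_unmap_single() below normally queues the IOVA via
 * add_unmap() instead of flushing the IOTLB synchronously; completed
 * unmaps are batched per IOMMU and released either when the 10ms timer
 * fires or when HIGH_WATER_MARK entries pile up, which is the batching
 * that the "intel_iommu=strict" option disables.
 */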
2071 void intel_unmap_single(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
2074 struct pci_dev
*pdev
= to_pci_dev(dev
);
2075 struct dmar_domain
*domain
;
2076 unsigned long start_addr
;
2079 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2081 domain
= find_domain(pdev
);
2084 iova
= find_iova(&domain
->iovad
, IOVA_PFN(dev_addr
));
2088 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT
;
2089 size
= aligned_size((u64
)dev_addr
, size
);
2091 pr_debug("Device %s unmapping: %lx@%llx\n",
2092 pci_name(pdev
), size
, (unsigned long long)start_addr
);
2094 /* clear the whole page */
2095 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
2096 /* free page tables */
2097 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
2098 if (intel_iommu_strict
) {
2099 if (iommu_flush_iotlb_psi(domain
->iommu
,
2100 domain
->id
, start_addr
, size
>> VTD_PAGE_SHIFT
, 0))
2101 iommu_flush_write_buffer(domain
->iommu
);
2103 __free_iova(&domain
->iovad
, iova
);
2105 add_unmap(domain
, iova
);
2107 * queue up the release of the unmap to save the 1/6th of the
2108 * cpu used up by the iotlb flush operation...
2113 void *intel_alloc_coherent(struct device
*hwdev
, size_t size
,
2114 dma_addr_t
*dma_handle
, gfp_t flags
)
2119 size
= PAGE_ALIGN(size
);
2120 order
= get_order(size
);
2121 flags
&= ~(GFP_DMA
| GFP_DMA32
);
2123 vaddr
= (void *)__get_free_pages(flags
, order
);
2126 memset(vaddr
, 0, size
);
2128 *dma_handle
= __intel_map_single(hwdev
, virt_to_bus(vaddr
), size
,
2130 hwdev
->coherent_dma_mask
);
2133 free_pages((unsigned long)vaddr
, order
);
2137 void intel_free_coherent(struct device
*hwdev
, size_t size
, void *vaddr
,
2138 dma_addr_t dma_handle
)
2142 size
= PAGE_ALIGN(size
);
2143 order
= get_order(size
);
2145 intel_unmap_single(hwdev
, dma_handle
, size
, DMA_BIDIRECTIONAL
);
2146 free_pages((unsigned long)vaddr
, order
);
2149 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2151 void intel_unmap_sg(struct device
*hwdev
, struct scatterlist
*sglist
,
2152 int nelems
, int dir
)
2155 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2156 struct dmar_domain
*domain
;
2157 unsigned long start_addr
;
2161 struct scatterlist
*sg
;
2163 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2166 domain
= find_domain(pdev
);
2168 iova
= find_iova(&domain
->iovad
, IOVA_PFN(sglist
[0].dma_address
));
2171 for_each_sg(sglist
, sg
, nelems
, i
) {
2172 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2173 size
+= aligned_size((u64
)addr
, sg
->length
);
2176 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT
;
2178 /* clear the whole page */
2179 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
2180 /* free page tables */
2181 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
2183 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
, start_addr
,
2184 size
>> VTD_PAGE_SHIFT
, 0))
2185 iommu_flush_write_buffer(domain
->iommu
);
2188 __free_iova(&domain
->iovad
, iova
);
2191 static int intel_nontranslate_map_sg(struct device
*hddev
,
2192 struct scatterlist
*sglist
, int nelems
, int dir
)
2195 struct scatterlist
*sg
;
2197 for_each_sg(sglist
, sg
, nelems
, i
) {
2198 BUG_ON(!sg_page(sg
));
2199 sg
->dma_address
= virt_to_bus(SG_ENT_VIRT_ADDRESS(sg
));
2200 sg
->dma_length
= sg
->length
;
2205 int intel_map_sg(struct device
*hwdev
, struct scatterlist
*sglist
, int nelems
,
2210 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2211 struct dmar_domain
*domain
;
2215 struct iova
*iova
= NULL
;
2217 struct scatterlist
*sg
;
2218 unsigned long start_addr
;
2220 BUG_ON(dir
== DMA_NONE
);
2221 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2222 return intel_nontranslate_map_sg(hwdev
, sglist
, nelems
, dir
);
2224 domain
= get_valid_domain_for_dev(pdev
);
2228 for_each_sg(sglist
, sg
, nelems
, i
) {
2229 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2230 addr
= (void *)virt_to_phys(addr
);
2231 size
+= aligned_size((u64
)addr
, sg
->length
);
2234 iova
= __intel_alloc_iova(hwdev
, domain
, size
, pdev
->dma_mask
);
2236 sglist
->dma_length
= 0;
2241 * Check if DMAR supports zero-length reads on write only
2244 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2245 !cap_zlr(domain
->iommu
->cap
))
2246 prot
|= DMA_PTE_READ
;
2247 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2248 prot
|= DMA_PTE_WRITE
;
2250 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT
;
2252 for_each_sg(sglist
, sg
, nelems
, i
) {
2253 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2254 addr
= (void *)virt_to_phys(addr
);
2255 size
= aligned_size((u64
)addr
, sg
->length
);
2256 ret
= domain_page_mapping(domain
, start_addr
+ offset
,
2257 ((u64
)addr
) & PAGE_MASK
,
2260 /* clear the page */
2261 dma_pte_clear_range(domain
, start_addr
,
2262 start_addr
+ offset
);
2263 /* free page tables */
2264 dma_pte_free_pagetable(domain
, start_addr
,
2265 start_addr
+ offset
);
2267 __free_iova(&domain
->iovad
, iova
);
2270 sg
->dma_address
= start_addr
+ offset
+
2271 ((u64
)addr
& (~PAGE_MASK
));
2272 sg
->dma_length
= sg
->length
;
2276 /* it's a non-present to present mapping */
2277 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
2278 start_addr
, offset
>> VTD_PAGE_SHIFT
, 1))
2279 iommu_flush_write_buffer(domain
->iommu
);
static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};
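/*
 * intel_iommu_init() points the global dma_ops at this structure, so once
 * the IOMMU is initialized the streaming and coherent DMA API calls issued
 * by drivers are routed to the intel_* entry points above.
 */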
2292 static inline int iommu_domain_cache_init(void)
2296 iommu_domain_cache
= kmem_cache_create("iommu_domain",
2297 sizeof(struct dmar_domain
),
2302 if (!iommu_domain_cache
) {
2303 printk(KERN_ERR
"Couldn't create iommu_domain cache\n");
2310 static inline int iommu_devinfo_cache_init(void)
2314 iommu_devinfo_cache
= kmem_cache_create("iommu_devinfo",
2315 sizeof(struct device_domain_info
),
2319 if (!iommu_devinfo_cache
) {
2320 printk(KERN_ERR
"Couldn't create devinfo cache\n");
2327 static inline int iommu_iova_cache_init(void)
2331 iommu_iova_cache
= kmem_cache_create("iommu_iova",
2332 sizeof(struct iova
),
2336 if (!iommu_iova_cache
) {
2337 printk(KERN_ERR
"Couldn't create iova cache\n");
2344 static int __init
iommu_init_mempool(void)
2347 ret
= iommu_iova_cache_init();
2351 ret
= iommu_domain_cache_init();
2355 ret
= iommu_devinfo_cache_init();
2359 kmem_cache_destroy(iommu_domain_cache
);
2361 kmem_cache_destroy(iommu_iova_cache
);
2366 static void __init
iommu_exit_mempool(void)
2368 kmem_cache_destroy(iommu_devinfo_cache
);
2369 kmem_cache_destroy(iommu_domain_cache
);
2370 kmem_cache_destroy(iommu_iova_cache
);
2374 static void __init
init_no_remapping_devices(void)
2376 struct dmar_drhd_unit
*drhd
;
2378 for_each_drhd_unit(drhd
) {
2379 if (!drhd
->include_all
) {
2381 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2382 if (drhd
->devices
[i
] != NULL
)
2384 /* ignore DMAR unit if no pci devices exist */
2385 if (i
== drhd
->devices_cnt
)
2393 for_each_drhd_unit(drhd
) {
2395 if (drhd
->ignored
|| drhd
->include_all
)
2398 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2399 if (drhd
->devices
[i
] &&
2400 !IS_GFX_DEVICE(drhd
->devices
[i
]))
2403 if (i
< drhd
->devices_cnt
)
2406 /* bypass IOMMU if it is just for gfx devices */
2408 for (i
= 0; i
< drhd
->devices_cnt
; i
++) {
2409 if (!drhd
->devices
[i
])
2411 drhd
->devices
[i
]->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
2416 int __init
intel_iommu_init(void)
2420 if (dmar_table_init())
2423 if (dmar_dev_scope_init())
2427 * Check the need for DMA-remapping initialization now.
2428 * Above initialization will also be used by Interrupt-remapping.
2430 if (no_iommu
|| swiotlb
|| dmar_disabled
)
2433 iommu_init_mempool();
2434 dmar_init_reserved_ranges();
2436 init_no_remapping_devices();
2440 printk(KERN_ERR
"IOMMU: dmar init failed\n");
2441 put_iova_domain(&reserved_iova_list
);
2442 iommu_exit_mempool();
2446 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2448 init_timer(&unmap_timer
);
2450 dma_ops
= &intel_dma_ops
;
2454 void intel_iommu_domain_exit(struct dmar_domain
*domain
)
2458 /* Domain 0 is reserved, so don't process it */
2462 end
= DOMAIN_MAX_ADDR(domain
->gaw
);
2463 end
= end
& (~VTD_PAGE_MASK
);
2466 dma_pte_clear_range(domain
, 0, end
);
2468 /* free page tables */
2469 dma_pte_free_pagetable(domain
, 0, end
);
2471 iommu_free_domain(domain
);
2472 free_domain_mem(domain
);
2474 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit
);
2476 struct dmar_domain
*intel_iommu_domain_alloc(struct pci_dev
*pdev
)
2478 struct dmar_drhd_unit
*drhd
;
2479 struct dmar_domain
*domain
;
2480 struct intel_iommu
*iommu
;
2482 drhd
= dmar_find_matched_drhd_unit(pdev
);
2484 printk(KERN_ERR
"intel_iommu_domain_alloc: drhd == NULL\n");
2488 iommu
= drhd
->iommu
;
2491 "intel_iommu_domain_alloc: iommu == NULL\n");
2494 domain
= iommu_alloc_domain(iommu
);
2497 "intel_iommu_domain_alloc: domain == NULL\n");
2500 if (domain_init(domain
, DEFAULT_DOMAIN_ADDRESS_WIDTH
)) {
2502 "intel_iommu_domain_alloc: domain_init() failed\n");
2503 intel_iommu_domain_exit(domain
);
2508 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc
);
2510 int intel_iommu_context_mapping(
2511 struct dmar_domain
*domain
, struct pci_dev
*pdev
)
2514 rc
= domain_context_mapping(domain
, pdev
);
2517 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping
);
2519 int intel_iommu_page_mapping(
2520 struct dmar_domain
*domain
, dma_addr_t iova
,
2521 u64 hpa
, size_t size
, int prot
)
2524 rc
= domain_page_mapping(domain
, iova
, hpa
, size
, prot
);
2527 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping
);
2529 void intel_iommu_detach_dev(struct dmar_domain
*domain
, u8 bus
, u8 devfn
)
2531 detach_domain_for_dev(domain
, bus
, devfn
);
2533 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev
);
2535 struct dmar_domain
*
2536 intel_iommu_find_domain(struct pci_dev
*pdev
)
2538 return find_domain(pdev
);
2540 EXPORT_SYMBOL_GPL(intel_iommu_find_domain
);
2542 int intel_iommu_found(void)
2544 return g_num_of_iommus
;
2546 EXPORT_SYMBOL_GPL(intel_iommu_found
);
2548 u64
intel_iommu_iova_to_pfn(struct dmar_domain
*domain
, u64 iova
)
2550 struct dma_pte
*pte
;
2554 pte
= addr_to_dma_pte(domain
, iova
);
2557 pfn
= dma_pte_addr(*pte
);
2559 return pfn
>> VTD_PAGE_SHIFT
;
2561 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn
);