1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/export.h>
28 #include <linux/slab.h>
29 #include <linux/irq.h>
30 #include <linux/interrupt.h>
31 #include <linux/spinlock.h>
32 #include <linux/pci.h>
33 #include <linux/dmar.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/mempool.h>
36 #include <linux/timer.h>
37 #include <linux/iova.h>
38 #include <linux/iommu.h>
39 #include <linux/intel-iommu.h>
40 #include <linux/syscore_ops.h>
41 #include <linux/tboot.h>
42 #include <linux/dmi.h>
43 #include <linux/pci-ats.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46
47 #define ROOT_SIZE VTD_PAGE_SIZE
48 #define CONTEXT_SIZE VTD_PAGE_SIZE
49
50 #define IS_BRIDGE_HOST_DEVICE(pdev) \
51 ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START (0xfee00000)
57 #define IOAPIC_RANGE_END (0xfeefffff)
58 #define IOVA_START_ADDR (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63
64 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
65 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
66
67 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
68 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
69 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
70 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
71 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
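/*
 * Worked example of the clamp above: __DOMAIN_MAX_PFN(48) is 2^36 - 1.
 * On a 32-bit kernel that exceeds ULONG_MAX, so DOMAIN_MAX_PFN() clamps
 * it to (unsigned long)-1, while on 64-bit it passes through unchanged.
 */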
72
73 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
74 #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
75 #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
76
77 /* page table handling */
78 #define LEVEL_STRIDE (9)
79 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
80
81 /*
82 * This bitmap is used to advertise the page sizes our hardware supports
83 * to the IOMMU core, which will then use this information to split
84 * physically contiguous memory regions it is mapping into page sizes
85 * that we support.
86 *
87 * Traditionally the IOMMU core just handed us the mappings directly,
88 * after making sure the size is an order of a 4KiB page and that the
89 * mapping has natural alignment.
90 *
91 * To retain this behavior, we currently advertise that we support
92 * all page sizes that are an order of 4KiB.
93 *
94 * If at some point we'd like to utilize the IOMMU core's new behavior,
95 * we could change this to advertise the real page sizes we support.
96 */
97 #define INTEL_IOMMU_PGSIZES (~0xFFFUL)
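/*
 * Illustrative reading of the bitmap above: the IOMMU core treats bit n
 * as "page size 2^n is supported". ~0xFFFUL clears bits 0-11 and sets
 * every bit from 12 upwards, so 4KiB (bit 12), 8KiB (bit 13), 16KiB
 * (bit 14) and so on are all advertised - i.e. every power-of-two
 * multiple of 4KiB, matching the traditional behaviour described above.
 */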
98
99 static inline int agaw_to_level(int agaw)
100 {
101 return agaw + 2;
102 }
103
104 static inline int agaw_to_width(int agaw)
105 {
106 return 30 + agaw * LEVEL_STRIDE;
107 }
108
109 static inline int width_to_agaw(int width)
110 {
111 return (width - 30) / LEVEL_STRIDE;
112 }
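/*
 * For example, agaw 1 corresponds to a 3-level table covering
 * 30 + 1 * 9 = 39 bits, agaw 2 to a 4-level table covering 48 bits,
 * and width_to_agaw(48) gives 2 back again.
 */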
113
114 static inline unsigned int level_to_offset_bits(int level)
115 {
116 return (level - 1) * LEVEL_STRIDE;
117 }
118
119 static inline int pfn_level_offset(unsigned long pfn, int level)
120 {
121 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
122 }
123
124 static inline unsigned long level_mask(int level)
125 {
126 return -1UL << level_to_offset_bits(level);
127 }
128
129 static inline unsigned long level_size(int level)
130 {
131 return 1UL << level_to_offset_bits(level);
132 }
133
134 static inline unsigned long align_to_level(unsigned long pfn, int level)
135 {
136 return (pfn + level_size(level) - 1) & level_mask(level);
137 }
138
139 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
140 {
141 return 1 << ((lvl - 1) * LEVEL_STRIDE);
142 }
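/*
 * E.g. lvl_to_nr_pages(1) == 1 (a 4KiB leaf), lvl_to_nr_pages(2) == 512
 * (one 2MiB superpage worth of 4KiB pages) and lvl_to_nr_pages(3) ==
 * 512 * 512 (1GiB), following LEVEL_STRIDE == 9.
 */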
143
144 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
145 are never going to work. */
146 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
147 {
148 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
149 }
150
151 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
152 {
153 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
154 }
155 static inline unsigned long page_to_dma_pfn(struct page *pg)
156 {
157 return mm_to_dma_pfn(page_to_pfn(pg));
158 }
159 static inline unsigned long virt_to_dma_pfn(void *p)
160 {
161 return page_to_dma_pfn(virt_to_page(p));
162 }
163
164 /* global iommu list, set NULL for ignored DMAR units */
165 static struct intel_iommu **g_iommus;
166
167 static void __init check_tylersburg_isoch(void);
168 static int rwbf_quirk;
169
170 /*
171 * set to 1 to panic the kernel if VT-d can't be successfully enabled
172 * (used when kernel is launched w/ TXT)
173 */
174 static int force_on = 0;
175
176 /*
177 * 0: Present
178 * 1-11: Reserved
179 * 12-63: Context Ptr (12 - (haw-1))
180 * 64-127: Reserved
181 */
182 struct root_entry {
183 u64 val;
184 u64 rsvd1;
185 };
186 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
187 static inline bool root_present(struct root_entry *root)
188 {
189 return (root->val & 1);
190 }
191 static inline void set_root_present(struct root_entry *root)
192 {
193 root->val |= 1;
194 }
195 static inline void set_root_value(struct root_entry *root, unsigned long value)
196 {
197 root->val |= value & VTD_PAGE_MASK;
198 }
199
200 static inline struct context_entry *
201 get_context_addr_from_root(struct root_entry *root)
202 {
203 return (struct context_entry *)
204 (root_present(root)?phys_to_virt(
205 root->val & VTD_PAGE_MASK) :
206 NULL);
207 }
208
209 /*
210 * low 64 bits:
211 * 0: present
212 * 1: fault processing disable
213 * 2-3: translation type
214 * 12-63: address space root
215 * high 64 bits:
216 * 0-2: address width
217 * 3-6: aval
218 * 8-23: domain id
219 */
220 struct context_entry {
221 u64 lo;
222 u64 hi;
223 };
224
225 static inline bool context_present(struct context_entry *context)
226 {
227 return (context->lo & 1);
228 }
229 static inline void context_set_present(struct context_entry *context)
230 {
231 context->lo |= 1;
232 }
233
234 static inline void context_set_fault_enable(struct context_entry *context)
235 {
236 context->lo &= (((u64)-1) << 2) | 1;
237 }
238
239 static inline void context_set_translation_type(struct context_entry *context,
240 unsigned long value)
241 {
242 context->lo &= (((u64)-1) << 4) | 3;
243 context->lo |= (value & 3) << 2;
244 }
245
246 static inline void context_set_address_root(struct context_entry *context,
247 unsigned long value)
248 {
249 context->lo |= value & VTD_PAGE_MASK;
250 }
251
252 static inline void context_set_address_width(struct context_entry *context,
253 unsigned long value)
254 {
255 context->hi |= value & 7;
256 }
257
258 static inline void context_set_domain_id(struct context_entry *context,
259 unsigned long value)
260 {
261 context->hi |= (value & ((1 << 16) - 1)) << 8;
262 }
263
264 static inline void context_clear_entry(struct context_entry *context)
265 {
266 context->lo = 0;
267 context->hi = 0;
268 }
269
270 /*
271 * 0: readable
272 * 1: writable
273 * 2-6: reserved
274 * 7: super page
275 * 8-10: available
276 * 11: snoop behavior
277 * 12-63: Host physical address
278 */
279 struct dma_pte {
280 u64 val;
281 };
282
283 static inline void dma_clear_pte(struct dma_pte *pte)
284 {
285 pte->val = 0;
286 }
287
288 static inline void dma_set_pte_readable(struct dma_pte *pte)
289 {
290 pte->val |= DMA_PTE_READ;
291 }
292
293 static inline void dma_set_pte_writable(struct dma_pte *pte)
294 {
295 pte->val |= DMA_PTE_WRITE;
296 }
297
298 static inline void dma_set_pte_snp(struct dma_pte *pte)
299 {
300 pte->val |= DMA_PTE_SNP;
301 }
302
303 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
304 {
305 pte->val = (pte->val & ~3) | (prot & 3);
306 }
307
308 static inline u64 dma_pte_addr(struct dma_pte *pte)
309 {
310 #ifdef CONFIG_64BIT
311 return pte->val & VTD_PAGE_MASK;
312 #else
313 /* Must have a full atomic 64-bit read */
314 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
315 #endif
316 }
317
318 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
319 {
320 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
321 }
322
323 static inline bool dma_pte_present(struct dma_pte *pte)
324 {
325 return (pte->val & 3) != 0;
326 }
327
328 static inline bool dma_pte_superpage(struct dma_pte *pte)
329 {
330 return (pte->val & (1 << 7));
331 }
332
333 static inline int first_pte_in_page(struct dma_pte *pte)
334 {
335 return !((unsigned long)pte & ~VTD_PAGE_MASK);
336 }
337
338 /*
339 * This domain is a statically identity mapping domain.
340 * 1. This domain creates a static 1:1 mapping to all usable memory.
341 * 2. It maps to each iommu if successful.
342 * 3. Each iommu maps to this domain if successful.
343 */
344 static struct dmar_domain *si_domain;
345 static int hw_pass_through = 1;
346
347 /* devices under the same p2p bridge are owned in one domain */
348 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
349
350 /* domain represents a virtual machine, more than one device
351 * across iommus may be owned in one domain, e.g. kvm guest.
352 */
353 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
354
355 /* si_domain contains multiple devices */
356 #define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
357
358 struct dmar_domain {
359 int id; /* domain id */
360 int nid; /* node id */
361 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
362
363 struct list_head devices; /* all devices' list */
364 struct iova_domain iovad; /* iova's that belong to this domain */
365
366 struct dma_pte *pgd; /* virtual address */
367 int gaw; /* max guest address width */
368
369 /* adjusted guest address width, 0 is level 2 30-bit */
370 int agaw;
371
372 int flags; /* flags to find out type of domain */
373
374 int iommu_coherency;/* indicate coherency of iommu access */
375 int iommu_snooping; /* indicate snooping control feature*/
376 int iommu_count; /* reference count of iommu */
377 int iommu_superpage;/* Level of superpages supported:
378 0 == 4KiB (no superpages), 1 == 2MiB,
379 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
380 spinlock_t iommu_lock; /* protect iommu set in domain */
381 u64 max_addr; /* maximum mapped address */
382 };
383
384 /* PCI domain-device relationship */
385 struct device_domain_info {
386 struct list_head link; /* link to domain siblings */
387 struct list_head global; /* link to global list */
388 int segment; /* PCI domain */
389 u8 bus; /* PCI bus number */
390 u8 devfn; /* PCI devfn number */
391 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
392 struct intel_iommu *iommu; /* IOMMU used by this device */
393 struct dmar_domain *domain; /* pointer to domain */
394 };
395
396 static void flush_unmaps_timeout(unsigned long data);
397
398 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
399
400 #define HIGH_WATER_MARK 250
401 struct deferred_flush_tables {
402 int next;
403 struct iova *iova[HIGH_WATER_MARK];
404 struct dmar_domain *domain[HIGH_WATER_MARK];
405 };
406
407 static struct deferred_flush_tables *deferred_flush;
408
409 /* bitmap for indexing intel_iommus */
410 static int g_num_of_iommus;
411
412 static DEFINE_SPINLOCK(async_umap_flush_lock);
413 static LIST_HEAD(unmaps_to_do);
414
415 static int timer_on;
416 static long list_size;
417
418 static void domain_remove_dev_info(struct dmar_domain *domain);
419
420 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
421 int dmar_disabled = 0;
422 #else
423 int dmar_disabled = 1;
424 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
425
426 static int dmar_map_gfx = 1;
427 static int dmar_forcedac;
428 static int intel_iommu_strict;
429 static int intel_iommu_superpage = 1;
430
431 int intel_iommu_gfx_mapped;
432 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
433
434 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
435 static DEFINE_SPINLOCK(device_domain_lock);
436 static LIST_HEAD(device_domain_list);
437
438 static struct iommu_ops intel_iommu_ops;
439
440 static int __init intel_iommu_setup(char *str)
441 {
442 if (!str)
443 return -EINVAL;
444 while (*str) {
445 if (!strncmp(str, "on", 2)) {
446 dmar_disabled = 0;
447 printk(KERN_INFO "Intel-IOMMU: enabled\n");
448 } else if (!strncmp(str, "off", 3)) {
449 dmar_disabled = 1;
450 printk(KERN_INFO "Intel-IOMMU: disabled\n");
451 } else if (!strncmp(str, "igfx_off", 8)) {
452 dmar_map_gfx = 0;
453 printk(KERN_INFO
454 "Intel-IOMMU: disable GFX device mapping\n");
455 } else if (!strncmp(str, "forcedac", 8)) {
456 printk(KERN_INFO
457 "Intel-IOMMU: Forcing DAC for PCI devices\n");
458 dmar_forcedac = 1;
459 } else if (!strncmp(str, "strict", 6)) {
460 printk(KERN_INFO
461 "Intel-IOMMU: disable batched IOTLB flush\n");
462 intel_iommu_strict = 1;
463 } else if (!strncmp(str, "sp_off", 6)) {
464 printk(KERN_INFO
465 "Intel-IOMMU: disable supported super page\n");
466 intel_iommu_superpage = 0;
467 }
468
469 str += strcspn(str, ",");
470 while (*str == ',')
471 str++;
472 }
473 return 0;
474 }
475 __setup("intel_iommu=", intel_iommu_setup);
476
477 static struct kmem_cache *iommu_domain_cache;
478 static struct kmem_cache *iommu_devinfo_cache;
479 static struct kmem_cache *iommu_iova_cache;
480
481 static inline void *alloc_pgtable_page(int node)
482 {
483 struct page *page;
484 void *vaddr = NULL;
485
486 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
487 if (page)
488 vaddr = page_address(page);
489 return vaddr;
490 }
491
492 static inline void free_pgtable_page(void *vaddr)
493 {
494 free_page((unsigned long)vaddr);
495 }
496
497 static inline void *alloc_domain_mem(void)
498 {
499 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
500 }
501
502 static void free_domain_mem(void *vaddr)
503 {
504 kmem_cache_free(iommu_domain_cache, vaddr);
505 }
506
507 static inline void * alloc_devinfo_mem(void)
508 {
509 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
510 }
511
512 static inline void free_devinfo_mem(void *vaddr)
513 {
514 kmem_cache_free(iommu_devinfo_cache, vaddr);
515 }
516
517 struct iova *alloc_iova_mem(void)
518 {
519 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
520 }
521
522 void free_iova_mem(struct iova *iova)
523 {
524 kmem_cache_free(iommu_iova_cache, iova);
525 }
526
527
528 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
529 {
530 unsigned long sagaw;
531 int agaw = -1;
532
533 sagaw = cap_sagaw(iommu->cap);
534 for (agaw = width_to_agaw(max_gaw);
535 agaw >= 0; agaw--) {
536 if (test_bit(agaw, &sagaw))
537 break;
538 }
539
540 return agaw;
541 }
542
543 /*
544 * Calculate max SAGAW for each iommu.
545 */
546 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
547 {
548 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
549 }
550
551 /*
552 * calculate agaw for each iommu.
553 * "SAGAW" may be different across iommus, use a default agaw, and
554 * get a smaller supported agaw for iommus that don't support the default agaw.
555 */
556 int iommu_calculate_agaw(struct intel_iommu *iommu)
557 {
558 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
559 }
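/*
 * E.g. with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48, the search in
 * __iommu_calculate_agaw() starts at width_to_agaw(48) == 2 (4-level,
 * 48-bit) and walks down until it finds an agaw whose bit is set in
 * the IOMMU's SAGAW capability field.
 */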
560
561 /* This function only returns a single iommu in a domain */
562 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
563 {
564 int iommu_id;
565
566 /* si_domain and vm domain should not get here. */
567 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
568 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
569
570 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
571 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
572 return NULL;
573
574 return g_iommus[iommu_id];
575 }
576
577 static void domain_update_iommu_coherency(struct dmar_domain *domain)
578 {
579 int i;
580
581 domain->iommu_coherency = 1;
582
583 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
584 if (!ecap_coherent(g_iommus[i]->ecap)) {
585 domain->iommu_coherency = 0;
586 break;
587 }
588 }
589 }
590
591 static void domain_update_iommu_snooping(struct dmar_domain *domain)
592 {
593 int i;
594
595 domain->iommu_snooping = 1;
596
597 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
598 if (!ecap_sc_support(g_iommus[i]->ecap)) {
599 domain->iommu_snooping = 0;
600 break;
601 }
602 }
603 }
604
605 static void domain_update_iommu_superpage(struct dmar_domain *domain)
606 {
607 struct dmar_drhd_unit *drhd;
608 struct intel_iommu *iommu = NULL;
609 int mask = 0xf;
610
611 if (!intel_iommu_superpage) {
612 domain->iommu_superpage = 0;
613 return;
614 }
615
616 /* set iommu_superpage to the smallest common denominator */
617 for_each_active_iommu(iommu, drhd) {
618 mask &= cap_super_page_val(iommu->cap);
619 if (!mask) {
620 break;
621 }
622 }
623 domain->iommu_superpage = fls(mask);
624 }
625
626 /* Some capabilities may be different across iommus */
627 static void domain_update_iommu_cap(struct dmar_domain *domain)
628 {
629 domain_update_iommu_coherency(domain);
630 domain_update_iommu_snooping(domain);
631 domain_update_iommu_superpage(domain);
632 }
633
634 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
635 {
636 struct dmar_drhd_unit *drhd = NULL;
637 int i;
638
639 for_each_drhd_unit(drhd) {
640 if (drhd->ignored)
641 continue;
642 if (segment != drhd->segment)
643 continue;
644
645 for (i = 0; i < drhd->devices_cnt; i++) {
646 if (drhd->devices[i] &&
647 drhd->devices[i]->bus->number == bus &&
648 drhd->devices[i]->devfn == devfn)
649 return drhd->iommu;
650 if (drhd->devices[i] &&
651 drhd->devices[i]->subordinate &&
652 drhd->devices[i]->subordinate->number <= bus &&
653 drhd->devices[i]->subordinate->subordinate >= bus)
654 return drhd->iommu;
655 }
656
657 if (drhd->include_all)
658 return drhd->iommu;
659 }
660
661 return NULL;
662 }
663
664 static void domain_flush_cache(struct dmar_domain *domain,
665 void *addr, int size)
666 {
667 if (!domain->iommu_coherency)
668 clflush_cache_range(addr, size);
669 }
670
671 /* Gets context entry for a given bus and devfn */
672 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
673 u8 bus, u8 devfn)
674 {
675 struct root_entry *root;
676 struct context_entry *context;
677 unsigned long phy_addr;
678 unsigned long flags;
679
680 spin_lock_irqsave(&iommu->lock, flags);
681 root = &iommu->root_entry[bus];
682 context = get_context_addr_from_root(root);
683 if (!context) {
684 context = (struct context_entry *)
685 alloc_pgtable_page(iommu->node);
686 if (!context) {
687 spin_unlock_irqrestore(&iommu->lock, flags);
688 return NULL;
689 }
690 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
691 phy_addr = virt_to_phys((void *)context);
692 set_root_value(root, phy_addr);
693 set_root_present(root);
694 __iommu_flush_cache(iommu, root, sizeof(*root));
695 }
696 spin_unlock_irqrestore(&iommu->lock, flags);
697 return &context[devfn];
698 }
699
700 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
701 {
702 struct root_entry *root;
703 struct context_entry *context;
704 int ret;
705 unsigned long flags;
706
707 spin_lock_irqsave(&iommu->lock, flags);
708 root = &iommu->root_entry[bus];
709 context = get_context_addr_from_root(root);
710 if (!context) {
711 ret = 0;
712 goto out;
713 }
714 ret = context_present(&context[devfn]);
715 out:
716 spin_unlock_irqrestore(&iommu->lock, flags);
717 return ret;
718 }
719
720 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
721 {
722 struct root_entry *root;
723 struct context_entry *context;
724 unsigned long flags;
725
726 spin_lock_irqsave(&iommu->lock, flags);
727 root = &iommu->root_entry[bus];
728 context = get_context_addr_from_root(root);
729 if (context) {
730 context_clear_entry(&context[devfn]);
731 __iommu_flush_cache(iommu, &context[devfn], \
732 sizeof(*context));
733 }
734 spin_unlock_irqrestore(&iommu->lock, flags);
735 }
736
737 static void free_context_table(struct intel_iommu *iommu)
738 {
739 struct root_entry *root;
740 int i;
741 unsigned long flags;
742 struct context_entry *context;
743
744 spin_lock_irqsave(&iommu->lock, flags);
745 if (!iommu->root_entry) {
746 goto out;
747 }
748 for (i = 0; i < ROOT_ENTRY_NR; i++) {
749 root = &iommu->root_entry[i];
750 context = get_context_addr_from_root(root);
751 if (context)
752 free_pgtable_page(context);
753 }
754 free_pgtable_page(iommu->root_entry);
755 iommu->root_entry = NULL;
756 out:
757 spin_unlock_irqrestore(&iommu->lock, flags);
758 }
759
760 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
761 unsigned long pfn, int target_level)
762 {
763 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
764 struct dma_pte *parent, *pte = NULL;
765 int level = agaw_to_level(domain->agaw);
766 int offset;
767
768 BUG_ON(!domain->pgd);
769 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
770 parent = domain->pgd;
771
772 while (level > 0) {
773 void *tmp_page;
774
775 offset = pfn_level_offset(pfn, level);
776 pte = &parent[offset];
777 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
778 break;
779 if (level == target_level)
780 break;
781
782 if (!dma_pte_present(pte)) {
783 uint64_t pteval;
784
785 tmp_page = alloc_pgtable_page(domain->nid);
786
787 if (!tmp_page)
788 return NULL;
789
790 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
791 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
792 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
793 /* Someone else set it while we were thinking; use theirs. */
794 free_pgtable_page(tmp_page);
795 } else {
796 dma_pte_addr(pte);
797 domain_flush_cache(domain, pte, sizeof(*pte));
798 }
799 }
800 parent = phys_to_virt(dma_pte_addr(pte));
801 level--;
802 }
803
804 return pte;
805 }
806
807
808 /* return address's pte at specific level */
809 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
810 unsigned long pfn,
811 int level, int *large_page)
812 {
813 struct dma_pte *parent, *pte = NULL;
814 int total = agaw_to_level(domain->agaw);
815 int offset;
816
817 parent = domain->pgd;
818 while (level <= total) {
819 offset = pfn_level_offset(pfn, total);
820 pte = &parent[offset];
821 if (level == total)
822 return pte;
823
824 if (!dma_pte_present(pte)) {
825 *large_page = total;
826 break;
827 }
828
829 if (pte->val & DMA_PTE_LARGE_PAGE) {
830 *large_page = total;
831 return pte;
832 }
833
834 parent = phys_to_virt(dma_pte_addr(pte));
835 total--;
836 }
837 return NULL;
838 }
839
840 /* clear last level pte, a tlb flush should follow */
841 static int dma_pte_clear_range(struct dmar_domain *domain,
842 unsigned long start_pfn,
843 unsigned long last_pfn)
844 {
845 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
846 unsigned int large_page = 1;
847 struct dma_pte *first_pte, *pte;
848 int order;
849
850 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
851 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
852 BUG_ON(start_pfn > last_pfn);
853
854 /* we don't need lock here; nobody else touches the iova range */
855 do {
856 large_page = 1;
857 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
858 if (!pte) {
859 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
860 continue;
861 }
862 do {
863 dma_clear_pte(pte);
864 start_pfn += lvl_to_nr_pages(large_page);
865 pte++;
866 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
867
868 domain_flush_cache(domain, first_pte,
869 (void *)pte - (void *)first_pte);
870
871 } while (start_pfn && start_pfn <= last_pfn);
872
873 order = (large_page - 1) * 9;
874 return order;
875 }
876
877 /* free page table pages. last level pte should already be cleared */
878 static void dma_pte_free_pagetable(struct dmar_domain *domain,
879 unsigned long start_pfn,
880 unsigned long last_pfn)
881 {
882 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
883 struct dma_pte *first_pte, *pte;
884 int total = agaw_to_level(domain->agaw);
885 int level;
886 unsigned long tmp;
887 int large_page = 2;
888
889 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
890 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
891 BUG_ON(start_pfn > last_pfn);
892
893 /* We don't need lock here; nobody else touches the iova range */
894 level = 2;
895 while (level <= total) {
896 tmp = align_to_level(start_pfn, level);
897
898 /* If we can't even clear one PTE at this level, we're done */
899 if (tmp + level_size(level) - 1 > last_pfn)
900 return;
901
902 do {
903 large_page = level;
904 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
905 if (large_page > level)
906 level = large_page + 1;
907 if (!pte) {
908 tmp = align_to_level(tmp + 1, level + 1);
909 continue;
910 }
911 do {
912 if (dma_pte_present(pte)) {
913 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
914 dma_clear_pte(pte);
915 }
916 pte++;
917 tmp += level_size(level);
918 } while (!first_pte_in_page(pte) &&
919 tmp + level_size(level) - 1 <= last_pfn);
920
921 domain_flush_cache(domain, first_pte,
922 (void *)pte - (void *)first_pte);
923
924 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
925 level++;
926 }
927 /* free pgd */
928 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
929 free_pgtable_page(domain->pgd);
930 domain->pgd = NULL;
931 }
932 }
933
934 /* iommu handling */
935 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
936 {
937 struct root_entry *root;
938 unsigned long flags;
939
940 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
941 if (!root)
942 return -ENOMEM;
943
944 __iommu_flush_cache(iommu, root, ROOT_SIZE);
945
946 spin_lock_irqsave(&iommu->lock, flags);
947 iommu->root_entry = root;
948 spin_unlock_irqrestore(&iommu->lock, flags);
949
950 return 0;
951 }
952
953 static void iommu_set_root_entry(struct intel_iommu *iommu)
954 {
955 void *addr;
956 u32 sts;
957 unsigned long flag;
958
959 addr = iommu->root_entry;
960
961 raw_spin_lock_irqsave(&iommu->register_lock, flag);
962 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
963
964 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
965
966 /* Make sure hardware complete it */
967 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
968 readl, (sts & DMA_GSTS_RTPS), sts);
969
970 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
971 }
972
973 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
974 {
975 u32 val;
976 unsigned long flag;
977
978 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
979 return;
980
981 raw_spin_lock_irqsave(&iommu->register_lock, flag);
982 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
983
984 /* Make sure hardware complete it */
985 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
986 readl, (!(val & DMA_GSTS_WBFS)), val);
987
988 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
989 }
990
991 /* return value determines if we need a write buffer flush */
992 static void __iommu_flush_context(struct intel_iommu *iommu,
993 u16 did, u16 source_id, u8 function_mask,
994 u64 type)
995 {
996 u64 val = 0;
997 unsigned long flag;
998
999 switch (type) {
1000 case DMA_CCMD_GLOBAL_INVL:
1001 val = DMA_CCMD_GLOBAL_INVL;
1002 break;
1003 case DMA_CCMD_DOMAIN_INVL:
1004 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1005 break;
1006 case DMA_CCMD_DEVICE_INVL:
1007 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1008 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1009 break;
1010 default:
1011 BUG();
1012 }
1013 val |= DMA_CCMD_ICC;
1014
1015 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1016 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1017
1018 /* Make sure hardware complete it */
1019 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1020 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1021
1022 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1023 }
1024
1025 /* return value determines if we need a write buffer flush */
1026 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1027 u64 addr, unsigned int size_order, u64 type)
1028 {
1029 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1030 u64 val = 0, val_iva = 0;
1031 unsigned long flag;
1032
1033 switch (type) {
1034 case DMA_TLB_GLOBAL_FLUSH:
1035 /* global flush doesn't need set IVA_REG */
1036 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1037 break;
1038 case DMA_TLB_DSI_FLUSH:
1039 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1040 break;
1041 case DMA_TLB_PSI_FLUSH:
1042 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1043 /* Note: always flush non-leaf currently */
1044 val_iva = size_order | addr;
1045 break;
1046 default:
1047 BUG();
1048 }
1049 /* Note: set drain read/write */
1050 #if 0
1051 /*
1052 * This is probably meant to be extra secure. Looks like we can
1053 * ignore it without any impact.
1054 */
1055 if (cap_read_drain(iommu->cap))
1056 val |= DMA_TLB_READ_DRAIN;
1057 #endif
1058 if (cap_write_drain(iommu->cap))
1059 val |= DMA_TLB_WRITE_DRAIN;
1060
1061 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1062 /* Note: Only uses first TLB reg currently */
1063 if (val_iva)
1064 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1065 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1066
1067 /* Make sure hardware complete it */
1068 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1069 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1070
1071 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1072
1073 /* check IOTLB invalidation granularity */
1074 if (DMA_TLB_IAIG(val) == 0)
1075 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1076 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1077 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1078 (unsigned long long)DMA_TLB_IIRG(type),
1079 (unsigned long long)DMA_TLB_IAIG(val));
1080 }
1081
1082 static struct device_domain_info *iommu_support_dev_iotlb(
1083 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1084 {
1085 int found = 0;
1086 unsigned long flags;
1087 struct device_domain_info *info;
1088 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1089
1090 if (!ecap_dev_iotlb_support(iommu->ecap))
1091 return NULL;
1092
1093 if (!iommu->qi)
1094 return NULL;
1095
1096 spin_lock_irqsave(&device_domain_lock, flags);
1097 list_for_each_entry(info, &domain->devices, link)
1098 if (info->bus == bus && info->devfn == devfn) {
1099 found = 1;
1100 break;
1101 }
1102 spin_unlock_irqrestore(&device_domain_lock, flags);
1103
1104 if (!found || !info->dev)
1105 return NULL;
1106
1107 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1108 return NULL;
1109
1110 if (!dmar_find_matched_atsr_unit(info->dev))
1111 return NULL;
1112
1113 info->iommu = iommu;
1114
1115 return info;
1116 }
1117
1118 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1119 {
1120 if (!info)
1121 return;
1122
1123 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1124 }
1125
1126 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1127 {
1128 if (!info->dev || !pci_ats_enabled(info->dev))
1129 return;
1130
1131 pci_disable_ats(info->dev);
1132 }
1133
1134 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1135 u64 addr, unsigned mask)
1136 {
1137 u16 sid, qdep;
1138 unsigned long flags;
1139 struct device_domain_info *info;
1140
1141 spin_lock_irqsave(&device_domain_lock, flags);
1142 list_for_each_entry(info, &domain->devices, link) {
1143 if (!info->dev || !pci_ats_enabled(info->dev))
1144 continue;
1145
1146 sid = info->bus << 8 | info->devfn;
1147 qdep = pci_ats_queue_depth(info->dev);
1148 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1149 }
1150 spin_unlock_irqrestore(&device_domain_lock, flags);
1151 }
1152
1153 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1154 unsigned long pfn, unsigned int pages, int map)
1155 {
1156 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1157 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1158
1159 BUG_ON(pages == 0);
1160
1161 /*
1162 * Fall back to domain-selective flush if there is no PSI support or the
1163 * size is too big.
1164 * PSI requires the page size to be 2 ^ x, and the base address to be
1165 * naturally aligned to the size.
1166 */
1167 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1168 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1169 DMA_TLB_DSI_FLUSH);
1170 else
1171 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1172 DMA_TLB_PSI_FLUSH);
1173
1174 /*
1175 * In caching mode, changes of pages from non-present to present require
1176 * flush. However, device IOTLB doesn't need to be flushed in this case.
1177 */
1178 if (!cap_caching_mode(iommu->cap) || !map)
1179 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1180 }
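/*
 * For instance, a request to flush 5 pages is rounded up to 8
 * (mask == 3), so the PSI flush covers a naturally aligned 8-page
 * region; if mask exceeded cap_max_amask_val() the code above would
 * instead fall back to the domain-selective flush.
 */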
1181
1182 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1183 {
1184 u32 pmen;
1185 unsigned long flags;
1186
1187 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1188 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1189 pmen &= ~DMA_PMEN_EPM;
1190 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1191
1192 /* wait for the protected region status bit to clear */
1193 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1194 readl, !(pmen & DMA_PMEN_PRS), pmen);
1195
1196 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1197 }
1198
1199 static int iommu_enable_translation(struct intel_iommu *iommu)
1200 {
1201 u32 sts;
1202 unsigned long flags;
1203
1204 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1205 iommu->gcmd |= DMA_GCMD_TE;
1206 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1207
1208 /* Make sure hardware complete it */
1209 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1210 readl, (sts & DMA_GSTS_TES), sts);
1211
1212 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1213 return 0;
1214 }
1215
1216 static int iommu_disable_translation(struct intel_iommu *iommu)
1217 {
1218 u32 sts;
1219 unsigned long flag;
1220
1221 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1222 iommu->gcmd &= ~DMA_GCMD_TE;
1223 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1224
1225 /* Make sure hardware complete it */
1226 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1227 readl, (!(sts & DMA_GSTS_TES)), sts);
1228
1229 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1230 return 0;
1231 }
1232
1233
1234 static int iommu_init_domains(struct intel_iommu *iommu)
1235 {
1236 unsigned long ndomains;
1237 unsigned long nlongs;
1238
1239 ndomains = cap_ndoms(iommu->cap);
1240 pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1241 ndomains);
1242 nlongs = BITS_TO_LONGS(ndomains);
1243
1244 spin_lock_init(&iommu->lock);
1245
1246 /* TBD: there might be 64K domains,
1247 * consider other allocation for future chip
1248 */
1249 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1250 if (!iommu->domain_ids) {
1251 printk(KERN_ERR "Allocating domain id array failed\n");
1252 return -ENOMEM;
1253 }
1254 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1255 GFP_KERNEL);
1256 if (!iommu->domains) {
1257 printk(KERN_ERR "Allocating domain array failed\n");
1258 return -ENOMEM;
1259 }
1260
1261 /*
1262 * if Caching mode is set, then invalid translations are tagged
1263 * with domainid 0. Hence we need to pre-allocate it.
1264 */
1265 if (cap_caching_mode(iommu->cap))
1266 set_bit(0, iommu->domain_ids);
1267 return 0;
1268 }
1269
1270
1271 static void domain_exit(struct dmar_domain *domain);
1272 static void vm_domain_exit(struct dmar_domain *domain);
1273
1274 void free_dmar_iommu(struct intel_iommu *iommu)
1275 {
1276 struct dmar_domain *domain;
1277 int i;
1278 unsigned long flags;
1279
1280 if ((iommu->domains) && (iommu->domain_ids)) {
1281 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1282 domain = iommu->domains[i];
1283 clear_bit(i, iommu->domain_ids);
1284
1285 spin_lock_irqsave(&domain->iommu_lock, flags);
1286 if (--domain->iommu_count == 0) {
1287 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1288 vm_domain_exit(domain);
1289 else
1290 domain_exit(domain);
1291 }
1292 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1293 }
1294 }
1295
1296 if (iommu->gcmd & DMA_GCMD_TE)
1297 iommu_disable_translation(iommu);
1298
1299 if (iommu->irq) {
1300 irq_set_handler_data(iommu->irq, NULL);
1301 /* This will mask the irq */
1302 free_irq(iommu->irq, iommu);
1303 destroy_irq(iommu->irq);
1304 }
1305
1306 kfree(iommu->domains);
1307 kfree(iommu->domain_ids);
1308
1309 g_iommus[iommu->seq_id] = NULL;
1310
1311 /* if all iommus are freed, free g_iommus */
1312 for (i = 0; i < g_num_of_iommus; i++) {
1313 if (g_iommus[i])
1314 break;
1315 }
1316
1317 if (i == g_num_of_iommus)
1318 kfree(g_iommus);
1319
1320 /* free context mapping */
1321 free_context_table(iommu);
1322 }
1323
1324 static struct dmar_domain *alloc_domain(void)
1325 {
1326 struct dmar_domain *domain;
1327
1328 domain = alloc_domain_mem();
1329 if (!domain)
1330 return NULL;
1331
1332 domain->nid = -1;
1333 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1334 domain->flags = 0;
1335
1336 return domain;
1337 }
1338
1339 static int iommu_attach_domain(struct dmar_domain *domain,
1340 struct intel_iommu *iommu)
1341 {
1342 int num;
1343 unsigned long ndomains;
1344 unsigned long flags;
1345
1346 ndomains = cap_ndoms(iommu->cap);
1347
1348 spin_lock_irqsave(&iommu->lock, flags);
1349
1350 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1351 if (num >= ndomains) {
1352 spin_unlock_irqrestore(&iommu->lock, flags);
1353 printk(KERN_ERR "IOMMU: no free domain ids\n");
1354 return -ENOMEM;
1355 }
1356
1357 domain->id = num;
1358 set_bit(num, iommu->domain_ids);
1359 set_bit(iommu->seq_id, &domain->iommu_bmp);
1360 iommu->domains[num] = domain;
1361 spin_unlock_irqrestore(&iommu->lock, flags);
1362
1363 return 0;
1364 }
1365
1366 static void iommu_detach_domain(struct dmar_domain *domain,
1367 struct intel_iommu *iommu)
1368 {
1369 unsigned long flags;
1370 int num, ndomains;
1371 int found = 0;
1372
1373 spin_lock_irqsave(&iommu->lock, flags);
1374 ndomains = cap_ndoms(iommu->cap);
1375 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1376 if (iommu->domains[num] == domain) {
1377 found = 1;
1378 break;
1379 }
1380 }
1381
1382 if (found) {
1383 clear_bit(num, iommu->domain_ids);
1384 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1385 iommu->domains[num] = NULL;
1386 }
1387 spin_unlock_irqrestore(&iommu->lock, flags);
1388 }
1389
1390 static struct iova_domain reserved_iova_list;
1391 static struct lock_class_key reserved_rbtree_key;
1392
1393 static int dmar_init_reserved_ranges(void)
1394 {
1395 struct pci_dev *pdev = NULL;
1396 struct iova *iova;
1397 int i;
1398
1399 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1400
1401 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1402 &reserved_rbtree_key);
1403
1404 /* IOAPIC ranges shouldn't be accessed by DMA */
1405 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1406 IOVA_PFN(IOAPIC_RANGE_END));
1407 if (!iova) {
1408 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1409 return -ENODEV;
1410 }
1411
1412 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1413 for_each_pci_dev(pdev) {
1414 struct resource *r;
1415
1416 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1417 r = &pdev->resource[i];
1418 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1419 continue;
1420 iova = reserve_iova(&reserved_iova_list,
1421 IOVA_PFN(r->start),
1422 IOVA_PFN(r->end));
1423 if (!iova) {
1424 printk(KERN_ERR "Reserve iova failed\n");
1425 return -ENODEV;
1426 }
1427 }
1428 }
1429 return 0;
1430 }
1431
1432 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1433 {
1434 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1435 }
1436
1437 static inline int guestwidth_to_adjustwidth(int gaw)
1438 {
1439 int agaw;
1440 int r = (gaw - 12) % 9;
1441
1442 if (r == 0)
1443 agaw = gaw;
1444 else
1445 agaw = gaw + 9 - r;
1446 if (agaw > 64)
1447 agaw = 64;
1448 return agaw;
1449 }
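/*
 * Example: a guest width of 48 is 12 plus a multiple of 9, so it is
 * returned unchanged; a guest width of 40 gives r == 1 and is rounded
 * up to the next page-table-friendly width, 48.
 */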
1450
1451 static int domain_init(struct dmar_domain *domain, int guest_width)
1452 {
1453 struct intel_iommu *iommu;
1454 int adjust_width, agaw;
1455 unsigned long sagaw;
1456
1457 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1458 spin_lock_init(&domain->iommu_lock);
1459
1460 domain_reserve_special_ranges(domain);
1461
1462 /* calculate AGAW */
1463 iommu = domain_get_iommu(domain);
1464 if (guest_width > cap_mgaw(iommu->cap))
1465 guest_width = cap_mgaw(iommu->cap);
1466 domain->gaw = guest_width;
1467 adjust_width = guestwidth_to_adjustwidth(guest_width);
1468 agaw = width_to_agaw(adjust_width);
1469 sagaw = cap_sagaw(iommu->cap);
1470 if (!test_bit(agaw, &sagaw)) {
1471 /* hardware doesn't support it, choose a bigger one */
1472 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1473 agaw = find_next_bit(&sagaw, 5, agaw);
1474 if (agaw >= 5)
1475 return -ENODEV;
1476 }
1477 domain->agaw = agaw;
1478 INIT_LIST_HEAD(&domain->devices);
1479
1480 if (ecap_coherent(iommu->ecap))
1481 domain->iommu_coherency = 1;
1482 else
1483 domain->iommu_coherency = 0;
1484
1485 if (ecap_sc_support(iommu->ecap))
1486 domain->iommu_snooping = 1;
1487 else
1488 domain->iommu_snooping = 0;
1489
1490 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1491 domain->iommu_count = 1;
1492 domain->nid = iommu->node;
1493
1494 /* always allocate the top pgd */
1495 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1496 if (!domain->pgd)
1497 return -ENOMEM;
1498 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1499 return 0;
1500 }
1501
1502 static void domain_exit(struct dmar_domain *domain)
1503 {
1504 struct dmar_drhd_unit *drhd;
1505 struct intel_iommu *iommu;
1506
1507 /* Domain 0 is reserved, so don't process it */
1508 if (!domain)
1509 return;
1510
1511 /* Flush any lazy unmaps that may reference this domain */
1512 if (!intel_iommu_strict)
1513 flush_unmaps_timeout(0);
1514
1515 domain_remove_dev_info(domain);
1516 /* destroy iovas */
1517 put_iova_domain(&domain->iovad);
1518
1519 /* clear ptes */
1520 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1521
1522 /* free page tables */
1523 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1524
1525 for_each_active_iommu(iommu, drhd)
1526 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1527 iommu_detach_domain(domain, iommu);
1528
1529 free_domain_mem(domain);
1530 }
1531
1532 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1533 u8 bus, u8 devfn, int translation)
1534 {
1535 struct context_entry *context;
1536 unsigned long flags;
1537 struct intel_iommu *iommu;
1538 struct dma_pte *pgd;
1539 unsigned long num;
1540 unsigned long ndomains;
1541 int id;
1542 int agaw;
1543 struct device_domain_info *info = NULL;
1544
1545 pr_debug("Set context mapping for %02x:%02x.%d\n",
1546 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1547
1548 BUG_ON(!domain->pgd);
1549 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1550 translation != CONTEXT_TT_MULTI_LEVEL);
1551
1552 iommu = device_to_iommu(segment, bus, devfn);
1553 if (!iommu)
1554 return -ENODEV;
1555
1556 context = device_to_context_entry(iommu, bus, devfn);
1557 if (!context)
1558 return -ENOMEM;
1559 spin_lock_irqsave(&iommu->lock, flags);
1560 if (context_present(context)) {
1561 spin_unlock_irqrestore(&iommu->lock, flags);
1562 return 0;
1563 }
1564
1565 id = domain->id;
1566 pgd = domain->pgd;
1567
1568 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1569 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1570 int found = 0;
1571
1572 /* find an available domain id for this device in iommu */
1573 ndomains = cap_ndoms(iommu->cap);
1574 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1575 if (iommu->domains[num] == domain) {
1576 id = num;
1577 found = 1;
1578 break;
1579 }
1580 }
1581
1582 if (found == 0) {
1583 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1584 if (num >= ndomains) {
1585 spin_unlock_irqrestore(&iommu->lock, flags);
1586 printk(KERN_ERR "IOMMU: no free domain ids\n");
1587 return -EFAULT;
1588 }
1589
1590 set_bit(num, iommu->domain_ids);
1591 iommu->domains[num] = domain;
1592 id = num;
1593 }
1594
1595 /* Skip top levels of page tables for
1596 * iommu which has less agaw than default.
1597 * Unnecessary for PT mode.
1598 */
1599 if (translation != CONTEXT_TT_PASS_THROUGH) {
1600 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1601 pgd = phys_to_virt(dma_pte_addr(pgd));
1602 if (!dma_pte_present(pgd)) {
1603 spin_unlock_irqrestore(&iommu->lock, flags);
1604 return -ENOMEM;
1605 }
1606 }
1607 }
1608 }
1609
1610 context_set_domain_id(context, id);
1611
1612 if (translation != CONTEXT_TT_PASS_THROUGH) {
1613 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1614 translation = info ? CONTEXT_TT_DEV_IOTLB :
1615 CONTEXT_TT_MULTI_LEVEL;
1616 }
1617 /*
1618 * In pass through mode, AW must be programmed to indicate the largest
1619 * AGAW value supported by hardware. And ASR is ignored by hardware.
1620 */
1621 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1622 context_set_address_width(context, iommu->msagaw);
1623 else {
1624 context_set_address_root(context, virt_to_phys(pgd));
1625 context_set_address_width(context, iommu->agaw);
1626 }
1627
1628 context_set_translation_type(context, translation);
1629 context_set_fault_enable(context);
1630 context_set_present(context);
1631 domain_flush_cache(domain, context, sizeof(*context));
1632
1633 /*
1634 * It's a non-present to present mapping. If hardware doesn't cache
1635 * non-present entries we only need to flush the write-buffer. If it
1636 * _does_ cache non-present entries, then it does so in the special
1637 * domain #0, which we have to flush:
1638 */
1639 if (cap_caching_mode(iommu->cap)) {
1640 iommu->flush.flush_context(iommu, 0,
1641 (((u16)bus) << 8) | devfn,
1642 DMA_CCMD_MASK_NOBIT,
1643 DMA_CCMD_DEVICE_INVL);
1644 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1645 } else {
1646 iommu_flush_write_buffer(iommu);
1647 }
1648 iommu_enable_dev_iotlb(info);
1649 spin_unlock_irqrestore(&iommu->lock, flags);
1650
1651 spin_lock_irqsave(&domain->iommu_lock, flags);
1652 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1653 domain->iommu_count++;
1654 if (domain->iommu_count == 1)
1655 domain->nid = iommu->node;
1656 domain_update_iommu_cap(domain);
1657 }
1658 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1659 return 0;
1660 }
1661
1662 static int
1663 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1664 int translation)
1665 {
1666 int ret;
1667 struct pci_dev *tmp, *parent;
1668
1669 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1670 pdev->bus->number, pdev->devfn,
1671 translation);
1672 if (ret)
1673 return ret;
1674
1675 /* dependent device mapping */
1676 tmp = pci_find_upstream_pcie_bridge(pdev);
1677 if (!tmp)
1678 return 0;
1679 /* Secondary interface's bus number and devfn 0 */
1680 parent = pdev->bus->self;
1681 while (parent != tmp) {
1682 ret = domain_context_mapping_one(domain,
1683 pci_domain_nr(parent->bus),
1684 parent->bus->number,
1685 parent->devfn, translation);
1686 if (ret)
1687 return ret;
1688 parent = parent->bus->self;
1689 }
1690 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1691 return domain_context_mapping_one(domain,
1692 pci_domain_nr(tmp->subordinate),
1693 tmp->subordinate->number, 0,
1694 translation);
1695 else /* this is a legacy PCI bridge */
1696 return domain_context_mapping_one(domain,
1697 pci_domain_nr(tmp->bus),
1698 tmp->bus->number,
1699 tmp->devfn,
1700 translation);
1701 }
1702
1703 static int domain_context_mapped(struct pci_dev *pdev)
1704 {
1705 int ret;
1706 struct pci_dev *tmp, *parent;
1707 struct intel_iommu *iommu;
1708
1709 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1710 pdev->devfn);
1711 if (!iommu)
1712 return -ENODEV;
1713
1714 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1715 if (!ret)
1716 return ret;
1717 /* dependent device mapping */
1718 tmp = pci_find_upstream_pcie_bridge(pdev);
1719 if (!tmp)
1720 return ret;
1721 /* Secondary interface's bus number and devfn 0 */
1722 parent = pdev->bus->self;
1723 while (parent != tmp) {
1724 ret = device_context_mapped(iommu, parent->bus->number,
1725 parent->devfn);
1726 if (!ret)
1727 return ret;
1728 parent = parent->bus->self;
1729 }
1730 if (pci_is_pcie(tmp))
1731 return device_context_mapped(iommu, tmp->subordinate->number,
1732 0);
1733 else
1734 return device_context_mapped(iommu, tmp->bus->number,
1735 tmp->devfn);
1736 }
1737
1738 /* Returns the number of VTD pages, but aligned to MM page size */
1739 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1740 size_t size)
1741 {
1742 host_addr &= ~PAGE_MASK;
1743 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1744 }
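/*
 * E.g. with 4KiB MM pages, aligned_nrpages(0x1004, 0x100) returns 1,
 * while aligned_nrpages(0xff0, 0x20) returns 2 because the 0x20-byte
 * buffer straddles a page boundary.
 */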
1745
1746 /* Return largest possible superpage level for a given mapping */
1747 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1748 unsigned long iov_pfn,
1749 unsigned long phy_pfn,
1750 unsigned long pages)
1751 {
1752 int support, level = 1;
1753 unsigned long pfnmerge;
1754
1755 support = domain->iommu_superpage;
1756
1757 /* To use a large page, the virtual *and* physical addresses
1758 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1759 of them will mean we have to use smaller pages. So just
1760 merge them and check both at once. */
1761 pfnmerge = iov_pfn | phy_pfn;
1762
1763 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1764 pages >>= VTD_STRIDE_SHIFT;
1765 if (!pages)
1766 break;
1767 pfnmerge >>= VTD_STRIDE_SHIFT;
1768 level++;
1769 support--;
1770 }
1771 return level;
1772 }
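/*
 * Example: mapping 1024 contiguous 4KiB pages whose IOVA and physical
 * PFNs are both 512-page (2MiB) aligned, on hardware with
 * iommu_superpage >= 1, yields level 2, letting __domain_mapping()
 * below use 2MiB superpage PTEs for that run.
 */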
1773
1774 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1775 struct scatterlist *sg, unsigned long phys_pfn,
1776 unsigned long nr_pages, int prot)
1777 {
1778 struct dma_pte *first_pte = NULL, *pte = NULL;
1779 phys_addr_t uninitialized_var(pteval);
1780 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1781 unsigned long sg_res;
1782 unsigned int largepage_lvl = 0;
1783 unsigned long lvl_pages = 0;
1784
1785 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1786
1787 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1788 return -EINVAL;
1789
1790 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1791
1792 if (sg)
1793 sg_res = 0;
1794 else {
1795 sg_res = nr_pages + 1;
1796 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1797 }
1798
1799 while (nr_pages > 0) {
1800 uint64_t tmp;
1801
1802 if (!sg_res) {
1803 sg_res = aligned_nrpages(sg->offset, sg->length);
1804 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1805 sg->dma_length = sg->length;
1806 pteval = page_to_phys(sg_page(sg)) | prot;
1807 phys_pfn = pteval >> VTD_PAGE_SHIFT;
1808 }
1809
1810 if (!pte) {
1811 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1812
1813 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1814 if (!pte)
1815 return -ENOMEM;
1816 /* It is a large page */
1817 if (largepage_lvl > 1)
1818 pteval |= DMA_PTE_LARGE_PAGE;
1819 else
1820 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1821
1822 }
1823 /* We don't need lock here, nobody else
1824 * touches the iova range
1825 */
1826 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1827 if (tmp) {
1828 static int dumps = 5;
1829 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1830 iov_pfn, tmp, (unsigned long long)pteval);
1831 if (dumps) {
1832 dumps--;
1833 debug_dma_dump_mappings(NULL);
1834 }
1835 WARN_ON(1);
1836 }
1837
1838 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1839
1840 BUG_ON(nr_pages < lvl_pages);
1841 BUG_ON(sg_res < lvl_pages);
1842
1843 nr_pages -= lvl_pages;
1844 iov_pfn += lvl_pages;
1845 phys_pfn += lvl_pages;
1846 pteval += lvl_pages * VTD_PAGE_SIZE;
1847 sg_res -= lvl_pages;
1848
1849 /* If the next PTE would be the first in a new page, then we
1850 need to flush the cache on the entries we've just written.
1851 And then we'll need to recalculate 'pte', so clear it and
1852 let it get set again in the if (!pte) block above.
1853
1854 If we're done (!nr_pages) we need to flush the cache too.
1855
1856 Also if we've been setting superpages, we may need to
1857 recalculate 'pte' and switch back to smaller pages for the
1858 end of the mapping, if the trailing size is not enough to
1859 use another superpage (i.e. sg_res < lvl_pages). */
1860 pte++;
1861 if (!nr_pages || first_pte_in_page(pte) ||
1862 (largepage_lvl > 1 && sg_res < lvl_pages)) {
1863 domain_flush_cache(domain, first_pte,
1864 (void *)pte - (void *)first_pte);
1865 pte = NULL;
1866 }
1867
1868 if (!sg_res && nr_pages)
1869 sg = sg_next(sg);
1870 }
1871 return 0;
1872 }
1873
1874 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1875 struct scatterlist *sg, unsigned long nr_pages,
1876 int prot)
1877 {
1878 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1879 }
1880
1881 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1882 unsigned long phys_pfn, unsigned long nr_pages,
1883 int prot)
1884 {
1885 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1886 }
1887
1888 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1889 {
1890 if (!iommu)
1891 return;
1892
1893 clear_context_table(iommu, bus, devfn);
1894 iommu->flush.flush_context(iommu, 0, 0, 0,
1895 DMA_CCMD_GLOBAL_INVL);
1896 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1897 }
1898
1899 static void domain_remove_dev_info(struct dmar_domain *domain)
1900 {
1901 struct device_domain_info *info;
1902 unsigned long flags;
1903 struct intel_iommu *iommu;
1904
1905 spin_lock_irqsave(&device_domain_lock, flags);
1906 while (!list_empty(&domain->devices)) {
1907 info = list_entry(domain->devices.next,
1908 struct device_domain_info, link);
1909 list_del(&info->link);
1910 list_del(&info->global);
1911 if (info->dev)
1912 info->dev->dev.archdata.iommu = NULL;
1913 spin_unlock_irqrestore(&device_domain_lock, flags);
1914
1915 iommu_disable_dev_iotlb(info);
1916 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1917 iommu_detach_dev(iommu, info->bus, info->devfn);
1918 free_devinfo_mem(info);
1919
1920 spin_lock_irqsave(&device_domain_lock, flags);
1921 }
1922 spin_unlock_irqrestore(&device_domain_lock, flags);
1923 }
1924
1925 /*
1926 * find_domain
1927 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1928 */
1929 static struct dmar_domain *
1930 find_domain(struct pci_dev *pdev)
1931 {
1932 struct device_domain_info *info;
1933
1934 /* No lock here, assumes no domain exit in normal case */
1935 info = pdev->dev.archdata.iommu;
1936 if (info)
1937 return info->domain;
1938 return NULL;
1939 }
1940
1941 /* domain is initialized */
1942 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1943 {
1944 struct dmar_domain *domain, *found = NULL;
1945 struct intel_iommu *iommu;
1946 struct dmar_drhd_unit *drhd;
1947 struct device_domain_info *info, *tmp;
1948 struct pci_dev *dev_tmp;
1949 unsigned long flags;
1950 int bus = 0, devfn = 0;
1951 int segment;
1952 int ret;
1953
1954 domain = find_domain(pdev);
1955 if (domain)
1956 return domain;
1957
1958 segment = pci_domain_nr(pdev->bus);
1959
1960 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1961 if (dev_tmp) {
1962 if (pci_is_pcie(dev_tmp)) {
1963 bus = dev_tmp->subordinate->number;
1964 devfn = 0;
1965 } else {
1966 bus = dev_tmp->bus->number;
1967 devfn = dev_tmp->devfn;
1968 }
1969 spin_lock_irqsave(&device_domain_lock, flags);
1970 list_for_each_entry(info, &device_domain_list, global) {
1971 if (info->segment == segment &&
1972 info->bus == bus && info->devfn == devfn) {
1973 found = info->domain;
1974 break;
1975 }
1976 }
1977 spin_unlock_irqrestore(&device_domain_lock, flags);
1978 /* pcie-pci bridge already has a domain, use it */
1979 if (found) {
1980 domain = found;
1981 goto found_domain;
1982 }
1983 }
1984
1985 domain = alloc_domain();
1986 if (!domain)
1987 goto error;
1988
1989 /* Allocate new domain for the device */
1990 drhd = dmar_find_matched_drhd_unit(pdev);
1991 if (!drhd) {
1992 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1993 pci_name(pdev));
1994 return NULL;
1995 }
1996 iommu = drhd->iommu;
1997
1998 ret = iommu_attach_domain(domain, iommu);
1999 if (ret) {
2000 free_domain_mem(domain);
2001 goto error;
2002 }
2003
2004 if (domain_init(domain, gaw)) {
2005 domain_exit(domain);
2006 goto error;
2007 }
2008
2009 /* register pcie-to-pci device */
2010 if (dev_tmp) {
2011 info = alloc_devinfo_mem();
2012 if (!info) {
2013 domain_exit(domain);
2014 goto error;
2015 }
2016 info->segment = segment;
2017 info->bus = bus;
2018 info->devfn = devfn;
2019 info->dev = NULL;
2020 info->domain = domain;
2021 /* This domain is shared by devices under p2p bridge */
2022 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2023
2024 /* pcie-to-pci bridge already has a domain, use it */
2025 found = NULL;
2026 spin_lock_irqsave(&device_domain_lock, flags);
2027 list_for_each_entry(tmp, &device_domain_list, global) {
2028 if (tmp->segment == segment &&
2029 tmp->bus == bus && tmp->devfn == devfn) {
2030 found = tmp->domain;
2031 break;
2032 }
2033 }
2034 if (found) {
2035 spin_unlock_irqrestore(&device_domain_lock, flags);
2036 free_devinfo_mem(info);
2037 domain_exit(domain);
2038 domain = found;
2039 } else {
2040 list_add(&info->link, &domain->devices);
2041 list_add(&info->global, &device_domain_list);
2042 spin_unlock_irqrestore(&device_domain_lock, flags);
2043 }
2044 }
2045
2046 found_domain:
2047 info = alloc_devinfo_mem();
2048 if (!info)
2049 goto error;
2050 info->segment = segment;
2051 info->bus = pdev->bus->number;
2052 info->devfn = pdev->devfn;
2053 info->dev = pdev;
2054 info->domain = domain;
2055 spin_lock_irqsave(&device_domain_lock, flags);
2056 /* somebody else got here first */
2057 found = find_domain(pdev);
2058 if (found != NULL) {
2059 spin_unlock_irqrestore(&device_domain_lock, flags);
2060 if (found != domain) {
2061 domain_exit(domain);
2062 domain = found;
2063 }
2064 free_devinfo_mem(info);
2065 return domain;
2066 }
2067 list_add(&info->link, &domain->devices);
2068 list_add(&info->global, &device_domain_list);
2069 pdev->dev.archdata.iommu = info;
2070 spin_unlock_irqrestore(&device_domain_lock, flags);
2071 return domain;
2072 error:
2073 /* recheck it here, maybe someone else set it */
2074 return find_domain(pdev);
2075 }
2076
2077 static int iommu_identity_mapping;
2078 #define IDENTMAP_ALL 1
2079 #define IDENTMAP_GFX 2
2080 #define IDENTMAP_AZALIA 4
2081
2082 static int iommu_domain_identity_map(struct dmar_domain *domain,
2083 unsigned long long start,
2084 unsigned long long end)
2085 {
2086 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2087 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2088
2089 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2090 dma_to_mm_pfn(last_vpfn))) {
2091 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2092 return -ENOMEM;
2093 }
2094
2095 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2096 start, end, domain->id);
2097 /*
2098 * The RMRR range might overlap the physical memory range;
2099 * clear it first
2100 */
2101 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2102
2103 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2104 last_vpfn - first_vpfn + 1,
2105 DMA_PTE_READ|DMA_PTE_WRITE);
2106 }
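/*
 * Worked example for the pfn conversion above, with 4KiB VT-d pages: a
 * hypothetical reserved region 0x7f000000..0x7f0fffff gives
 * first_vpfn = 0x7f000 and last_vpfn = 0x7f0ff, i.e. 256 pages mapped 1:1
 * (IOVA == physical address).
 */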
2107
2108 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2109 unsigned long long start,
2110 unsigned long long end)
2111 {
2112 struct dmar_domain *domain;
2113 int ret;
2114
2115 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2116 if (!domain)
2117 return -ENOMEM;
2118
2119 /* For _hardware_ passthrough, don't bother. But for software
2120 passthrough, we do it anyway -- it may indicate a memory
2121 range which is reserved in E820, and so didn't get set
2122 up to start with in si_domain */
2123 if (domain == si_domain && hw_pass_through) {
2124 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2125 pci_name(pdev), start, end);
2126 return 0;
2127 }
2128
2129 printk(KERN_INFO
2130 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2131 pci_name(pdev), start, end);
2132
2133 if (end < start) {
2134 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2135 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2136 dmi_get_system_info(DMI_BIOS_VENDOR),
2137 dmi_get_system_info(DMI_BIOS_VERSION),
2138 dmi_get_system_info(DMI_PRODUCT_VERSION));
2139 ret = -EIO;
2140 goto error;
2141 }
2142
2143 if (end >> agaw_to_width(domain->agaw)) {
2144 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2145 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2146 agaw_to_width(domain->agaw),
2147 dmi_get_system_info(DMI_BIOS_VENDOR),
2148 dmi_get_system_info(DMI_BIOS_VERSION),
2149 dmi_get_system_info(DMI_PRODUCT_VERSION));
2150 ret = -EIO;
2151 goto error;
2152 }
2153
2154 ret = iommu_domain_identity_map(domain, start, end);
2155 if (ret)
2156 goto error;
2157
2158 /* context entry init */
2159 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2160 if (ret)
2161 goto error;
2162
2163 return 0;
2164
2165 error:
2166 domain_exit(domain);
2167 return ret;
2168 }
2169
2170 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2171 struct pci_dev *pdev)
2172 {
2173 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2174 return 0;
2175 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2176 rmrr->end_address);
2177 }
2178
2179 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2180 static inline void iommu_prepare_isa(void)
2181 {
2182 struct pci_dev *pdev;
2183 int ret;
2184
2185 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2186 if (!pdev)
2187 return;
2188
2189 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2190 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2191
2192 if (ret)
2193 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2194 "floppy might not work\n");
2195
2196 }
2197 #else
2198 static inline void iommu_prepare_isa(void)
2199 {
2200 return;
2201 }
2202 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2203
2204 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2205
2206 static int __init si_domain_work_fn(unsigned long start_pfn,
2207 unsigned long end_pfn, void *datax)
2208 {
2209 int *ret = datax;
2210
2211 *ret = iommu_domain_identity_map(si_domain,
2212 (uint64_t)start_pfn << PAGE_SHIFT,
2213 (uint64_t)end_pfn << PAGE_SHIFT);
2214 return *ret;
2215
2216 }
2217
2218 static int __init si_domain_init(int hw)
2219 {
2220 struct dmar_drhd_unit *drhd;
2221 struct intel_iommu *iommu;
2222 int nid, ret = 0;
2223
2224 si_domain = alloc_domain();
2225 if (!si_domain)
2226 return -EFAULT;
2227
2228 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2229
2230 for_each_active_iommu(iommu, drhd) {
2231 ret = iommu_attach_domain(si_domain, iommu);
2232 if (ret) {
2233 domain_exit(si_domain);
2234 return -EFAULT;
2235 }
2236 }
2237
2238 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2239 domain_exit(si_domain);
2240 return -EFAULT;
2241 }
2242
2243 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2244
2245 if (hw)
2246 return 0;
2247
2248 for_each_online_node(nid) {
2249 work_with_active_regions(nid, si_domain_work_fn, &ret);
2250 if (ret)
2251 return ret;
2252 }
2253
2254 return 0;
2255 }
2256
2257 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2258 struct pci_dev *pdev);
2259 static int identity_mapping(struct pci_dev *pdev)
2260 {
2261 struct device_domain_info *info;
2262
2263 if (likely(!iommu_identity_mapping))
2264 return 0;
2265
2266 info = pdev->dev.archdata.iommu;
2267 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2268 return (info->domain == si_domain);
2269
2270 return 0;
2271 }
2272
2273 static int domain_add_dev_info(struct dmar_domain *domain,
2274 struct pci_dev *pdev,
2275 int translation)
2276 {
2277 struct device_domain_info *info;
2278 unsigned long flags;
2279 int ret;
2280
2281 info = alloc_devinfo_mem();
2282 if (!info)
2283 return -ENOMEM;
2284
2285 ret = domain_context_mapping(domain, pdev, translation);
2286 if (ret) {
2287 free_devinfo_mem(info);
2288 return ret;
2289 }
2290
2291 info->segment = pci_domain_nr(pdev->bus);
2292 info->bus = pdev->bus->number;
2293 info->devfn = pdev->devfn;
2294 info->dev = pdev;
2295 info->domain = domain;
2296
2297 spin_lock_irqsave(&device_domain_lock, flags);
2298 list_add(&info->link, &domain->devices);
2299 list_add(&info->global, &device_domain_list);
2300 pdev->dev.archdata.iommu = info;
2301 spin_unlock_irqrestore(&device_domain_lock, flags);
2302
2303 return 0;
2304 }
2305
2306 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2307 {
2308 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2309 return 1;
2310
2311 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2312 return 1;
2313
2314 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2315 return 0;
2316
2317 /*
2318 * We want to start off with all devices in the 1:1 domain, and
2319 * take them out later if we find they can't access all of memory.
2320 *
2321 * However, we can't do this for PCI devices behind bridges,
2322 * because all PCI devices behind the same bridge will end up
2323 * with the same source-id on their transactions.
2324 *
2325 * Practically speaking, we can't change things around for these
2326 * devices at run-time, because we can't be sure there'll be no
2327 * DMA transactions in flight for any of their siblings.
2328 *
2329 * So PCI devices (unless they're on the root bus) as well as
2330 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2331 * the 1:1 domain, just in _case_ one of their siblings turns out
2332 * not to be able to map all of memory.
2333 */
2334 if (!pci_is_pcie(pdev)) {
2335 if (!pci_is_root_bus(pdev->bus))
2336 return 0;
2337 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2338 return 0;
2339 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2340 return 0;
2341
2342 /*
2343 * At boot time, we don't yet know if devices will be 64-bit capable.
2344 * Assume that they will -- if they turn out not to be, then we can
2345 * take them out of the 1:1 domain later.
2346 */
2347 if (!startup) {
2348 /*
2349 * If the device's dma_mask is less than the system's memory
2350 * size then this is not a candidate for identity mapping.
2351 */
2352 u64 dma_mask = pdev->dma_mask;
2353
2354 if (pdev->dev.coherent_dma_mask &&
2355 pdev->dev.coherent_dma_mask < dma_mask)
2356 dma_mask = pdev->dev.coherent_dma_mask;
2357
2358 return dma_mask >= dma_get_required_mask(&pdev->dev);
2359 }
2360
2361 return 1;
2362 }
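/*
 * Illustrative sketch (not built): the run-time check above, assuming a
 * machine with 8GiB of RAM.  dma_get_required_mask() then covers bit 33
 * (0x1ffffffff), so a device whose DMA mask is only DMA_BIT_MASK(32)
 * fails the comparison and is taken back out of the 1:1 domain.
 */
#if 0
static bool example_keeps_identity_map(struct pci_dev *pdev)
{
	u64 required = dma_get_required_mask(&pdev->dev);	/* e.g. 0x1ffffffff */

	return pdev->dma_mask >= required;	/* false for a 32-bit-only device */
}
#endif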
2363
2364 static int __init iommu_prepare_static_identity_mapping(int hw)
2365 {
2366 struct pci_dev *pdev = NULL;
2367 int ret;
2368
2369 ret = si_domain_init(hw);
2370 if (ret)
2371 return -EFAULT;
2372
2373 for_each_pci_dev(pdev) {
2374 /* Skip Host/PCI Bridge devices */
2375 if (IS_BRIDGE_HOST_DEVICE(pdev))
2376 continue;
2377 if (iommu_should_identity_map(pdev, 1)) {
2378 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2379 hw ? "hardware" : "software", pci_name(pdev));
2380
2381 ret = domain_add_dev_info(si_domain, pdev,
2382 hw ? CONTEXT_TT_PASS_THROUGH :
2383 CONTEXT_TT_MULTI_LEVEL);
2384 if (ret)
2385 return ret;
2386 }
2387 }
2388
2389 return 0;
2390 }
2391
2392 static int __init init_dmars(void)
2393 {
2394 struct dmar_drhd_unit *drhd;
2395 struct dmar_rmrr_unit *rmrr;
2396 struct pci_dev *pdev;
2397 struct intel_iommu *iommu;
2398 int i, ret;
2399
2400 /*
2401 * for each drhd
2402 * allocate root
2403 * initialize and program root entry to not present
2404 * endfor
2405 */
2406 for_each_drhd_unit(drhd) {
2407 g_num_of_iommus++;
2408 /*
2409 * lock not needed as this is only incremented in the single
2410 * threaded kernel __init code path; all other accesses are read
2411 * only
2412 */
2413 }
2414
2415 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2416 GFP_KERNEL);
2417 if (!g_iommus) {
2418 printk(KERN_ERR "Allocating global iommu array failed\n");
2419 ret = -ENOMEM;
2420 goto error;
2421 }
2422
2423 deferred_flush = kzalloc(g_num_of_iommus *
2424 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2425 if (!deferred_flush) {
2426 ret = -ENOMEM;
2427 goto error;
2428 }
2429
2430 for_each_drhd_unit(drhd) {
2431 if (drhd->ignored)
2432 continue;
2433
2434 iommu = drhd->iommu;
2435 g_iommus[iommu->seq_id] = iommu;
2436
2437 ret = iommu_init_domains(iommu);
2438 if (ret)
2439 goto error;
2440
2441 /*
2442 * TBD:
2443 * we could share the same root & context tables
2444 * among all IOMMUs. Need to split it later.
2445 */
2446 ret = iommu_alloc_root_entry(iommu);
2447 if (ret) {
2448 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2449 goto error;
2450 }
2451 if (!ecap_pass_through(iommu->ecap))
2452 hw_pass_through = 0;
2453 }
2454
2455 /*
2456 * Start from a sane iommu hardware state.
2457 */
2458 for_each_drhd_unit(drhd) {
2459 if (drhd->ignored)
2460 continue;
2461
2462 iommu = drhd->iommu;
2463
2464 /*
2465 * If the queued invalidation is already initialized by us
2466 * (for example, while enabling interrupt-remapping) then
2467 * we already have things rolling from a sane state.
2468 */
2469 if (iommu->qi)
2470 continue;
2471
2472 /*
2473 * Clear any previous faults.
2474 */
2475 dmar_fault(-1, iommu);
2476 /*
2477 * Disable queued invalidation if supported and already enabled
2478 * before OS handover.
2479 */
2480 dmar_disable_qi(iommu);
2481 }
2482
2483 for_each_drhd_unit(drhd) {
2484 if (drhd->ignored)
2485 continue;
2486
2487 iommu = drhd->iommu;
2488
2489 if (dmar_enable_qi(iommu)) {
2490 /*
2491 * Queued Invalidate not enabled, use Register Based
2492 * Invalidate
2493 */
2494 iommu->flush.flush_context = __iommu_flush_context;
2495 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2496 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2497 "invalidation\n",
2498 iommu->seq_id,
2499 (unsigned long long)drhd->reg_base_addr);
2500 } else {
2501 iommu->flush.flush_context = qi_flush_context;
2502 iommu->flush.flush_iotlb = qi_flush_iotlb;
2503 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2504 "invalidation\n",
2505 iommu->seq_id,
2506 (unsigned long long)drhd->reg_base_addr);
2507 }
2508 }
2509
2510 if (iommu_pass_through)
2511 iommu_identity_mapping |= IDENTMAP_ALL;
2512
2513 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2514 iommu_identity_mapping |= IDENTMAP_GFX;
2515 #endif
2516
2517 check_tylersburg_isoch();
2518
2519 /*
2520 * If pass through is not set or not enabled, setup context entries for
2521 * identity mappings for rmrr, gfx, and isa and may fall back to static
2522 * identity mapping if iommu_identity_mapping is set.
2523 */
2524 if (iommu_identity_mapping) {
2525 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2526 if (ret) {
2527 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2528 goto error;
2529 }
2530 }
2531 /*
2532 * For each rmrr
2533 * for each dev attached to rmrr
2534 * do
2535 * locate drhd for dev, alloc domain for dev
2536 * allocate free domain
2537 * allocate page table entries for rmrr
2538 * if context not allocated for bus
2539 * allocate and init context
2540 * set present in root table for this bus
2541 * init context with domain, translation etc
2542 * endfor
2543 * endfor
2544 */
2545 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2546 for_each_rmrr_units(rmrr) {
2547 for (i = 0; i < rmrr->devices_cnt; i++) {
2548 pdev = rmrr->devices[i];
2549 /*
2550 * some BIOSes list non-existent devices in the DMAR
2551 * table.
2552 */
2553 if (!pdev)
2554 continue;
2555 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2556 if (ret)
2557 printk(KERN_ERR
2558 "IOMMU: mapping reserved region failed\n");
2559 }
2560 }
2561
2562 iommu_prepare_isa();
2563
2564 /*
2565 * for each drhd
2566 * enable fault log
2567 * global invalidate context cache
2568 * global invalidate iotlb
2569 * enable translation
2570 */
2571 for_each_drhd_unit(drhd) {
2572 if (drhd->ignored) {
2573 /*
2574 * we always have to disable PMRs or DMA may fail on
2575 * this device
2576 */
2577 if (force_on)
2578 iommu_disable_protect_mem_regions(drhd->iommu);
2579 continue;
2580 }
2581 iommu = drhd->iommu;
2582
2583 iommu_flush_write_buffer(iommu);
2584
2585 ret = dmar_set_interrupt(iommu);
2586 if (ret)
2587 goto error;
2588
2589 iommu_set_root_entry(iommu);
2590
2591 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2592 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2593
2594 ret = iommu_enable_translation(iommu);
2595 if (ret)
2596 goto error;
2597
2598 iommu_disable_protect_mem_regions(iommu);
2599 }
2600
2601 return 0;
2602 error:
2603 for_each_drhd_unit(drhd) {
2604 if (drhd->ignored)
2605 continue;
2606 iommu = drhd->iommu;
2607 free_iommu(iommu);
2608 }
2609 kfree(g_iommus);
2610 return ret;
2611 }
2612
2613 /* This takes a number of _MM_ pages, not VTD pages */
2614 static struct iova *intel_alloc_iova(struct device *dev,
2615 struct dmar_domain *domain,
2616 unsigned long nrpages, uint64_t dma_mask)
2617 {
2618 struct pci_dev *pdev = to_pci_dev(dev);
2619 struct iova *iova = NULL;
2620
2621 /* Restrict dma_mask to the width that the iommu can handle */
2622 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2623
2624 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2625 /*
2626 * First try to allocate an io virtual address in
2627 * DMA_BIT_MASK(32) and if that fails then try allocating
2628 * from higher range
2629 */
2630 iova = alloc_iova(&domain->iovad, nrpages,
2631 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2632 if (iova)
2633 return iova;
2634 }
2635 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2636 if (unlikely(!iova)) {
2637 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2638 nrpages, pci_name(pdev));
2639 return NULL;
2640 }
2641
2642 return iova;
2643 }
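/*
 * Illustrative sketch (not built): callers convert a VT-d page count into
 * MM pages before calling intel_alloc_iova(), just as __intel_map_single()
 * below does.  'dev', 'domain' and 'nr_vtd_pages' are assumed to be set up
 * by the caller.
 */
#if 0
static struct iova *example_alloc_iova(struct device *dev,
				       struct dmar_domain *domain,
				       unsigned long nr_vtd_pages)
{
	return intel_alloc_iova(dev, domain, dma_to_mm_pfn(nr_vtd_pages),
				dma_get_mask(dev));
}
#endif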
2644
2645 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2646 {
2647 struct dmar_domain *domain;
2648 int ret;
2649
2650 domain = get_domain_for_dev(pdev,
2651 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2652 if (!domain) {
2653 printk(KERN_ERR
2654 "Allocating domain for %s failed", pci_name(pdev));
2655 return NULL;
2656 }
2657
2658 /* make sure context mapping is ok */
2659 if (unlikely(!domain_context_mapped(pdev))) {
2660 ret = domain_context_mapping(domain, pdev,
2661 CONTEXT_TT_MULTI_LEVEL);
2662 if (ret) {
2663 printk(KERN_ERR
2664 "Domain context map for %s failed",
2665 pci_name(pdev));
2666 return NULL;
2667 }
2668 }
2669
2670 return domain;
2671 }
2672
2673 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2674 {
2675 struct device_domain_info *info;
2676
2677 /* No lock here, assumes no domain exit in normal case */
2678 info = dev->dev.archdata.iommu;
2679 if (likely(info))
2680 return info->domain;
2681
2682 return __get_valid_domain_for_dev(dev);
2683 }
2684
2685 static int iommu_dummy(struct pci_dev *pdev)
2686 {
2687 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2688 }
2689
2690 /* Check if the pdev needs to go through non-identity map and unmap process.*/
2691 static int iommu_no_mapping(struct device *dev)
2692 {
2693 struct pci_dev *pdev;
2694 int found;
2695
2696 if (unlikely(dev->bus != &pci_bus_type))
2697 return 1;
2698
2699 pdev = to_pci_dev(dev);
2700 if (iommu_dummy(pdev))
2701 return 1;
2702
2703 if (!iommu_identity_mapping)
2704 return 0;
2705
2706 found = identity_mapping(pdev);
2707 if (found) {
2708 if (iommu_should_identity_map(pdev, 0))
2709 return 1;
2710 else {
2711 /*
2712 * A 32 bit DMA device is removed from si_domain and falls back
2713 * to non-identity mapping.
2714 */
2715 domain_remove_one_dev_info(si_domain, pdev);
2716 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2717 pci_name(pdev));
2718 return 0;
2719 }
2720 } else {
2721 /*
2722 * When a 64 bit DMA device is detached from a VM, the device
2723 * is put into si_domain for identity mapping.
2724 */
2725 if (iommu_should_identity_map(pdev, 0)) {
2726 int ret;
2727 ret = domain_add_dev_info(si_domain, pdev,
2728 hw_pass_through ?
2729 CONTEXT_TT_PASS_THROUGH :
2730 CONTEXT_TT_MULTI_LEVEL);
2731 if (!ret) {
2732 printk(KERN_INFO "64bit %s uses identity mapping\n",
2733 pci_name(pdev));
2734 return 1;
2735 }
2736 }
2737 }
2738
2739 return 0;
2740 }
2741
2742 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2743 size_t size, int dir, u64 dma_mask)
2744 {
2745 struct pci_dev *pdev = to_pci_dev(hwdev);
2746 struct dmar_domain *domain;
2747 phys_addr_t start_paddr;
2748 struct iova *iova;
2749 int prot = 0;
2750 int ret;
2751 struct intel_iommu *iommu;
2752 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2753
2754 BUG_ON(dir == DMA_NONE);
2755
2756 if (iommu_no_mapping(hwdev))
2757 return paddr;
2758
2759 domain = get_valid_domain_for_dev(pdev);
2760 if (!domain)
2761 return 0;
2762
2763 iommu = domain_get_iommu(domain);
2764 size = aligned_nrpages(paddr, size);
2765
2766 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2767 if (!iova)
2768 goto error;
2769
2770 /*
2771 * Check if DMAR supports zero-length reads on write only
2772 * mappings.
2773 */
2774 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2775 !cap_zlr(iommu->cap))
2776 prot |= DMA_PTE_READ;
2777 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2778 prot |= DMA_PTE_WRITE;
2779 /*
2780 * paddr - (paddr + size) might be partial page, we should map the whole
2781 * page. Note: if two part of one page are separately mapped, we
2782 * might have two guest_addr mapping to the same host paddr, but this
2783 * is not a big problem
2784 */
2785 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2786 mm_to_dma_pfn(paddr_pfn), size, prot);
2787 if (ret)
2788 goto error;
2789
2790 /* it's a non-present to present mapping. Only flush if caching mode */
2791 if (cap_caching_mode(iommu->cap))
2792 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2793 else
2794 iommu_flush_write_buffer(iommu);
2795
2796 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2797 start_paddr += paddr & ~PAGE_MASK;
2798 return start_paddr;
2799
2800 error:
2801 if (iova)
2802 __free_iova(&domain->iovad, iova);
2803 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
2804 pci_name(pdev), size, (unsigned long long)paddr, dir);
2805 return 0;
2806 }
2807
2808 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2809 unsigned long offset, size_t size,
2810 enum dma_data_direction dir,
2811 struct dma_attrs *attrs)
2812 {
2813 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2814 dir, to_pci_dev(dev)->dma_mask);
2815 }
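/*
 * Illustrative sketch (not built): once intel_dma_ops is installed as the
 * global 'dma_ops' (see intel_iommu_init() below), an ordinary driver call
 * such as dma_map_single() is routed into intel_map_page() above.  'dev',
 * 'buf' and 'len' are assumed to come from the driver.
 */
#if 0
static int example_driver_map(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, handle))	/* intel_mapping_error(): 0 == failure */
		return -ENOMEM;

	/* ... hand 'handle' to the device and start the transfer ... */

	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
#endif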
2816
2817 static void flush_unmaps(void)
2818 {
2819 int i, j;
2820
2821 timer_on = 0;
2822
2823 /* just flush them all */
2824 for (i = 0; i < g_num_of_iommus; i++) {
2825 struct intel_iommu *iommu = g_iommus[i];
2826 if (!iommu)
2827 continue;
2828
2829 if (!deferred_flush[i].next)
2830 continue;
2831
2832 /* In caching mode, global flushes make emulation expensive */
2833 if (!cap_caching_mode(iommu->cap))
2834 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2835 DMA_TLB_GLOBAL_FLUSH);
2836 for (j = 0; j < deferred_flush[i].next; j++) {
2837 unsigned long mask;
2838 struct iova *iova = deferred_flush[i].iova[j];
2839 struct dmar_domain *domain = deferred_flush[i].domain[j];
2840
2841 /* On real hardware multiple invalidations are expensive */
2842 if (cap_caching_mode(iommu->cap))
2843 iommu_flush_iotlb_psi(iommu, domain->id,
2844 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2845 else {
2846 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2847 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2848 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2849 }
2850 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2851 }
2852 deferred_flush[i].next = 0;
2853 }
2854
2855 list_size = 0;
2856 }
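/*
 * Worked example for the device-IOTLB path above, assuming 4KiB pages so
 * that MM and VT-d pfns coincide: a deferred IOVA range spanning 16 pages
 * gives mask = ilog2(mm_to_dma_pfn(16)) = 4, i.e. one invalidation that
 * covers a naturally aligned 2^4-page (64KiB) region.
 */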
2857
2858 static void flush_unmaps_timeout(unsigned long data)
2859 {
2860 unsigned long flags;
2861
2862 spin_lock_irqsave(&async_umap_flush_lock, flags);
2863 flush_unmaps();
2864 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2865 }
2866
2867 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2868 {
2869 unsigned long flags;
2870 int next, iommu_id;
2871 struct intel_iommu *iommu;
2872
2873 spin_lock_irqsave(&async_umap_flush_lock, flags);
2874 if (list_size == HIGH_WATER_MARK)
2875 flush_unmaps();
2876
2877 iommu = domain_get_iommu(dom);
2878 iommu_id = iommu->seq_id;
2879
2880 next = deferred_flush[iommu_id].next;
2881 deferred_flush[iommu_id].domain[next] = dom;
2882 deferred_flush[iommu_id].iova[next] = iova;
2883 deferred_flush[iommu_id].next++;
2884
2885 if (!timer_on) {
2886 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2887 timer_on = 1;
2888 }
2889 list_size++;
2890 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2891 }
2892
2893 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2894 size_t size, enum dma_data_direction dir,
2895 struct dma_attrs *attrs)
2896 {
2897 struct pci_dev *pdev = to_pci_dev(dev);
2898 struct dmar_domain *domain;
2899 unsigned long start_pfn, last_pfn;
2900 struct iova *iova;
2901 struct intel_iommu *iommu;
2902
2903 if (iommu_no_mapping(dev))
2904 return;
2905
2906 domain = find_domain(pdev);
2907 BUG_ON(!domain);
2908
2909 iommu = domain_get_iommu(domain);
2910
2911 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2912 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2913 (unsigned long long)dev_addr))
2914 return;
2915
2916 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2917 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2918
2919 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2920 pci_name(pdev), start_pfn, last_pfn);
2921
2922 /* clear the whole page */
2923 dma_pte_clear_range(domain, start_pfn, last_pfn);
2924
2925 /* free page tables */
2926 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2927
2928 if (intel_iommu_strict) {
2929 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2930 last_pfn - start_pfn + 1, 0);
2931 /* free iova */
2932 __free_iova(&domain->iovad, iova);
2933 } else {
2934 add_unmap(domain, iova);
2935 /*
2936 * queue up the release of the unmap to save the roughly 1/6th of the
2937 * CPU time used up by the iotlb flush operation...
2938 */
2939 }
2940 }
2941
2942 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2943 dma_addr_t *dma_handle, gfp_t flags)
2944 {
2945 void *vaddr;
2946 int order;
2947
2948 size = PAGE_ALIGN(size);
2949 order = get_order(size);
2950
2951 if (!iommu_no_mapping(hwdev))
2952 flags &= ~(GFP_DMA | GFP_DMA32);
2953 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2954 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2955 flags |= GFP_DMA;
2956 else
2957 flags |= GFP_DMA32;
2958 }
2959
2960 vaddr = (void *)__get_free_pages(flags, order);
2961 if (!vaddr)
2962 return NULL;
2963 memset(vaddr, 0, size);
2964
2965 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2966 DMA_BIDIRECTIONAL,
2967 hwdev->coherent_dma_mask);
2968 if (*dma_handle)
2969 return vaddr;
2970 free_pages((unsigned long)vaddr, order);
2971 return NULL;
2972 }
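/*
 * Illustrative sketch (not built): a driver obtains a coherent buffer via
 * dma_alloc_coherent(), which lands in intel_alloc_coherent() above once
 * intel_dma_ops is installed.  'dev' and 'len' are assumed to come from
 * the driver.
 */
#if 0
static int example_driver_alloc_coherent(struct device *dev, size_t len)
{
	dma_addr_t handle;
	void *cpu_addr = dma_alloc_coherent(dev, len, &handle, GFP_KERNEL);

	if (!cpu_addr)
		return -ENOMEM;

	/* ... use cpu_addr on the CPU and 'handle' on the device ... */

	dma_free_coherent(dev, len, cpu_addr, handle);
	return 0;
}
#endif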
2973
2974 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2975 dma_addr_t dma_handle)
2976 {
2977 int order;
2978
2979 size = PAGE_ALIGN(size);
2980 order = get_order(size);
2981
2982 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2983 free_pages((unsigned long)vaddr, order);
2984 }
2985
2986 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2987 int nelems, enum dma_data_direction dir,
2988 struct dma_attrs *attrs)
2989 {
2990 struct pci_dev *pdev = to_pci_dev(hwdev);
2991 struct dmar_domain *domain;
2992 unsigned long start_pfn, last_pfn;
2993 struct iova *iova;
2994 struct intel_iommu *iommu;
2995
2996 if (iommu_no_mapping(hwdev))
2997 return;
2998
2999 domain = find_domain(pdev);
3000 BUG_ON(!domain);
3001
3002 iommu = domain_get_iommu(domain);
3003
3004 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3005 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3006 (unsigned long long)sglist[0].dma_address))
3007 return;
3008
3009 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3010 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3011
3012 /* clear the whole page */
3013 dma_pte_clear_range(domain, start_pfn, last_pfn);
3014
3015 /* free page tables */
3016 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
3017
3018 if (intel_iommu_strict) {
3019 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3020 last_pfn - start_pfn + 1, 0);
3021 /* free iova */
3022 __free_iova(&domain->iovad, iova);
3023 } else {
3024 add_unmap(domain, iova);
3025 /*
3026 * queue up the release of the unmap to save the roughly 1/6th of the
3027 * CPU time used up by the iotlb flush operation...
3028 */
3029 }
3030 }
3031
3032 static int intel_nontranslate_map_sg(struct device *hddev,
3033 struct scatterlist *sglist, int nelems, int dir)
3034 {
3035 int i;
3036 struct scatterlist *sg;
3037
3038 for_each_sg(sglist, sg, nelems, i) {
3039 BUG_ON(!sg_page(sg));
3040 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3041 sg->dma_length = sg->length;
3042 }
3043 return nelems;
3044 }
3045
3046 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3047 enum dma_data_direction dir, struct dma_attrs *attrs)
3048 {
3049 int i;
3050 struct pci_dev *pdev = to_pci_dev(hwdev);
3051 struct dmar_domain *domain;
3052 size_t size = 0;
3053 int prot = 0;
3054 struct iova *iova = NULL;
3055 int ret;
3056 struct scatterlist *sg;
3057 unsigned long start_vpfn;
3058 struct intel_iommu *iommu;
3059
3060 BUG_ON(dir == DMA_NONE);
3061 if (iommu_no_mapping(hwdev))
3062 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3063
3064 domain = get_valid_domain_for_dev(pdev);
3065 if (!domain)
3066 return 0;
3067
3068 iommu = domain_get_iommu(domain);
3069
3070 for_each_sg(sglist, sg, nelems, i)
3071 size += aligned_nrpages(sg->offset, sg->length);
3072
3073 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3074 pdev->dma_mask);
3075 if (!iova) {
3076 sglist->dma_length = 0;
3077 return 0;
3078 }
3079
3080 /*
3081 * Check if DMAR supports zero-length reads on write only
3082 * mappings.
3083 */
3084 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3085 !cap_zlr(iommu->cap))
3086 prot |= DMA_PTE_READ;
3087 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3088 prot |= DMA_PTE_WRITE;
3089
3090 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3091
3092 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3093 if (unlikely(ret)) {
3094 /* clear the page */
3095 dma_pte_clear_range(domain, start_vpfn,
3096 start_vpfn + size - 1);
3097 /* free page tables */
3098 dma_pte_free_pagetable(domain, start_vpfn,
3099 start_vpfn + size - 1);
3100 /* free iova */
3101 __free_iova(&domain->iovad, iova);
3102 return 0;
3103 }
3104
3105 /* it's a non-present to present mapping. Only flush if caching mode */
3106 if (cap_caching_mode(iommu->cap))
3107 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3108 else
3109 iommu_flush_write_buffer(iommu);
3110
3111 return nelems;
3112 }
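/*
 * Illustrative sketch (not built): a driver maps a scatterlist through the
 * generic DMA API, which dispatches to intel_map_sg() above.  'dev', 'sgl'
 * and 'nents' are assumed to be prepared by the caller.
 */
#if 0
static int example_driver_map_sg(struct device *dev, struct scatterlist *sgl,
				 int nents)
{
	int mapped = dma_map_sg(dev, sgl, nents, DMA_FROM_DEVICE);

	if (!mapped)		/* intel_map_sg() returns 0 on failure */
		return -ENOMEM;

	/* ... program the device using sg_dma_address()/sg_dma_len() ... */

	dma_unmap_sg(dev, sgl, nents, DMA_FROM_DEVICE);
	return 0;
}
#endif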
3113
3114 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3115 {
3116 return !dma_addr;
3117 }
3118
3119 struct dma_map_ops intel_dma_ops = {
3120 .alloc_coherent = intel_alloc_coherent,
3121 .free_coherent = intel_free_coherent,
3122 .map_sg = intel_map_sg,
3123 .unmap_sg = intel_unmap_sg,
3124 .map_page = intel_map_page,
3125 .unmap_page = intel_unmap_page,
3126 .mapping_error = intel_mapping_error,
3127 };
3128
3129 static inline int iommu_domain_cache_init(void)
3130 {
3131 int ret = 0;
3132
3133 iommu_domain_cache = kmem_cache_create("iommu_domain",
3134 sizeof(struct dmar_domain),
3135 0,
3136 SLAB_HWCACHE_ALIGN,
3137
3138 NULL);
3139 if (!iommu_domain_cache) {
3140 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3141 ret = -ENOMEM;
3142 }
3143
3144 return ret;
3145 }
3146
3147 static inline int iommu_devinfo_cache_init(void)
3148 {
3149 int ret = 0;
3150
3151 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3152 sizeof(struct device_domain_info),
3153 0,
3154 SLAB_HWCACHE_ALIGN,
3155 NULL);
3156 if (!iommu_devinfo_cache) {
3157 printk(KERN_ERR "Couldn't create devinfo cache\n");
3158 ret = -ENOMEM;
3159 }
3160
3161 return ret;
3162 }
3163
3164 static inline int iommu_iova_cache_init(void)
3165 {
3166 int ret = 0;
3167
3168 iommu_iova_cache = kmem_cache_create("iommu_iova",
3169 sizeof(struct iova),
3170 0,
3171 SLAB_HWCACHE_ALIGN,
3172 NULL);
3173 if (!iommu_iova_cache) {
3174 printk(KERN_ERR "Couldn't create iova cache\n");
3175 ret = -ENOMEM;
3176 }
3177
3178 return ret;
3179 }
3180
3181 static int __init iommu_init_mempool(void)
3182 {
3183 int ret;
3184 ret = iommu_iova_cache_init();
3185 if (ret)
3186 return ret;
3187
3188 ret = iommu_domain_cache_init();
3189 if (ret)
3190 goto domain_error;
3191
3192 ret = iommu_devinfo_cache_init();
3193 if (!ret)
3194 return ret;
3195
3196 kmem_cache_destroy(iommu_domain_cache);
3197 domain_error:
3198 kmem_cache_destroy(iommu_iova_cache);
3199
3200 return -ENOMEM;
3201 }
3202
3203 static void __init iommu_exit_mempool(void)
3204 {
3205 kmem_cache_destroy(iommu_devinfo_cache);
3206 kmem_cache_destroy(iommu_domain_cache);
3207 kmem_cache_destroy(iommu_iova_cache);
3208
3209 }
3210
3211 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3212 {
3213 struct dmar_drhd_unit *drhd;
3214 u32 vtbar;
3215 int rc;
3216
3217 /* We know that this device on this chipset has its own IOMMU.
3218 * If we find it under a different IOMMU, then the BIOS is lying
3219 * to us. Hope that the IOMMU for this device is actually
3220 * disabled, and it needs no translation...
3221 */
3222 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3223 if (rc) {
3224 /* "can't" happen */
3225 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3226 return;
3227 }
3228 vtbar &= 0xffff0000;
3229
3230 /* we know that this iommu should be at offset 0xa000 from vtbar */
3231 drhd = dmar_find_matched_drhd_unit(pdev);
3232 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3233 TAINT_FIRMWARE_WORKAROUND,
3234 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3235 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3236 }
3237 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3238
3239 static void __init init_no_remapping_devices(void)
3240 {
3241 struct dmar_drhd_unit *drhd;
3242
3243 for_each_drhd_unit(drhd) {
3244 if (!drhd->include_all) {
3245 int i;
3246 for (i = 0; i < drhd->devices_cnt; i++)
3247 if (drhd->devices[i] != NULL)
3248 break;
3249 /* ignore DMAR unit if no pci devices exist */
3250 if (i == drhd->devices_cnt)
3251 drhd->ignored = 1;
3252 }
3253 }
3254
3255 for_each_drhd_unit(drhd) {
3256 int i;
3257 if (drhd->ignored || drhd->include_all)
3258 continue;
3259
3260 for (i = 0; i < drhd->devices_cnt; i++)
3261 if (drhd->devices[i] &&
3262 !IS_GFX_DEVICE(drhd->devices[i]))
3263 break;
3264
3265 if (i < drhd->devices_cnt)
3266 continue;
3267
3268 /* This IOMMU has *only* gfx devices. Either bypass it or
3269 set the gfx_mapped flag, as appropriate */
3270 if (dmar_map_gfx) {
3271 intel_iommu_gfx_mapped = 1;
3272 } else {
3273 drhd->ignored = 1;
3274 for (i = 0; i < drhd->devices_cnt; i++) {
3275 if (!drhd->devices[i])
3276 continue;
3277 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3278 }
3279 }
3280 }
3281 }
3282
3283 #ifdef CONFIG_SUSPEND
3284 static int init_iommu_hw(void)
3285 {
3286 struct dmar_drhd_unit *drhd;
3287 struct intel_iommu *iommu = NULL;
3288
3289 for_each_active_iommu(iommu, drhd)
3290 if (iommu->qi)
3291 dmar_reenable_qi(iommu);
3292
3293 for_each_iommu(iommu, drhd) {
3294 if (drhd->ignored) {
3295 /*
3296 * we always have to disable PMRs or DMA may fail on
3297 * this device
3298 */
3299 if (force_on)
3300 iommu_disable_protect_mem_regions(iommu);
3301 continue;
3302 }
3303
3304 iommu_flush_write_buffer(iommu);
3305
3306 iommu_set_root_entry(iommu);
3307
3308 iommu->flush.flush_context(iommu, 0, 0, 0,
3309 DMA_CCMD_GLOBAL_INVL);
3310 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3311 DMA_TLB_GLOBAL_FLUSH);
3312 if (iommu_enable_translation(iommu))
3313 return 1;
3314 iommu_disable_protect_mem_regions(iommu);
3315 }
3316
3317 return 0;
3318 }
3319
3320 static void iommu_flush_all(void)
3321 {
3322 struct dmar_drhd_unit *drhd;
3323 struct intel_iommu *iommu;
3324
3325 for_each_active_iommu(iommu, drhd) {
3326 iommu->flush.flush_context(iommu, 0, 0, 0,
3327 DMA_CCMD_GLOBAL_INVL);
3328 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3329 DMA_TLB_GLOBAL_FLUSH);
3330 }
3331 }
3332
3333 static int iommu_suspend(void)
3334 {
3335 struct dmar_drhd_unit *drhd;
3336 struct intel_iommu *iommu = NULL;
3337 unsigned long flag;
3338
3339 for_each_active_iommu(iommu, drhd) {
3340 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3341 GFP_ATOMIC);
3342 if (!iommu->iommu_state)
3343 goto nomem;
3344 }
3345
3346 iommu_flush_all();
3347
3348 for_each_active_iommu(iommu, drhd) {
3349 iommu_disable_translation(iommu);
3350
3351 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3352
3353 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3354 readl(iommu->reg + DMAR_FECTL_REG);
3355 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3356 readl(iommu->reg + DMAR_FEDATA_REG);
3357 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3358 readl(iommu->reg + DMAR_FEADDR_REG);
3359 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3360 readl(iommu->reg + DMAR_FEUADDR_REG);
3361
3362 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3363 }
3364 return 0;
3365
3366 nomem:
3367 for_each_active_iommu(iommu, drhd)
3368 kfree(iommu->iommu_state);
3369
3370 return -ENOMEM;
3371 }
3372
3373 static void iommu_resume(void)
3374 {
3375 struct dmar_drhd_unit *drhd;
3376 struct intel_iommu *iommu = NULL;
3377 unsigned long flag;
3378
3379 if (init_iommu_hw()) {
3380 if (force_on)
3381 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3382 else
3383 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3384 return;
3385 }
3386
3387 for_each_active_iommu(iommu, drhd) {
3388
3389 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3390
3391 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3392 iommu->reg + DMAR_FECTL_REG);
3393 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3394 iommu->reg + DMAR_FEDATA_REG);
3395 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3396 iommu->reg + DMAR_FEADDR_REG);
3397 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3398 iommu->reg + DMAR_FEUADDR_REG);
3399
3400 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3401 }
3402
3403 for_each_active_iommu(iommu, drhd)
3404 kfree(iommu->iommu_state);
3405 }
3406
3407 static struct syscore_ops iommu_syscore_ops = {
3408 .resume = iommu_resume,
3409 .suspend = iommu_suspend,
3410 };
3411
3412 static void __init init_iommu_pm_ops(void)
3413 {
3414 register_syscore_ops(&iommu_syscore_ops);
3415 }
3416
3417 #else
3418 static inline void init_iommu_pm_ops(void) {}
3419 #endif /* CONFIG_SUSPEND */
3420
3421 LIST_HEAD(dmar_rmrr_units);
3422
3423 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3424 {
3425 list_add(&rmrr->list, &dmar_rmrr_units);
3426 }
3427
3428
3429 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3430 {
3431 struct acpi_dmar_reserved_memory *rmrr;
3432 struct dmar_rmrr_unit *rmrru;
3433
3434 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3435 if (!rmrru)
3436 return -ENOMEM;
3437
3438 rmrru->hdr = header;
3439 rmrr = (struct acpi_dmar_reserved_memory *)header;
3440 rmrru->base_address = rmrr->base_address;
3441 rmrru->end_address = rmrr->end_address;
3442
3443 dmar_register_rmrr_unit(rmrru);
3444 return 0;
3445 }
3446
3447 static int __init
3448 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3449 {
3450 struct acpi_dmar_reserved_memory *rmrr;
3451 int ret;
3452
3453 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3454 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3455 ((void *)rmrr) + rmrr->header.length,
3456 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3457
3458 if (ret || (rmrru->devices_cnt == 0)) {
3459 list_del(&rmrru->list);
3460 kfree(rmrru);
3461 }
3462 return ret;
3463 }
3464
3465 static LIST_HEAD(dmar_atsr_units);
3466
3467 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3468 {
3469 struct acpi_dmar_atsr *atsr;
3470 struct dmar_atsr_unit *atsru;
3471
3472 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3473 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3474 if (!atsru)
3475 return -ENOMEM;
3476
3477 atsru->hdr = hdr;
3478 atsru->include_all = atsr->flags & 0x1;
3479
3480 list_add(&atsru->list, &dmar_atsr_units);
3481
3482 return 0;
3483 }
3484
3485 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3486 {
3487 int rc;
3488 struct acpi_dmar_atsr *atsr;
3489
3490 if (atsru->include_all)
3491 return 0;
3492
3493 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3494 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3495 (void *)atsr + atsr->header.length,
3496 &atsru->devices_cnt, &atsru->devices,
3497 atsr->segment);
3498 if (rc || !atsru->devices_cnt) {
3499 list_del(&atsru->list);
3500 kfree(atsru);
3501 }
3502
3503 return rc;
3504 }
3505
3506 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3507 {
3508 int i;
3509 struct pci_bus *bus;
3510 struct acpi_dmar_atsr *atsr;
3511 struct dmar_atsr_unit *atsru;
3512
3513 dev = pci_physfn(dev);
3514
3515 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3516 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3517 if (atsr->segment == pci_domain_nr(dev->bus))
3518 goto found;
3519 }
3520
3521 return 0;
3522
3523 found:
3524 for (bus = dev->bus; bus; bus = bus->parent) {
3525 struct pci_dev *bridge = bus->self;
3526
3527 if (!bridge || !pci_is_pcie(bridge) ||
3528 bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
3529 return 0;
3530
3531 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
3532 for (i = 0; i < atsru->devices_cnt; i++)
3533 if (atsru->devices[i] == bridge)
3534 return 1;
3535 break;
3536 }
3537 }
3538
3539 if (atsru->include_all)
3540 return 1;
3541
3542 return 0;
3543 }
3544
3545 int __init dmar_parse_rmrr_atsr_dev(void)
3546 {
3547 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3548 struct dmar_atsr_unit *atsr, *atsr_n;
3549 int ret = 0;
3550
3551 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3552 ret = rmrr_parse_dev(rmrr);
3553 if (ret)
3554 return ret;
3555 }
3556
3557 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3558 ret = atsr_parse_dev(atsr);
3559 if (ret)
3560 return ret;
3561 }
3562
3563 return ret;
3564 }
3565
3566 /*
3567 * Here we only respond to the action of a device being unbound from its driver.
3568 *
3569 * A newly added device is not attached to its DMAR domain here yet; that
3570 * happens when the device is mapped to an iova.
3571 */
3572 static int device_notifier(struct notifier_block *nb,
3573 unsigned long action, void *data)
3574 {
3575 struct device *dev = data;
3576 struct pci_dev *pdev = to_pci_dev(dev);
3577 struct dmar_domain *domain;
3578
3579 if (iommu_no_mapping(dev))
3580 return 0;
3581
3582 domain = find_domain(pdev);
3583 if (!domain)
3584 return 0;
3585
3586 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3587 domain_remove_one_dev_info(domain, pdev);
3588
3589 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3590 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3591 list_empty(&domain->devices))
3592 domain_exit(domain);
3593 }
3594
3595 return 0;
3596 }
3597
3598 static struct notifier_block device_nb = {
3599 .notifier_call = device_notifier,
3600 };
3601
3602 int __init intel_iommu_init(void)
3603 {
3604 int ret = 0;
3605
3606 /* VT-d is required for a TXT/tboot launch, so enforce that */
3607 force_on = tboot_force_iommu();
3608
3609 if (dmar_table_init()) {
3610 if (force_on)
3611 panic("tboot: Failed to initialize DMAR table\n");
3612 return -ENODEV;
3613 }
3614
3615 if (dmar_dev_scope_init() < 0) {
3616 if (force_on)
3617 panic("tboot: Failed to initialize DMAR device scope\n");
3618 return -ENODEV;
3619 }
3620
3621 if (no_iommu || dmar_disabled)
3622 return -ENODEV;
3623
3624 if (iommu_init_mempool()) {
3625 if (force_on)
3626 panic("tboot: Failed to initialize iommu memory\n");
3627 return -ENODEV;
3628 }
3629
3630 if (list_empty(&dmar_rmrr_units))
3631 printk(KERN_INFO "DMAR: No RMRR found\n");
3632
3633 if (list_empty(&dmar_atsr_units))
3634 printk(KERN_INFO "DMAR: No ATSR found\n");
3635
3636 if (dmar_init_reserved_ranges()) {
3637 if (force_on)
3638 panic("tboot: Failed to reserve iommu ranges\n");
3639 return -ENODEV;
3640 }
3641
3642 init_no_remapping_devices();
3643
3644 ret = init_dmars();
3645 if (ret) {
3646 if (force_on)
3647 panic("tboot: Failed to initialize DMARs\n");
3648 printk(KERN_ERR "IOMMU: dmar init failed\n");
3649 put_iova_domain(&reserved_iova_list);
3650 iommu_exit_mempool();
3651 return ret;
3652 }
3653 printk(KERN_INFO
3654 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3655
3656 init_timer(&unmap_timer);
3657 #ifdef CONFIG_SWIOTLB
3658 swiotlb = 0;
3659 #endif
3660 dma_ops = &intel_dma_ops;
3661
3662 init_iommu_pm_ops();
3663
3664 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3665
3666 bus_register_notifier(&pci_bus_type, &device_nb);
3667
3668 return 0;
3669 }
3670
3671 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3672 struct pci_dev *pdev)
3673 {
3674 struct pci_dev *tmp, *parent;
3675
3676 if (!iommu || !pdev)
3677 return;
3678
3679 /* dependent device detach */
3680 tmp = pci_find_upstream_pcie_bridge(pdev);
3681 /* Secondary interface's bus number and devfn 0 */
3682 if (tmp) {
3683 parent = pdev->bus->self;
3684 while (parent != tmp) {
3685 iommu_detach_dev(iommu, parent->bus->number,
3686 parent->devfn);
3687 parent = parent->bus->self;
3688 }
3689 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3690 iommu_detach_dev(iommu,
3691 tmp->subordinate->number, 0);
3692 else /* this is a legacy PCI bridge */
3693 iommu_detach_dev(iommu, tmp->bus->number,
3694 tmp->devfn);
3695 }
3696 }
3697
3698 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3699 struct pci_dev *pdev)
3700 {
3701 struct device_domain_info *info;
3702 struct intel_iommu *iommu;
3703 unsigned long flags;
3704 int found = 0;
3705 struct list_head *entry, *tmp;
3706
3707 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3708 pdev->devfn);
3709 if (!iommu)
3710 return;
3711
3712 spin_lock_irqsave(&device_domain_lock, flags);
3713 list_for_each_safe(entry, tmp, &domain->devices) {
3714 info = list_entry(entry, struct device_domain_info, link);
3715 if (info->segment == pci_domain_nr(pdev->bus) &&
3716 info->bus == pdev->bus->number &&
3717 info->devfn == pdev->devfn) {
3718 list_del(&info->link);
3719 list_del(&info->global);
3720 if (info->dev)
3721 info->dev->dev.archdata.iommu = NULL;
3722 spin_unlock_irqrestore(&device_domain_lock, flags);
3723
3724 iommu_disable_dev_iotlb(info);
3725 iommu_detach_dev(iommu, info->bus, info->devfn);
3726 iommu_detach_dependent_devices(iommu, pdev);
3727 free_devinfo_mem(info);
3728
3729 spin_lock_irqsave(&device_domain_lock, flags);
3730
3731 if (found)
3732 break;
3733 else
3734 continue;
3735 }
3736
3737 /* if there are no other devices under the same iommu
3738 * owned by this domain, clear this iommu in iommu_bmp,
3739 * update iommu count and coherency
3740 */
3741 if (iommu == device_to_iommu(info->segment, info->bus,
3742 info->devfn))
3743 found = 1;
3744 }
3745
3746 spin_unlock_irqrestore(&device_domain_lock, flags);
3747
3748 if (found == 0) {
3749 unsigned long tmp_flags;
3750 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3751 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3752 domain->iommu_count--;
3753 domain_update_iommu_cap(domain);
3754 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3755
3756 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3757 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3758 spin_lock_irqsave(&iommu->lock, tmp_flags);
3759 clear_bit(domain->id, iommu->domain_ids);
3760 iommu->domains[domain->id] = NULL;
3761 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3762 }
3763 }
3764 }
3765
3766 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3767 {
3768 struct device_domain_info *info;
3769 struct intel_iommu *iommu;
3770 unsigned long flags1, flags2;
3771
3772 spin_lock_irqsave(&device_domain_lock, flags1);
3773 while (!list_empty(&domain->devices)) {
3774 info = list_entry(domain->devices.next,
3775 struct device_domain_info, link);
3776 list_del(&info->link);
3777 list_del(&info->global);
3778 if (info->dev)
3779 info->dev->dev.archdata.iommu = NULL;
3780
3781 spin_unlock_irqrestore(&device_domain_lock, flags1);
3782
3783 iommu_disable_dev_iotlb(info);
3784 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3785 iommu_detach_dev(iommu, info->bus, info->devfn);
3786 iommu_detach_dependent_devices(iommu, info->dev);
3787
3788 /* clear this iommu in iommu_bmp, update iommu count
3789 * and capabilities
3790 */
3791 spin_lock_irqsave(&domain->iommu_lock, flags2);
3792 if (test_and_clear_bit(iommu->seq_id,
3793 &domain->iommu_bmp)) {
3794 domain->iommu_count--;
3795 domain_update_iommu_cap(domain);
3796 }
3797 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3798
3799 free_devinfo_mem(info);
3800 spin_lock_irqsave(&device_domain_lock, flags1);
3801 }
3802 spin_unlock_irqrestore(&device_domain_lock, flags1);
3803 }
3804
3805 /* domain id for a virtual machine; it won't be set in the context entry */
3806 static unsigned long vm_domid;
3807
3808 static struct dmar_domain *iommu_alloc_vm_domain(void)
3809 {
3810 struct dmar_domain *domain;
3811
3812 domain = alloc_domain_mem();
3813 if (!domain)
3814 return NULL;
3815
3816 domain->id = vm_domid++;
3817 domain->nid = -1;
3818 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3819 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3820
3821 return domain;
3822 }
3823
3824 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3825 {
3826 int adjust_width;
3827
3828 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3829 spin_lock_init(&domain->iommu_lock);
3830
3831 domain_reserve_special_ranges(domain);
3832
3833 /* calculate AGAW */
3834 domain->gaw = guest_width;
3835 adjust_width = guestwidth_to_adjustwidth(guest_width);
3836 domain->agaw = width_to_agaw(adjust_width);
3837
3838 INIT_LIST_HEAD(&domain->devices);
3839
3840 domain->iommu_count = 0;
3841 domain->iommu_coherency = 0;
3842 domain->iommu_snooping = 0;
3843 domain->iommu_superpage = 0;
3844 domain->max_addr = 0;
3845 domain->nid = -1;
3846
3847 /* always allocate the top pgd */
3848 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3849 if (!domain->pgd)
3850 return -ENOMEM;
3851 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3852 return 0;
3853 }
3854
3855 static void iommu_free_vm_domain(struct dmar_domain *domain)
3856 {
3857 unsigned long flags;
3858 struct dmar_drhd_unit *drhd;
3859 struct intel_iommu *iommu;
3860 unsigned long i;
3861 unsigned long ndomains;
3862
3863 for_each_drhd_unit(drhd) {
3864 if (drhd->ignored)
3865 continue;
3866 iommu = drhd->iommu;
3867
3868 ndomains = cap_ndoms(iommu->cap);
3869 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3870 if (iommu->domains[i] == domain) {
3871 spin_lock_irqsave(&iommu->lock, flags);
3872 clear_bit(i, iommu->domain_ids);
3873 iommu->domains[i] = NULL;
3874 spin_unlock_irqrestore(&iommu->lock, flags);
3875 break;
3876 }
3877 }
3878 }
3879 }
3880
3881 static void vm_domain_exit(struct dmar_domain *domain)
3882 {
3883 /* Domain 0 is reserved, so don't process it */
3884 if (!domain)
3885 return;
3886
3887 vm_domain_remove_all_dev_info(domain);
3888 /* destroy iovas */
3889 put_iova_domain(&domain->iovad);
3890
3891 /* clear ptes */
3892 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3893
3894 /* free page tables */
3895 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3896
3897 iommu_free_vm_domain(domain);
3898 free_domain_mem(domain);
3899 }
3900
3901 static int intel_iommu_domain_init(struct iommu_domain *domain)
3902 {
3903 struct dmar_domain *dmar_domain;
3904
3905 dmar_domain = iommu_alloc_vm_domain();
3906 if (!dmar_domain) {
3907 printk(KERN_ERR
3908 "intel_iommu_domain_init: dmar_domain == NULL\n");
3909 return -ENOMEM;
3910 }
3911 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3912 printk(KERN_ERR
3913 "intel_iommu_domain_init() failed\n");
3914 vm_domain_exit(dmar_domain);
3915 return -ENOMEM;
3916 }
3917 domain_update_iommu_cap(dmar_domain);
3918 domain->priv = dmar_domain;
3919
3920 return 0;
3921 }
3922
3923 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3924 {
3925 struct dmar_domain *dmar_domain = domain->priv;
3926
3927 domain->priv = NULL;
3928 vm_domain_exit(dmar_domain);
3929 }
3930
3931 static int intel_iommu_attach_device(struct iommu_domain *domain,
3932 struct device *dev)
3933 {
3934 struct dmar_domain *dmar_domain = domain->priv;
3935 struct pci_dev *pdev = to_pci_dev(dev);
3936 struct intel_iommu *iommu;
3937 int addr_width;
3938
3939 /* normally pdev is not mapped */
3940 if (unlikely(domain_context_mapped(pdev))) {
3941 struct dmar_domain *old_domain;
3942
3943 old_domain = find_domain(pdev);
3944 if (old_domain) {
3945 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3946 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3947 domain_remove_one_dev_info(old_domain, pdev);
3948 else
3949 domain_remove_dev_info(old_domain);
3950 }
3951 }
3952
3953 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3954 pdev->devfn);
3955 if (!iommu)
3956 return -ENODEV;
3957
3958 /* check if this iommu agaw is sufficient for max mapped address */
3959 addr_width = agaw_to_width(iommu->agaw);
3960 if (addr_width > cap_mgaw(iommu->cap))
3961 addr_width = cap_mgaw(iommu->cap);
3962
3963 if (dmar_domain->max_addr > (1LL << addr_width)) {
3964 printk(KERN_ERR "%s: iommu width (%d) is not "
3965 "sufficient for the mapped address (%llx)\n",
3966 __func__, addr_width, dmar_domain->max_addr);
3967 return -EFAULT;
3968 }
3969 dmar_domain->gaw = addr_width;
3970
3971 /*
3972 * Knock out extra levels of page tables if necessary
3973 */
3974 while (iommu->agaw < dmar_domain->agaw) {
3975 struct dma_pte *pte;
3976
3977 pte = dmar_domain->pgd;
3978 if (dma_pte_present(pte)) {
3979 dmar_domain->pgd = (struct dma_pte *)
3980 phys_to_virt(dma_pte_addr(pte));
3981 free_pgtable_page(pte);
3982 }
3983 dmar_domain->agaw--;
3984 }
3985
3986 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3987 }
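/*
 * Illustrative sketch (not built): the attach path above is normally
 * driven through the generic IOMMU API, roughly as below.  Addresses and
 * sizes are made up, error handling is trimmed, and 'dev' is assumed to
 * be a PCI device handed in by the caller.
 */
#if 0
static int example_generic_iommu_usage(struct device *dev)
{
	struct iommu_domain *domain = iommu_domain_alloc(&pci_bus_type);
	int ret;

	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);	/* -> intel_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* map 1MiB at IOVA 0 to physical 0x40000000, read/write */
	ret = iommu_map(domain, 0, 0x40000000, 1UL << 20,
			IOMMU_READ | IOMMU_WRITE);
	if (!ret)
		iommu_unmap(domain, 0, 1UL << 20);

	iommu_detach_device(domain, dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}
#endif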
3988
3989 static void intel_iommu_detach_device(struct iommu_domain *domain,
3990 struct device *dev)
3991 {
3992 struct dmar_domain *dmar_domain = domain->priv;
3993 struct pci_dev *pdev = to_pci_dev(dev);
3994
3995 domain_remove_one_dev_info(dmar_domain, pdev);
3996 }
3997
3998 static int intel_iommu_map(struct iommu_domain *domain,
3999 unsigned long iova, phys_addr_t hpa,
4000 size_t size, int iommu_prot)
4001 {
4002 struct dmar_domain *dmar_domain = domain->priv;
4003 u64 max_addr;
4004 int prot = 0;
4005 int ret;
4006
4007 if (iommu_prot & IOMMU_READ)
4008 prot |= DMA_PTE_READ;
4009 if (iommu_prot & IOMMU_WRITE)
4010 prot |= DMA_PTE_WRITE;
4011 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4012 prot |= DMA_PTE_SNP;
4013
4014 max_addr = iova + size;
4015 if (dmar_domain->max_addr < max_addr) {
4016 u64 end;
4017
4018 /* check if minimum agaw is sufficient for mapped address */
4019 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4020 if (end < max_addr) {
4021 printk(KERN_ERR "%s: iommu width (%d) is not "
4022 "sufficient for the mapped address (%llx)\n",
4023 __func__, dmar_domain->gaw, max_addr);
4024 return -EFAULT;
4025 }
4026 dmar_domain->max_addr = max_addr;
4027 }
4028 /* Round up size to next multiple of PAGE_SIZE, if it and
4029 the low bits of hpa would take us onto the next page */
4030 size = aligned_nrpages(hpa, size);
4031 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4032 hpa >> VTD_PAGE_SHIFT, size, prot);
4033 return ret;
4034 }
4035
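/*
 * Clear the page-table entries backing the range at @iova and pull
 * max_addr back down if the range ended exactly there.  Returns the
 * size of the region that was unmapped.
 */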
4036 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4037 unsigned long iova, size_t size)
4038 {
4039 struct dmar_domain *dmar_domain = domain->priv;
4040 int order;
4041
4042 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4043 (iova + size - 1) >> VTD_PAGE_SHIFT);
4044
4045 if (dmar_domain->max_addr == iova + size)
4046 dmar_domain->max_addr = iova;
4047
4048 return PAGE_SIZE << order;
4049 }
4050
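/*
 * Look up the leaf PTE for @iova and return the physical address stored
 * there, or 0 if the address is not mapped.
 */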
4051 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4052 unsigned long iova)
4053 {
4054 struct dmar_domain *dmar_domain = domain->priv;
4055 struct dma_pte *pte;
4056 u64 phys = 0;
4057
4058 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4059 if (pte)
4060 phys = dma_pte_addr(pte);
4061
4062 return phys;
4063 }
4064
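/*
 * Report whether the domain supports cache-coherent (snooped) mappings
 * and whether interrupt remapping is enabled.
 */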
4065 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4066 unsigned long cap)
4067 {
4068 struct dmar_domain *dmar_domain = domain->priv;
4069
4070 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4071 return dmar_domain->iommu_snooping;
4072 if (cap == IOMMU_CAP_INTR_REMAP)
4073 return intr_remapping_enabled;
4074
4075 return 0;
4076 }
4077
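/* The callback table this driver registers with the generic IOMMU layer. */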
4078 static struct iommu_ops intel_iommu_ops = {
4079 .domain_init = intel_iommu_domain_init,
4080 .domain_destroy = intel_iommu_domain_destroy,
4081 .attach_dev = intel_iommu_attach_device,
4082 .detach_dev = intel_iommu_detach_device,
4083 .map = intel_iommu_map,
4084 .unmap = intel_iommu_unmap,
4085 .iova_to_phys = intel_iommu_iova_to_phys,
4086 .domain_has_cap = intel_iommu_domain_has_cap,
4087 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4088 };
4089
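/*
 * Work around chipsets that need the write-buffer flush capability but
 * do not advertise it.
 */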
4090 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
4091 {
4092 /*
4093 * Mobile 4 Series Chipset neglects to set RWBF capability,
4094 * but needs it:
4095 */
4096 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4097 rwbf_quirk = 1;
4098
4099 /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
4100 if (dev->revision == 0x07) {
4101 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4102 dmar_map_gfx = 0;
4103 }
4104 }
4105
4106 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4107
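/*
 * Fields of the GGC config-space register on the host bridges handled
 * by the quirk below: how much GTT memory the BIOS set aside and
 * whether a VT-enabled (shadow GTT) size was chosen.
 */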
4108 #define GGC 0x52
4109 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
4110 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4111 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
4112 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
4113 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4114 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4115 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4116 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4117
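/*
 * Calpella/Ironlake host bridges: if the BIOS allocated no shadow GTT,
 * disable the IOMMU for graphics; if it did, force strict (unbatched)
 * IOTLB flushing, since the graphics device must be idle before its
 * mappings are flushed.
 */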
4118 static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4119 {
4120 unsigned short ggc;
4121
4122 if (pci_read_config_word(dev, GGC, &ggc))
4123 return;
4124
4125 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4126 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4127 dmar_map_gfx = 0;
4128 } else if (dmar_map_gfx) {
4129 /* we have to ensure the gfx device is idle before we flush */
4130 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4131 intel_iommu_strict = 1;
4132 }
4133 }
4134 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4135 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4136 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4137 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4138
4139 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4140 ISOCH DMAR unit for the Azalia sound device, but not give it any
4141 TLB entries, which causes it to deadlock. Check for that. We do
4142 this in a function called from init_dmars(), instead of in a PCI
4143 quirk, because we don't want to print the obnoxious "BIOS broken"
4144 message if VT-d is actually disabled.
4145 */
4146 static void __init check_tylersburg_isoch(void)
4147 {
4148 struct pci_dev *pdev;
4149 uint32_t vtisochctrl;
4150
4151 /* If there's no Azalia in the system anyway, forget it. */
4152 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4153 if (!pdev)
4154 return;
4155 pci_dev_put(pdev);
4156
4157 	/* System Management Registers device. It might be hidden, in which
4158 	   case we can't do the sanity check. But that's OK, because the
4159 	   known-broken BIOSes _don't_ actually hide it, so far. */
4160 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4161 if (!pdev)
4162 return;
4163
4164 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4165 pci_dev_put(pdev);
4166 return;
4167 }
4168
4169 pci_dev_put(pdev);
4170
4171 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4172 if (vtisochctrl & 1)
4173 return;
4174
4175 /* Drop all bits other than the number of TLB entries */
4176 vtisochctrl &= 0x1c;
4177
4178 /* If we have the recommended number of TLB entries (16), fine. */
4179 if (vtisochctrl == 0x10)
4180 return;
4181
4182 	/* Zero TLB entries? Then the BIOS really is broken. */
4183 if (!vtisochctrl) {
4184 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4185 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4186 dmi_get_system_info(DMI_BIOS_VENDOR),
4187 dmi_get_system_info(DMI_BIOS_VERSION),
4188 dmi_get_system_info(DMI_PRODUCT_VERSION));
4189 iommu_identity_mapping |= IDENTMAP_AZALIA;
4190 return;
4191 }
4192
4193 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4194 vtisochctrl);
4195 }