intel-iommu: VT-d page table to support snooping control bit
drivers/pci/intel-iommu.c (GitHub/mt8127/android_kernel_alcatel_ttab.git)
ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
38717946 36#include <linux/iova.h>
5d450806 37#include <linux/iommu.h>
38717946 38#include <linux/intel-iommu.h>
ba395927 39#include <asm/cacheflush.h>
46a7fa27 40#include <asm/iommu.h>
ba395927
KA
41#include "pci.h"
42
5b6985ce
FY
43#define ROOT_SIZE VTD_PAGE_SIZE
44#define CONTEXT_SIZE VTD_PAGE_SIZE
45
ba395927
KA
46#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
47#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
48
49#define IOAPIC_RANGE_START (0xfee00000)
50#define IOAPIC_RANGE_END (0xfeefffff)
51#define IOVA_START_ADDR (0x1000)
52
53#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
54
ba395927
KA
55#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56
f27be03b
MM
57#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
58#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
59#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
5e0d2a6f 60
d9630fe9
WH
61/* global iommu list, set NULL for ignored DMAR units */
62static struct intel_iommu **g_iommus;
63
9af88143
DW
64static int rwbf_quirk;
65
46b08e1a
MM
66/*
67 * 0: Present
68 * 1-11: Reserved
69 * 12-63: Context Ptr (12 - (haw-1))
70 * 64-127: Reserved
71 */
72struct root_entry {
73 u64 val;
74 u64 rsvd1;
75};
76#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
77static inline bool root_present(struct root_entry *root)
78{
79 return (root->val & 1);
80}
81static inline void set_root_present(struct root_entry *root)
82{
83 root->val |= 1;
84}
85static inline void set_root_value(struct root_entry *root, unsigned long value)
86{
87 root->val |= value & VTD_PAGE_MASK;
88}
89
90static inline struct context_entry *
91get_context_addr_from_root(struct root_entry *root)
92{
93 return (struct context_entry *)
94 (root_present(root)?phys_to_virt(
95 root->val & VTD_PAGE_MASK) :
96 NULL);
97}
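/*
 * Illustrative sketch (not part of the original file): how a (bus, devfn)
 * pair resolves to a context entry through the root table.  The root entry
 * for 'bus' holds the physical address of a 4KB context table whose 256
 * entries are indexed by devfn; 'iommu', 'bus' and 'devfn' are placeholders.
 *
 *	struct root_entry *root = &iommu->root_entry[bus];
 *	struct context_entry *ctx_table;
 *
 *	if (!root_present(root))
 *		return NULL;
 *	ctx_table = phys_to_virt(root->val & VTD_PAGE_MASK);
 *	return &ctx_table[devfn];
 */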
98
7a8fc25e
MM
99/*
100 * low 64 bits:
101 * 0: present
102 * 1: fault processing disable
103 * 2-3: translation type
104 * 12-63: address space root
105 * high 64 bits:
106 * 0-2: address width
 107 * 3-6: avail
108 * 8-23: domain id
109 */
110struct context_entry {
111 u64 lo;
112 u64 hi;
113};
c07e7d21
MM
114
115static inline bool context_present(struct context_entry *context)
116{
117 return (context->lo & 1);
118}
119static inline void context_set_present(struct context_entry *context)
120{
121 context->lo |= 1;
122}
123
124static inline void context_set_fault_enable(struct context_entry *context)
125{
126 context->lo &= (((u64)-1) << 2) | 1;
127}
128
7a8fc25e 129#define CONTEXT_TT_MULTI_LEVEL 0
c07e7d21
MM
130
131static inline void context_set_translation_type(struct context_entry *context,
132 unsigned long value)
133{
134 context->lo &= (((u64)-1) << 4) | 3;
135 context->lo |= (value & 3) << 2;
136}
137
138static inline void context_set_address_root(struct context_entry *context,
139 unsigned long value)
140{
141 context->lo |= value & VTD_PAGE_MASK;
142}
143
144static inline void context_set_address_width(struct context_entry *context,
145 unsigned long value)
146{
147 context->hi |= value & 7;
148}
149
150static inline void context_set_domain_id(struct context_entry *context,
151 unsigned long value)
152{
153 context->hi |= (value & ((1 << 16) - 1)) << 8;
154}
155
156static inline void context_clear_entry(struct context_entry *context)
157{
158 context->lo = 0;
159 context->hi = 0;
160}
7a8fc25e 161
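/*
 * Illustrative sketch (not part of the original file): the order in which
 * the helpers above are combined to build a usable context entry, mirroring
 * domain_context_mapping_one() further below.  'ce', 'domain_id', 'agaw'
 * and 'pgd_phys' are placeholders.
 *
 *	context_clear_entry(ce);
 *	context_set_domain_id(ce, domain_id);	   hi word, bits 8-23
 *	context_set_address_width(ce, agaw);	   hi word, bits 0-2
 *	context_set_address_root(ce, pgd_phys);	   lo word, bits 12-63
 *	context_set_translation_type(ce, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(ce);
 *	context_set_present(ce);		   present bit set last
 */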
622ba12a
MM
162/*
163 * 0: readable
164 * 1: writable
165 * 2-6: reserved
166 * 7: super page
9cf06697
SY
167 * 8-10: available
 168 * 11: snoop behavior
622ba12a
MM
 169 * 12-63: Host physical address
170 */
171struct dma_pte {
172 u64 val;
173};
622ba12a 174
19c239ce
MM
175static inline void dma_clear_pte(struct dma_pte *pte)
176{
177 pte->val = 0;
178}
179
180static inline void dma_set_pte_readable(struct dma_pte *pte)
181{
182 pte->val |= DMA_PTE_READ;
183}
184
185static inline void dma_set_pte_writable(struct dma_pte *pte)
186{
187 pte->val |= DMA_PTE_WRITE;
188}
189
9cf06697
SY
190static inline void dma_set_pte_snp(struct dma_pte *pte)
191{
192 pte->val |= DMA_PTE_SNP;
193}
194
19c239ce
MM
195static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
196{
197 pte->val = (pte->val & ~3) | (prot & 3);
198}
199
200static inline u64 dma_pte_addr(struct dma_pte *pte)
201{
202 return (pte->val & VTD_PAGE_MASK);
203}
204
205static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
206{
207 pte->val |= (addr & VTD_PAGE_MASK);
208}
209
210static inline bool dma_pte_present(struct dma_pte *pte)
211{
212 return (pte->val & 3) != 0;
213}
622ba12a 214
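/*
 * Illustrative sketch (not part of the original file): building a leaf PTE
 * with the snoop-behavior bit (bit 11) that this patch adds.  DMA_PTE_SNP
 * should only be used when the hardware reports snoop control, which
 * domain_update_iommu_snooping() below aggregates per domain.  'pte' and
 * 'hpa' are placeholders.
 *
 *	dma_clear_pte(pte);
 *	dma_set_pte_addr(pte, hpa & VTD_PAGE_MASK);
 *	dma_set_pte_prot(pte, DMA_PTE_READ | DMA_PTE_WRITE);
 *	dma_set_pte_snp(pte);
 */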
3b5410e7 215/* devices under the same p2p bridge are owned in one domain */
cdc7b837 216#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 217
1ce28feb
WH
 218/* domain represents a virtual machine; more than one device
219 * across iommus may be owned in one domain, e.g. kvm guest.
220 */
221#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
222
99126f7c
MM
223struct dmar_domain {
224 int id; /* domain id */
8c11e798 225 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
99126f7c
MM
226
227 struct list_head devices; /* all devices' list */
228 struct iova_domain iovad; /* iova's that belong to this domain */
229
230 struct dma_pte *pgd; /* virtual address */
231 spinlock_t mapping_lock; /* page table lock */
232 int gaw; /* max guest address width */
233
234 /* adjusted guest address width, 0 is level 2 30-bit */
235 int agaw;
236
3b5410e7 237 int flags; /* flags to find out type of domain */
8e604097
WH
238
239 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 240 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d
WH
241 int iommu_count; /* reference count of iommu */
242 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 243 u64 max_addr; /* maximum mapped address */
99126f7c
MM
244};
245
a647dacb
MM
246/* PCI domain-device relationship */
247struct device_domain_info {
248 struct list_head link; /* link to domain siblings */
249 struct list_head global; /* link to global list */
 250 u8 bus; /* PCI bus number */
251 u8 devfn; /* PCI devfn number */
252 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
253 struct dmar_domain *domain; /* pointer to domain */
254};
255
5e0d2a6f 256static void flush_unmaps_timeout(unsigned long data);
257
258DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
259
80b20dd8 260#define HIGH_WATER_MARK 250
261struct deferred_flush_tables {
262 int next;
263 struct iova *iova[HIGH_WATER_MARK];
264 struct dmar_domain *domain[HIGH_WATER_MARK];
265};
266
267static struct deferred_flush_tables *deferred_flush;
268
5e0d2a6f 269/* bitmap for indexing intel_iommus */
5e0d2a6f 270static int g_num_of_iommus;
271
272static DEFINE_SPINLOCK(async_umap_flush_lock);
273static LIST_HEAD(unmaps_to_do);
274
275static int timer_on;
276static long list_size;
5e0d2a6f 277
ba395927
KA
278static void domain_remove_dev_info(struct dmar_domain *domain);
279
0cd5c3c8
KM
280#ifdef CONFIG_DMAR_DEFAULT_ON
281int dmar_disabled = 0;
282#else
283int dmar_disabled = 1;
284#endif /*CONFIG_DMAR_DEFAULT_ON*/
285
ba395927 286static int __initdata dmar_map_gfx = 1;
7d3b03ce 287static int dmar_forcedac;
5e0d2a6f 288static int intel_iommu_strict;
ba395927
KA
289
290#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
291static DEFINE_SPINLOCK(device_domain_lock);
292static LIST_HEAD(device_domain_list);
293
a8bcbb0d
JR
294static struct iommu_ops intel_iommu_ops;
295
ba395927
KA
296static int __init intel_iommu_setup(char *str)
297{
298 if (!str)
299 return -EINVAL;
300 while (*str) {
0cd5c3c8
KM
301 if (!strncmp(str, "on", 2)) {
302 dmar_disabled = 0;
303 printk(KERN_INFO "Intel-IOMMU: enabled\n");
304 } else if (!strncmp(str, "off", 3)) {
ba395927 305 dmar_disabled = 1;
0cd5c3c8 306 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
307 } else if (!strncmp(str, "igfx_off", 8)) {
308 dmar_map_gfx = 0;
309 printk(KERN_INFO
310 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 311 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 312 printk(KERN_INFO
7d3b03ce
KA
313 "Intel-IOMMU: Forcing DAC for PCI devices\n");
314 dmar_forcedac = 1;
5e0d2a6f 315 } else if (!strncmp(str, "strict", 6)) {
316 printk(KERN_INFO
317 "Intel-IOMMU: disable batched IOTLB flush\n");
318 intel_iommu_strict = 1;
ba395927
KA
319 }
320
321 str += strcspn(str, ",");
322 while (*str == ',')
323 str++;
324 }
325 return 0;
326}
327__setup("intel_iommu=", intel_iommu_setup);
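/*
 * Example kernel command lines parsed by intel_iommu_setup() above
 * (illustrative):
 *
 *	intel_iommu=on			enable DMAR even if disabled by default
 *	intel_iommu=off			disable DMAR
 *	intel_iommu=on,strict		enable and flush IOTLB synchronously
 *	intel_iommu=igfx_off,forcedac	skip GFX mapping, force 64-bit DAC
 */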
328
329static struct kmem_cache *iommu_domain_cache;
330static struct kmem_cache *iommu_devinfo_cache;
331static struct kmem_cache *iommu_iova_cache;
332
eb3fa7cb
KA
333static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
334{
335 unsigned int flags;
336 void *vaddr;
337
338 /* trying to avoid low memory issues */
339 flags = current->flags & PF_MEMALLOC;
340 current->flags |= PF_MEMALLOC;
341 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
342 current->flags &= (~PF_MEMALLOC | flags);
343 return vaddr;
344}
345
346
ba395927
KA
347static inline void *alloc_pgtable_page(void)
348{
eb3fa7cb
KA
349 unsigned int flags;
350 void *vaddr;
351
352 /* trying to avoid low memory issues */
353 flags = current->flags & PF_MEMALLOC;
354 current->flags |= PF_MEMALLOC;
355 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
356 current->flags &= (~PF_MEMALLOC | flags);
357 return vaddr;
ba395927
KA
358}
359
360static inline void free_pgtable_page(void *vaddr)
361{
362 free_page((unsigned long)vaddr);
363}
364
365static inline void *alloc_domain_mem(void)
366{
eb3fa7cb 367 return iommu_kmem_cache_alloc(iommu_domain_cache);
ba395927
KA
368}
369
38717946 370static void free_domain_mem(void *vaddr)
ba395927
KA
371{
372 kmem_cache_free(iommu_domain_cache, vaddr);
373}
374
375static inline void * alloc_devinfo_mem(void)
376{
eb3fa7cb 377 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
ba395927
KA
378}
379
380static inline void free_devinfo_mem(void *vaddr)
381{
382 kmem_cache_free(iommu_devinfo_cache, vaddr);
383}
384
385struct iova *alloc_iova_mem(void)
386{
eb3fa7cb 387 return iommu_kmem_cache_alloc(iommu_iova_cache);
ba395927
KA
388}
389
390void free_iova_mem(struct iova *iova)
391{
392 kmem_cache_free(iommu_iova_cache, iova);
393}
394
1b573683
WH
395
396static inline int width_to_agaw(int width);
397
398/* calculate agaw for each iommu.
399 * "SAGAW" may be different across iommus, use a default agaw, and
 400 * fall back to a smaller supported agaw for iommus that don't support the default agaw.
401 */
402int iommu_calculate_agaw(struct intel_iommu *iommu)
403{
404 unsigned long sagaw;
405 int agaw = -1;
406
407 sagaw = cap_sagaw(iommu->cap);
408 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
409 agaw >= 0; agaw--) {
410 if (test_bit(agaw, &sagaw))
411 break;
412 }
413
414 return agaw;
415}
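/*
 * Worked example (illustrative): with DEFAULT_DOMAIN_ADDRESS_WIDTH = 48,
 * width_to_agaw(48) = (48 - 30) / 9 = 2, i.e. a 4-level page table.  If the
 * SAGAW field only advertises 39-bit/3-level support, the loop above falls
 * back to agaw 1; if no supported agaw at or below the default is found,
 * the function returns -1.
 */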
416
8c11e798
WH
417/* in native case, each domain is related to only one iommu */
418static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
419{
420 int iommu_id;
421
1ce28feb
WH
422 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
423
8c11e798
WH
424 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
425 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
426 return NULL;
427
428 return g_iommus[iommu_id];
429}
430
8e604097
WH
431static void domain_update_iommu_coherency(struct dmar_domain *domain)
432{
433 int i;
434
435 domain->iommu_coherency = 1;
436
437 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
438 for (; i < g_num_of_iommus; ) {
439 if (!ecap_coherent(g_iommus[i]->ecap)) {
440 domain->iommu_coherency = 0;
441 break;
442 }
443 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
444 }
445}
446
58c610bd
SY
447static void domain_update_iommu_snooping(struct dmar_domain *domain)
448{
449 int i;
450
451 domain->iommu_snooping = 1;
452
453 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
454 for (; i < g_num_of_iommus; ) {
455 if (!ecap_sc_support(g_iommus[i]->ecap)) {
456 domain->iommu_snooping = 0;
457 break;
458 }
459 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
460 }
461}
462
463/* Some capabilities may be different across iommus */
464static void domain_update_iommu_cap(struct dmar_domain *domain)
465{
466 domain_update_iommu_coherency(domain);
467 domain_update_iommu_snooping(domain);
468}
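/*
 * Illustrative sketch (not part of the original file): how a caller is
 * expected to consult the aggregated snooping capability before asking for
 * snooped mappings; 'domain', 'iova', 'hpa' and 'size' are placeholders and
 * domain_page_mapping() is defined further below.
 *
 *	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
 *
 *	if (domain->iommu_snooping)
 *		prot |= DMA_PTE_SNP;
 *	domain_page_mapping(domain, iova, hpa, size, prot);
 */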
469
c7151a8d
WH
470static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
471{
472 struct dmar_drhd_unit *drhd = NULL;
473 int i;
474
475 for_each_drhd_unit(drhd) {
476 if (drhd->ignored)
477 continue;
478
479 for (i = 0; i < drhd->devices_cnt; i++)
288e4877
DH
480 if (drhd->devices[i] &&
481 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
482 drhd->devices[i]->devfn == devfn)
483 return drhd->iommu;
484
485 if (drhd->include_all)
486 return drhd->iommu;
487 }
488
489 return NULL;
490}
491
5331fe6f
WH
492static void domain_flush_cache(struct dmar_domain *domain,
493 void *addr, int size)
494{
495 if (!domain->iommu_coherency)
496 clflush_cache_range(addr, size);
497}
498
ba395927
KA
499/* Gets context entry for a given bus and devfn */
500static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
501 u8 bus, u8 devfn)
502{
503 struct root_entry *root;
504 struct context_entry *context;
505 unsigned long phy_addr;
506 unsigned long flags;
507
508 spin_lock_irqsave(&iommu->lock, flags);
509 root = &iommu->root_entry[bus];
510 context = get_context_addr_from_root(root);
511 if (!context) {
512 context = (struct context_entry *)alloc_pgtable_page();
513 if (!context) {
514 spin_unlock_irqrestore(&iommu->lock, flags);
515 return NULL;
516 }
5b6985ce 517 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
518 phy_addr = virt_to_phys((void *)context);
519 set_root_value(root, phy_addr);
520 set_root_present(root);
521 __iommu_flush_cache(iommu, root, sizeof(*root));
522 }
523 spin_unlock_irqrestore(&iommu->lock, flags);
524 return &context[devfn];
525}
526
527static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
528{
529 struct root_entry *root;
530 struct context_entry *context;
531 int ret;
532 unsigned long flags;
533
534 spin_lock_irqsave(&iommu->lock, flags);
535 root = &iommu->root_entry[bus];
536 context = get_context_addr_from_root(root);
537 if (!context) {
538 ret = 0;
539 goto out;
540 }
c07e7d21 541 ret = context_present(&context[devfn]);
ba395927
KA
542out:
543 spin_unlock_irqrestore(&iommu->lock, flags);
544 return ret;
545}
546
547static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
548{
549 struct root_entry *root;
550 struct context_entry *context;
551 unsigned long flags;
552
553 spin_lock_irqsave(&iommu->lock, flags);
554 root = &iommu->root_entry[bus];
555 context = get_context_addr_from_root(root);
556 if (context) {
c07e7d21 557 context_clear_entry(&context[devfn]);
ba395927
KA
558 __iommu_flush_cache(iommu, &context[devfn], \
559 sizeof(*context));
560 }
561 spin_unlock_irqrestore(&iommu->lock, flags);
562}
563
564static void free_context_table(struct intel_iommu *iommu)
565{
566 struct root_entry *root;
567 int i;
568 unsigned long flags;
569 struct context_entry *context;
570
571 spin_lock_irqsave(&iommu->lock, flags);
572 if (!iommu->root_entry) {
573 goto out;
574 }
575 for (i = 0; i < ROOT_ENTRY_NR; i++) {
576 root = &iommu->root_entry[i];
577 context = get_context_addr_from_root(root);
578 if (context)
579 free_pgtable_page(context);
580 }
581 free_pgtable_page(iommu->root_entry);
582 iommu->root_entry = NULL;
583out:
584 spin_unlock_irqrestore(&iommu->lock, flags);
585}
586
587/* page table handling */
588#define LEVEL_STRIDE (9)
589#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
590
591static inline int agaw_to_level(int agaw)
592{
593 return agaw + 2;
594}
595
596static inline int agaw_to_width(int agaw)
597{
598 return 30 + agaw * LEVEL_STRIDE;
599
600}
601
602static inline int width_to_agaw(int width)
603{
604 return (width - 30) / LEVEL_STRIDE;
605}
606
607static inline unsigned int level_to_offset_bits(int level)
608{
609 return (12 + (level - 1) * LEVEL_STRIDE);
610}
611
612static inline int address_level_offset(u64 addr, int level)
613{
614 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
615}
616
617static inline u64 level_mask(int level)
618{
619 return ((u64)-1 << level_to_offset_bits(level));
620}
621
622static inline u64 level_size(int level)
623{
624 return ((u64)1 << level_to_offset_bits(level));
625}
626
627static inline u64 align_to_level(u64 addr, int level)
628{
629 return ((addr + level_size(level) - 1) & level_mask(level));
630}
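/*
 * Worked example (illustrative): with agaw 2 the table has
 * agaw_to_level(2) = 4 levels, and address_level_offset() extracts 9-bit
 * indices at these positions:
 *
 *	level 4: addr bits 47..39	level 2: addr bits 29..21
 *	level 3: addr bits 38..30	level 1: addr bits 20..12
 *
 * level_size() is 4KB, 2MB and 1GB for levels 1, 2 and 3, so
 * align_to_level(addr, 2) rounds addr up to the next 2MB boundary.
 */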
631
632static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
633{
634 int addr_width = agaw_to_width(domain->agaw);
635 struct dma_pte *parent, *pte = NULL;
636 int level = agaw_to_level(domain->agaw);
637 int offset;
638 unsigned long flags;
639
640 BUG_ON(!domain->pgd);
641
642 addr &= (((u64)1) << addr_width) - 1;
643 parent = domain->pgd;
644
645 spin_lock_irqsave(&domain->mapping_lock, flags);
646 while (level > 0) {
647 void *tmp_page;
648
649 offset = address_level_offset(addr, level);
650 pte = &parent[offset];
651 if (level == 1)
652 break;
653
19c239ce 654 if (!dma_pte_present(pte)) {
ba395927
KA
655 tmp_page = alloc_pgtable_page();
656
657 if (!tmp_page) {
658 spin_unlock_irqrestore(&domain->mapping_lock,
659 flags);
660 return NULL;
661 }
5331fe6f 662 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
19c239ce 663 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
ba395927
KA
664 /*
665 * high level table always sets r/w, last level page
666 * table control read/write
667 */
19c239ce
MM
668 dma_set_pte_readable(pte);
669 dma_set_pte_writable(pte);
5331fe6f 670 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 671 }
19c239ce 672 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
673 level--;
674 }
675
676 spin_unlock_irqrestore(&domain->mapping_lock, flags);
677 return pte;
678}
679
680/* return address's pte at specific level */
681static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
682 int level)
683{
684 struct dma_pte *parent, *pte = NULL;
685 int total = agaw_to_level(domain->agaw);
686 int offset;
687
688 parent = domain->pgd;
689 while (level <= total) {
690 offset = address_level_offset(addr, total);
691 pte = &parent[offset];
692 if (level == total)
693 return pte;
694
19c239ce 695 if (!dma_pte_present(pte))
ba395927 696 break;
19c239ce 697 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
698 total--;
699 }
700 return NULL;
701}
702
703/* clear one page's page table */
704static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
705{
706 struct dma_pte *pte = NULL;
707
708 /* get last level pte */
709 pte = dma_addr_level_pte(domain, addr, 1);
710
711 if (pte) {
19c239ce 712 dma_clear_pte(pte);
5331fe6f 713 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927
KA
714 }
715}
716
717/* clear last level pte, a tlb flush should be followed */
718static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
719{
720 int addr_width = agaw_to_width(domain->agaw);
721
722 start &= (((u64)1) << addr_width) - 1;
723 end &= (((u64)1) << addr_width) - 1;
724 /* in case it's partial page */
5b6985ce
FY
725 start = PAGE_ALIGN(start);
726 end &= PAGE_MASK;
ba395927
KA
727
728 /* we don't need lock here, nobody else touches the iova range */
729 while (start < end) {
730 dma_pte_clear_one(domain, start);
5b6985ce 731 start += VTD_PAGE_SIZE;
ba395927
KA
732 }
733}
734
735/* free page table pages. last level pte should already be cleared */
736static void dma_pte_free_pagetable(struct dmar_domain *domain,
737 u64 start, u64 end)
738{
739 int addr_width = agaw_to_width(domain->agaw);
740 struct dma_pte *pte;
741 int total = agaw_to_level(domain->agaw);
742 int level;
743 u64 tmp;
744
745 start &= (((u64)1) << addr_width) - 1;
746 end &= (((u64)1) << addr_width) - 1;
747
748 /* we don't need lock here, nobody else touches the iova range */
749 level = 2;
750 while (level <= total) {
751 tmp = align_to_level(start, level);
752 if (tmp >= end || (tmp + level_size(level) > end))
753 return;
754
755 while (tmp < end) {
756 pte = dma_addr_level_pte(domain, tmp, level);
757 if (pte) {
758 free_pgtable_page(
19c239ce
MM
759 phys_to_virt(dma_pte_addr(pte)));
760 dma_clear_pte(pte);
5331fe6f 761 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927
KA
762 }
763 tmp += level_size(level);
764 }
765 level++;
766 }
767 /* free pgd */
768 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
769 free_pgtable_page(domain->pgd);
770 domain->pgd = NULL;
771 }
772}
773
774/* iommu handling */
775static int iommu_alloc_root_entry(struct intel_iommu *iommu)
776{
777 struct root_entry *root;
778 unsigned long flags;
779
780 root = (struct root_entry *)alloc_pgtable_page();
781 if (!root)
782 return -ENOMEM;
783
5b6985ce 784 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
785
786 spin_lock_irqsave(&iommu->lock, flags);
787 iommu->root_entry = root;
788 spin_unlock_irqrestore(&iommu->lock, flags);
789
790 return 0;
791}
792
ba395927
KA
793static void iommu_set_root_entry(struct intel_iommu *iommu)
794{
795 void *addr;
796 u32 cmd, sts;
797 unsigned long flag;
798
799 addr = iommu->root_entry;
800
801 spin_lock_irqsave(&iommu->register_lock, flag);
802 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
803
804 cmd = iommu->gcmd | DMA_GCMD_SRTP;
805 writel(cmd, iommu->reg + DMAR_GCMD_REG);
806
807 /* Make sure hardware complete it */
808 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
809 readl, (sts & DMA_GSTS_RTPS), sts);
810
811 spin_unlock_irqrestore(&iommu->register_lock, flag);
812}
813
814static void iommu_flush_write_buffer(struct intel_iommu *iommu)
815{
816 u32 val;
817 unsigned long flag;
818
9af88143 819 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927
KA
820 return;
821 val = iommu->gcmd | DMA_GCMD_WBF;
822
823 spin_lock_irqsave(&iommu->register_lock, flag);
824 writel(val, iommu->reg + DMAR_GCMD_REG);
825
826 /* Make sure hardware complete it */
827 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
828 readl, (!(val & DMA_GSTS_WBFS)), val);
829
830 spin_unlock_irqrestore(&iommu->register_lock, flag);
831}
832
 833/* return value determines whether we need a write buffer flush */
834static int __iommu_flush_context(struct intel_iommu *iommu,
835 u16 did, u16 source_id, u8 function_mask, u64 type,
836 int non_present_entry_flush)
837{
838 u64 val = 0;
839 unsigned long flag;
840
841 /*
 842 * In the non-present entry flush case: if the hardware doesn't cache
 843 * non-present entries we do nothing; if it does cache them, we flush
 844 * the entries of domain 0 (the domain id used to cache any non-present
 845 * entries)
846 */
847 if (non_present_entry_flush) {
848 if (!cap_caching_mode(iommu->cap))
849 return 1;
850 else
851 did = 0;
852 }
853
854 switch (type) {
855 case DMA_CCMD_GLOBAL_INVL:
856 val = DMA_CCMD_GLOBAL_INVL;
857 break;
858 case DMA_CCMD_DOMAIN_INVL:
859 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
860 break;
861 case DMA_CCMD_DEVICE_INVL:
862 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
863 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
864 break;
865 default:
866 BUG();
867 }
868 val |= DMA_CCMD_ICC;
869
870 spin_lock_irqsave(&iommu->register_lock, flag);
871 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
872
873 /* Make sure hardware complete it */
874 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
875 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
876
877 spin_unlock_irqrestore(&iommu->register_lock, flag);
878
4d235ba6 879 /* flush context entry will implicitly flush write buffer */
ba395927
KA
880 return 0;
881}
882
ba395927
KA
 883/* return value determines whether we need a write buffer flush */
884static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
885 u64 addr, unsigned int size_order, u64 type,
886 int non_present_entry_flush)
887{
888 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
889 u64 val = 0, val_iva = 0;
890 unsigned long flag;
891
892 /*
 893 * In the non-present entry flush case: if the hardware doesn't cache
 894 * non-present entries we do nothing; if it does cache them, we flush
 895 * the entries of domain 0 (the domain id used to cache any non-present
 896 * entries)
897 */
898 if (non_present_entry_flush) {
899 if (!cap_caching_mode(iommu->cap))
900 return 1;
901 else
902 did = 0;
903 }
904
905 switch (type) {
906 case DMA_TLB_GLOBAL_FLUSH:
 907 /* global flush doesn't need to set IVA_REG */
908 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
909 break;
910 case DMA_TLB_DSI_FLUSH:
911 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
912 break;
913 case DMA_TLB_PSI_FLUSH:
914 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
915 /* Note: always flush non-leaf currently */
916 val_iva = size_order | addr;
917 break;
918 default:
919 BUG();
920 }
921 /* Note: set drain read/write */
922#if 0
923 /*
924 * This is probably to be super secure.. Looks like we can
925 * ignore it without any impact.
926 */
927 if (cap_read_drain(iommu->cap))
928 val |= DMA_TLB_READ_DRAIN;
929#endif
930 if (cap_write_drain(iommu->cap))
931 val |= DMA_TLB_WRITE_DRAIN;
932
933 spin_lock_irqsave(&iommu->register_lock, flag);
934 /* Note: Only uses first TLB reg currently */
935 if (val_iva)
936 dmar_writeq(iommu->reg + tlb_offset, val_iva);
937 dmar_writeq(iommu->reg + tlb_offset + 8, val);
938
939 /* Make sure hardware complete it */
940 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
941 dmar_readq, (!(val & DMA_TLB_IVT)), val);
942
943 spin_unlock_irqrestore(&iommu->register_lock, flag);
944
945 /* check IOTLB invalidation granularity */
946 if (DMA_TLB_IAIG(val) == 0)
947 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
948 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
949 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
950 (unsigned long long)DMA_TLB_IIRG(type),
951 (unsigned long long)DMA_TLB_IAIG(val));
4d235ba6 952 /* flush iotlb entry will implicitly flush write buffer */
ba395927
KA
953 return 0;
954}
955
ba395927
KA
956static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
957 u64 addr, unsigned int pages, int non_present_entry_flush)
958{
f76aec76 959 unsigned int mask;
ba395927 960
5b6985ce 961 BUG_ON(addr & (~VTD_PAGE_MASK));
ba395927
KA
962 BUG_ON(pages == 0);
963
964 /* Fallback to domain selective flush if no PSI support */
965 if (!cap_pgsel_inv(iommu->cap))
a77b67d4
YS
966 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
967 DMA_TLB_DSI_FLUSH,
968 non_present_entry_flush);
ba395927
KA
969
970 /*
971 * PSI requires page size to be 2 ^ x, and the base address is naturally
972 * aligned to the size
973 */
f76aec76 974 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 975 /* Fallback to domain selective flush if size is too big */
f76aec76 976 if (mask > cap_max_amask_val(iommu->cap))
a77b67d4
YS
977 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
978 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
ba395927 979
a77b67d4
YS
980 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
981 DMA_TLB_PSI_FLUSH,
982 non_present_entry_flush);
ba395927
KA
983}
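/*
 * Worked example (illustrative): flushing 5 pages gives
 * mask = ilog2(__roundup_pow_of_two(5)) = ilog2(8) = 3, i.e. a PSI
 * invalidation covering 2^3 = 8 pages starting at the (suitably aligned)
 * base address.  If mask exceeded cap_max_amask_val(), the code above falls
 * back to a domain-selective flush instead.
 */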
984
f8bab735 985static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
986{
987 u32 pmen;
988 unsigned long flags;
989
990 spin_lock_irqsave(&iommu->register_lock, flags);
991 pmen = readl(iommu->reg + DMAR_PMEN_REG);
992 pmen &= ~DMA_PMEN_EPM;
993 writel(pmen, iommu->reg + DMAR_PMEN_REG);
994
995 /* wait for the protected region status bit to clear */
996 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
997 readl, !(pmen & DMA_PMEN_PRS), pmen);
998
999 spin_unlock_irqrestore(&iommu->register_lock, flags);
1000}
1001
ba395927
KA
1002static int iommu_enable_translation(struct intel_iommu *iommu)
1003{
1004 u32 sts;
1005 unsigned long flags;
1006
1007 spin_lock_irqsave(&iommu->register_lock, flags);
1008 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
1009
1010 /* Make sure hardware complete it */
1011 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1012 readl, (sts & DMA_GSTS_TES), sts);
1013
1014 iommu->gcmd |= DMA_GCMD_TE;
1015 spin_unlock_irqrestore(&iommu->register_lock, flags);
1016 return 0;
1017}
1018
1019static int iommu_disable_translation(struct intel_iommu *iommu)
1020{
1021 u32 sts;
1022 unsigned long flag;
1023
1024 spin_lock_irqsave(&iommu->register_lock, flag);
1025 iommu->gcmd &= ~DMA_GCMD_TE;
1026 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1027
1028 /* Make sure hardware complete it */
1029 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1030 readl, (!(sts & DMA_GSTS_TES)), sts);
1031
1032 spin_unlock_irqrestore(&iommu->register_lock, flag);
1033 return 0;
1034}
1035
3460a6d9
KA
 1036/* iommu interrupt handling. Most of it is MSI-like. */
1037
d94afc6c 1038static const char *fault_reason_strings[] =
3460a6d9
KA
1039{
1040 "Software",
1041 "Present bit in root entry is clear",
1042 "Present bit in context entry is clear",
1043 "Invalid context entry",
1044 "Access beyond MGAW",
1045 "PTE Write access is not set",
1046 "PTE Read access is not set",
1047 "Next page table ptr is invalid",
1048 "Root table address invalid",
1049 "Context table ptr is invalid",
1050 "non-zero reserved fields in RTP",
1051 "non-zero reserved fields in CTP",
1052 "non-zero reserved fields in PTE",
3460a6d9 1053};
f8bab735 1054#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
3460a6d9 1055
d94afc6c 1056const char *dmar_get_fault_reason(u8 fault_reason)
3460a6d9 1057{
d94afc6c 1058 if (fault_reason > MAX_FAULT_REASON_IDX)
1059 return "Unknown";
3460a6d9
KA
1060 else
1061 return fault_reason_strings[fault_reason];
1062}
1063
1064void dmar_msi_unmask(unsigned int irq)
1065{
1066 struct intel_iommu *iommu = get_irq_data(irq);
1067 unsigned long flag;
1068
1069 /* unmask it */
1070 spin_lock_irqsave(&iommu->register_lock, flag);
1071 writel(0, iommu->reg + DMAR_FECTL_REG);
1072 /* Read a reg to force flush the post write */
1073 readl(iommu->reg + DMAR_FECTL_REG);
1074 spin_unlock_irqrestore(&iommu->register_lock, flag);
1075}
1076
1077void dmar_msi_mask(unsigned int irq)
1078{
1079 unsigned long flag;
1080 struct intel_iommu *iommu = get_irq_data(irq);
1081
1082 /* mask it */
1083 spin_lock_irqsave(&iommu->register_lock, flag);
1084 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1085 /* Read a reg to force flush the post write */
1086 readl(iommu->reg + DMAR_FECTL_REG);
1087 spin_unlock_irqrestore(&iommu->register_lock, flag);
1088}
1089
1090void dmar_msi_write(int irq, struct msi_msg *msg)
1091{
1092 struct intel_iommu *iommu = get_irq_data(irq);
1093 unsigned long flag;
1094
1095 spin_lock_irqsave(&iommu->register_lock, flag);
1096 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1097 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1098 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1099 spin_unlock_irqrestore(&iommu->register_lock, flag);
1100}
1101
1102void dmar_msi_read(int irq, struct msi_msg *msg)
1103{
1104 struct intel_iommu *iommu = get_irq_data(irq);
1105 unsigned long flag;
1106
1107 spin_lock_irqsave(&iommu->register_lock, flag);
1108 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1109 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1110 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1111 spin_unlock_irqrestore(&iommu->register_lock, flag);
1112}
1113
1114static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
5b6985ce 1115 u8 fault_reason, u16 source_id, unsigned long long addr)
3460a6d9 1116{
d94afc6c 1117 const char *reason;
3460a6d9
KA
1118
1119 reason = dmar_get_fault_reason(fault_reason);
1120
1121 printk(KERN_ERR
1122 "DMAR:[%s] Request device [%02x:%02x.%d] "
1123 "fault addr %llx \n"
1124 "DMAR:[fault reason %02d] %s\n",
1125 (type ? "DMA Read" : "DMA Write"),
1126 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1127 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1128 return 0;
1129}
1130
1131#define PRIMARY_FAULT_REG_LEN (16)
1132static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1133{
1134 struct intel_iommu *iommu = dev_id;
1135 int reg, fault_index;
1136 u32 fault_status;
1137 unsigned long flag;
1138
1139 spin_lock_irqsave(&iommu->register_lock, flag);
1140 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1141
1142 /* TBD: ignore advanced fault log currently */
1143 if (!(fault_status & DMA_FSTS_PPF))
1144 goto clear_overflow;
1145
1146 fault_index = dma_fsts_fault_record_index(fault_status);
1147 reg = cap_fault_reg_offset(iommu->cap);
1148 while (1) {
1149 u8 fault_reason;
1150 u16 source_id;
1151 u64 guest_addr;
1152 int type;
1153 u32 data;
1154
1155 /* highest 32 bits */
1156 data = readl(iommu->reg + reg +
1157 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1158 if (!(data & DMA_FRCD_F))
1159 break;
1160
1161 fault_reason = dma_frcd_fault_reason(data);
1162 type = dma_frcd_type(data);
1163
1164 data = readl(iommu->reg + reg +
1165 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1166 source_id = dma_frcd_source_id(data);
1167
1168 guest_addr = dmar_readq(iommu->reg + reg +
1169 fault_index * PRIMARY_FAULT_REG_LEN);
1170 guest_addr = dma_frcd_page_addr(guest_addr);
1171 /* clear the fault */
1172 writel(DMA_FRCD_F, iommu->reg + reg +
1173 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1174
1175 spin_unlock_irqrestore(&iommu->register_lock, flag);
1176
1177 iommu_page_fault_do_one(iommu, type, fault_reason,
1178 source_id, guest_addr);
1179
1180 fault_index++;
1181 if (fault_index > cap_num_fault_regs(iommu->cap))
1182 fault_index = 0;
1183 spin_lock_irqsave(&iommu->register_lock, flag);
1184 }
1185clear_overflow:
1186 /* clear primary fault overflow */
1187 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1188 if (fault_status & DMA_FSTS_PFO)
1189 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1190
1191 spin_unlock_irqrestore(&iommu->register_lock, flag);
1192 return IRQ_HANDLED;
1193}
1194
1195int dmar_set_interrupt(struct intel_iommu *iommu)
1196{
1197 int irq, ret;
1198
1199 irq = create_irq();
1200 if (!irq) {
1201 printk(KERN_ERR "IOMMU: no free vectors\n");
1202 return -EINVAL;
1203 }
1204
1205 set_irq_data(irq, iommu);
1206 iommu->irq = irq;
1207
1208 ret = arch_setup_dmar_msi(irq);
1209 if (ret) {
1210 set_irq_data(irq, NULL);
1211 iommu->irq = 0;
1212 destroy_irq(irq);
 1213 return ret;
1214 }
1215
1216 /* Force fault register is cleared */
1217 iommu_page_fault(irq, iommu);
1218
1219 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1220 if (ret)
1221 printk(KERN_ERR "IOMMU: can't request irq\n");
1222 return ret;
1223}
1224
ba395927
KA
1225static int iommu_init_domains(struct intel_iommu *iommu)
1226{
1227 unsigned long ndomains;
1228 unsigned long nlongs;
1229
1230 ndomains = cap_ndoms(iommu->cap);
 1231 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1232 nlongs = BITS_TO_LONGS(ndomains);
1233
1234 /* TBD: there might be 64K domains,
1235 * consider other allocation for future chip
1236 */
1237 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1238 if (!iommu->domain_ids) {
1239 printk(KERN_ERR "Allocating domain id array failed\n");
1240 return -ENOMEM;
1241 }
1242 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1243 GFP_KERNEL);
1244 if (!iommu->domains) {
1245 printk(KERN_ERR "Allocating domain array failed\n");
1246 kfree(iommu->domain_ids);
1247 return -ENOMEM;
1248 }
1249
e61d98d8
SS
1250 spin_lock_init(&iommu->lock);
1251
ba395927
KA
1252 /*
1253 * if Caching mode is set, then invalid translations are tagged
1254 * with domainid 0. Hence we need to pre-allocate it.
1255 */
1256 if (cap_caching_mode(iommu->cap))
1257 set_bit(0, iommu->domain_ids);
1258 return 0;
1259}
ba395927 1260
ba395927
KA
1261
1262static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1263static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1264
1265void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1266{
1267 struct dmar_domain *domain;
1268 int i;
c7151a8d 1269 unsigned long flags;
ba395927 1270
ba395927
KA
1271 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1272 for (; i < cap_ndoms(iommu->cap); ) {
1273 domain = iommu->domains[i];
1274 clear_bit(i, iommu->domain_ids);
c7151a8d
WH
1275
1276 spin_lock_irqsave(&domain->iommu_lock, flags);
5e98c4b1
WH
1277 if (--domain->iommu_count == 0) {
1278 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1279 vm_domain_exit(domain);
1280 else
1281 domain_exit(domain);
1282 }
c7151a8d
WH
1283 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1284
ba395927
KA
1285 i = find_next_bit(iommu->domain_ids,
1286 cap_ndoms(iommu->cap), i+1);
1287 }
1288
1289 if (iommu->gcmd & DMA_GCMD_TE)
1290 iommu_disable_translation(iommu);
1291
1292 if (iommu->irq) {
1293 set_irq_data(iommu->irq, NULL);
1294 /* This will mask the irq */
1295 free_irq(iommu->irq, iommu);
1296 destroy_irq(iommu->irq);
1297 }
1298
1299 kfree(iommu->domains);
1300 kfree(iommu->domain_ids);
1301
d9630fe9
WH
1302 g_iommus[iommu->seq_id] = NULL;
1303
1304 /* if all iommus are freed, free g_iommus */
1305 for (i = 0; i < g_num_of_iommus; i++) {
1306 if (g_iommus[i])
1307 break;
1308 }
1309
1310 if (i == g_num_of_iommus)
1311 kfree(g_iommus);
1312
ba395927
KA
1313 /* free context mapping */
1314 free_context_table(iommu);
ba395927
KA
1315}
1316
1317static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1318{
1319 unsigned long num;
1320 unsigned long ndomains;
1321 struct dmar_domain *domain;
1322 unsigned long flags;
1323
1324 domain = alloc_domain_mem();
1325 if (!domain)
1326 return NULL;
1327
1328 ndomains = cap_ndoms(iommu->cap);
1329
1330 spin_lock_irqsave(&iommu->lock, flags);
1331 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1332 if (num >= ndomains) {
1333 spin_unlock_irqrestore(&iommu->lock, flags);
1334 free_domain_mem(domain);
1335 printk(KERN_ERR "IOMMU: no free domain ids\n");
1336 return NULL;
1337 }
1338
1339 set_bit(num, iommu->domain_ids);
1340 domain->id = num;
8c11e798
WH
1341 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1342 set_bit(iommu->seq_id, &domain->iommu_bmp);
d71a2f33 1343 domain->flags = 0;
ba395927
KA
1344 iommu->domains[num] = domain;
1345 spin_unlock_irqrestore(&iommu->lock, flags);
1346
1347 return domain;
1348}
1349
1350static void iommu_free_domain(struct dmar_domain *domain)
1351{
1352 unsigned long flags;
8c11e798
WH
1353 struct intel_iommu *iommu;
1354
1355 iommu = domain_get_iommu(domain);
ba395927 1356
8c11e798
WH
1357 spin_lock_irqsave(&iommu->lock, flags);
1358 clear_bit(domain->id, iommu->domain_ids);
1359 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1360}
1361
1362static struct iova_domain reserved_iova_list;
8a443df4
MG
1363static struct lock_class_key reserved_alloc_key;
1364static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1365
1366static void dmar_init_reserved_ranges(void)
1367{
1368 struct pci_dev *pdev = NULL;
1369 struct iova *iova;
1370 int i;
1371 u64 addr, size;
1372
f661197e 1373 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1374
8a443df4
MG
1375 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1376 &reserved_alloc_key);
1377 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1378 &reserved_rbtree_key);
1379
ba395927
KA
1380 /* IOAPIC ranges shouldn't be accessed by DMA */
1381 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1382 IOVA_PFN(IOAPIC_RANGE_END));
1383 if (!iova)
1384 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1385
1386 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1387 for_each_pci_dev(pdev) {
1388 struct resource *r;
1389
1390 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1391 r = &pdev->resource[i];
1392 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1393 continue;
1394 addr = r->start;
5b6985ce 1395 addr &= PAGE_MASK;
ba395927 1396 size = r->end - addr;
5b6985ce 1397 size = PAGE_ALIGN(size);
ba395927
KA
1398 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1399 IOVA_PFN(size + addr) - 1);
1400 if (!iova)
1401 printk(KERN_ERR "Reserve iova failed\n");
1402 }
1403 }
1404
1405}
1406
1407static void domain_reserve_special_ranges(struct dmar_domain *domain)
1408{
1409 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1410}
1411
1412static inline int guestwidth_to_adjustwidth(int gaw)
1413{
1414 int agaw;
1415 int r = (gaw - 12) % 9;
1416
1417 if (r == 0)
1418 agaw = gaw;
1419 else
1420 agaw = gaw + 9 - r;
1421 if (agaw > 64)
1422 agaw = 64;
1423 return agaw;
1424}
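/*
 * Worked example (illustrative): the guest width is rounded up so that
 * (gaw - 12) becomes a multiple of 9, i.e. a whole number of page-table
 * levels:
 *
 *	gaw 39 -> adjusted width 39	(r == 0)
 *	gaw 40 -> adjusted width 48	(r == 1, so 40 + 9 - 1)
 *	gaw 48 -> adjusted width 48	(r == 0)
 *
 * The result is capped at 64 and converted to an agaw index by
 * width_to_agaw() in domain_init() below.
 */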
1425
1426static int domain_init(struct dmar_domain *domain, int guest_width)
1427{
1428 struct intel_iommu *iommu;
1429 int adjust_width, agaw;
1430 unsigned long sagaw;
1431
f661197e 1432 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927 1433 spin_lock_init(&domain->mapping_lock);
c7151a8d 1434 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1435
1436 domain_reserve_special_ranges(domain);
1437
1438 /* calculate AGAW */
8c11e798 1439 iommu = domain_get_iommu(domain);
ba395927
KA
1440 if (guest_width > cap_mgaw(iommu->cap))
1441 guest_width = cap_mgaw(iommu->cap);
1442 domain->gaw = guest_width;
1443 adjust_width = guestwidth_to_adjustwidth(guest_width);
1444 agaw = width_to_agaw(adjust_width);
1445 sagaw = cap_sagaw(iommu->cap);
1446 if (!test_bit(agaw, &sagaw)) {
1447 /* hardware doesn't support it, choose a bigger one */
1448 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1449 agaw = find_next_bit(&sagaw, 5, agaw);
1450 if (agaw >= 5)
1451 return -ENODEV;
1452 }
1453 domain->agaw = agaw;
1454 INIT_LIST_HEAD(&domain->devices);
1455
8e604097
WH
1456 if (ecap_coherent(iommu->ecap))
1457 domain->iommu_coherency = 1;
1458 else
1459 domain->iommu_coherency = 0;
1460
58c610bd
SY
1461 if (ecap_sc_support(iommu->ecap))
1462 domain->iommu_snooping = 1;
1463 else
1464 domain->iommu_snooping = 0;
1465
c7151a8d
WH
1466 domain->iommu_count = 1;
1467
ba395927
KA
1468 /* always allocate the top pgd */
1469 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1470 if (!domain->pgd)
1471 return -ENOMEM;
5b6985ce 1472 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1473 return 0;
1474}
1475
1476static void domain_exit(struct dmar_domain *domain)
1477{
1478 u64 end;
1479
 1480 /* Domain 0 is reserved, so don't process it */
1481 if (!domain)
1482 return;
1483
1484 domain_remove_dev_info(domain);
1485 /* destroy iovas */
1486 put_iova_domain(&domain->iovad);
1487 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 1488 end = end & (~PAGE_MASK);
ba395927
KA
1489
1490 /* clear ptes */
1491 dma_pte_clear_range(domain, 0, end);
1492
1493 /* free page tables */
1494 dma_pte_free_pagetable(domain, 0, end);
1495
1496 iommu_free_domain(domain);
1497 free_domain_mem(domain);
1498}
1499
1500static int domain_context_mapping_one(struct dmar_domain *domain,
1501 u8 bus, u8 devfn)
1502{
1503 struct context_entry *context;
ba395927 1504 unsigned long flags;
5331fe6f 1505 struct intel_iommu *iommu;
ea6606b0
WH
1506 struct dma_pte *pgd;
1507 unsigned long num;
1508 unsigned long ndomains;
1509 int id;
1510 int agaw;
ba395927
KA
1511
1512 pr_debug("Set context mapping for %02x:%02x.%d\n",
1513 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1514 BUG_ON(!domain->pgd);
5331fe6f
WH
1515
1516 iommu = device_to_iommu(bus, devfn);
1517 if (!iommu)
1518 return -ENODEV;
1519
ba395927
KA
1520 context = device_to_context_entry(iommu, bus, devfn);
1521 if (!context)
1522 return -ENOMEM;
1523 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1524 if (context_present(context)) {
ba395927
KA
1525 spin_unlock_irqrestore(&iommu->lock, flags);
1526 return 0;
1527 }
1528
ea6606b0
WH
1529 id = domain->id;
1530 pgd = domain->pgd;
1531
1532 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1533 int found = 0;
1534
1535 /* find an available domain id for this device in iommu */
1536 ndomains = cap_ndoms(iommu->cap);
1537 num = find_first_bit(iommu->domain_ids, ndomains);
1538 for (; num < ndomains; ) {
1539 if (iommu->domains[num] == domain) {
1540 id = num;
1541 found = 1;
1542 break;
1543 }
1544 num = find_next_bit(iommu->domain_ids,
1545 cap_ndoms(iommu->cap), num+1);
1546 }
1547
1548 if (found == 0) {
1549 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1550 if (num >= ndomains) {
1551 spin_unlock_irqrestore(&iommu->lock, flags);
1552 printk(KERN_ERR "IOMMU: no free domain ids\n");
1553 return -EFAULT;
1554 }
1555
1556 set_bit(num, iommu->domain_ids);
1557 iommu->domains[num] = domain;
1558 id = num;
1559 }
1560
1561 /* Skip top levels of page tables for
1562 * iommu which has less agaw than default.
1563 */
1564 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1565 pgd = phys_to_virt(dma_pte_addr(pgd));
1566 if (!dma_pte_present(pgd)) {
1567 spin_unlock_irqrestore(&iommu->lock, flags);
1568 return -ENOMEM;
1569 }
1570 }
1571 }
1572
1573 context_set_domain_id(context, id);
1574 context_set_address_width(context, iommu->agaw);
1575 context_set_address_root(context, virt_to_phys(pgd));
c07e7d21
MM
1576 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1577 context_set_fault_enable(context);
1578 context_set_present(context);
5331fe6f 1579 domain_flush_cache(domain, context, sizeof(*context));
ba395927
KA
1580
1581 /* it's a non-present to present mapping */
a77b67d4
YS
1582 if (iommu->flush.flush_context(iommu, domain->id,
1583 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1584 DMA_CCMD_DEVICE_INVL, 1))
ba395927
KA
1585 iommu_flush_write_buffer(iommu);
1586 else
a77b67d4
YS
1587 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1588
ba395927 1589 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1590
1591 spin_lock_irqsave(&domain->iommu_lock, flags);
1592 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1593 domain->iommu_count++;
58c610bd 1594 domain_update_iommu_cap(domain);
c7151a8d
WH
1595 }
1596 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1597 return 0;
1598}
1599
1600static int
1601domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1602{
1603 int ret;
1604 struct pci_dev *tmp, *parent;
1605
1606 ret = domain_context_mapping_one(domain, pdev->bus->number,
1607 pdev->devfn);
1608 if (ret)
1609 return ret;
1610
1611 /* dependent device mapping */
1612 tmp = pci_find_upstream_pcie_bridge(pdev);
1613 if (!tmp)
1614 return 0;
1615 /* Secondary interface's bus number and devfn 0 */
1616 parent = pdev->bus->self;
1617 while (parent != tmp) {
1618 ret = domain_context_mapping_one(domain, parent->bus->number,
1619 parent->devfn);
1620 if (ret)
1621 return ret;
1622 parent = parent->bus->self;
1623 }
1624 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1625 return domain_context_mapping_one(domain,
1626 tmp->subordinate->number, 0);
1627 else /* this is a legacy PCI bridge */
1628 return domain_context_mapping_one(domain,
1629 tmp->bus->number, tmp->devfn);
1630}
1631
5331fe6f 1632static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1633{
1634 int ret;
1635 struct pci_dev *tmp, *parent;
5331fe6f
WH
1636 struct intel_iommu *iommu;
1637
1638 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1639 if (!iommu)
1640 return -ENODEV;
ba395927 1641
8c11e798 1642 ret = device_context_mapped(iommu,
ba395927
KA
1643 pdev->bus->number, pdev->devfn);
1644 if (!ret)
1645 return ret;
1646 /* dependent device mapping */
1647 tmp = pci_find_upstream_pcie_bridge(pdev);
1648 if (!tmp)
1649 return ret;
1650 /* Secondary interface's bus number and devfn 0 */
1651 parent = pdev->bus->self;
1652 while (parent != tmp) {
8c11e798 1653 ret = device_context_mapped(iommu, parent->bus->number,
ba395927
KA
1654 parent->devfn);
1655 if (!ret)
1656 return ret;
1657 parent = parent->bus->self;
1658 }
1659 if (tmp->is_pcie)
8c11e798 1660 return device_context_mapped(iommu,
ba395927
KA
1661 tmp->subordinate->number, 0);
1662 else
8c11e798 1663 return device_context_mapped(iommu,
ba395927
KA
1664 tmp->bus->number, tmp->devfn);
1665}
1666
1667static int
1668domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1669 u64 hpa, size_t size, int prot)
1670{
1671 u64 start_pfn, end_pfn;
1672 struct dma_pte *pte;
1673 int index;
5b6985ce
FY
1674 int addr_width = agaw_to_width(domain->agaw);
1675
1676 hpa &= (((u64)1) << addr_width) - 1;
ba395927
KA
1677
1678 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1679 return -EINVAL;
5b6985ce
FY
1680 iova &= PAGE_MASK;
1681 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1682 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
ba395927
KA
1683 index = 0;
1684 while (start_pfn < end_pfn) {
5b6985ce 1685 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
ba395927
KA
1686 if (!pte)
1687 return -ENOMEM;
1688 /* We don't need lock here, nobody else
1689 * touches the iova range
1690 */
19c239ce
MM
1691 BUG_ON(dma_pte_addr(pte));
1692 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1693 dma_set_pte_prot(pte, prot);
9cf06697
SY
1694 if (prot & DMA_PTE_SNP)
1695 dma_set_pte_snp(pte);
5331fe6f 1696 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927
KA
1697 start_pfn++;
1698 index++;
1699 }
1700 return 0;
1701}
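/*
 * Worked example (illustrative): mapping hpa 0x1000..0x4fff to iova 0x1000
 * gives start_pfn = 1 and end_pfn = 5, so four leaf PTEs are written.  The
 * function only flushes the CPU cache for the PTEs via domain_flush_cache();
 * callers are expected to perform any needed IOTLB flush themselves, e.g.
 * via iommu_flush_iotlb_psi().
 */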
1702
c7151a8d 1703static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1704{
c7151a8d
WH
1705 if (!iommu)
1706 return;
8c11e798
WH
1707
1708 clear_context_table(iommu, bus, devfn);
1709 iommu->flush.flush_context(iommu, 0, 0, 0,
a77b67d4 1710 DMA_CCMD_GLOBAL_INVL, 0);
8c11e798 1711 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
a77b67d4 1712 DMA_TLB_GLOBAL_FLUSH, 0);
ba395927
KA
1713}
1714
1715static void domain_remove_dev_info(struct dmar_domain *domain)
1716{
1717 struct device_domain_info *info;
1718 unsigned long flags;
c7151a8d 1719 struct intel_iommu *iommu;
ba395927
KA
1720
1721 spin_lock_irqsave(&device_domain_lock, flags);
1722 while (!list_empty(&domain->devices)) {
1723 info = list_entry(domain->devices.next,
1724 struct device_domain_info, link);
1725 list_del(&info->link);
1726 list_del(&info->global);
1727 if (info->dev)
358dd8ac 1728 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1729 spin_unlock_irqrestore(&device_domain_lock, flags);
1730
c7151a8d
WH
1731 iommu = device_to_iommu(info->bus, info->devfn);
1732 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1733 free_devinfo_mem(info);
1734
1735 spin_lock_irqsave(&device_domain_lock, flags);
1736 }
1737 spin_unlock_irqrestore(&device_domain_lock, flags);
1738}
1739
1740/*
1741 * find_domain
358dd8ac 1742 * Note: struct pci_dev->dev.archdata.iommu stores the info
ba395927 1743 */
38717946 1744static struct dmar_domain *
ba395927
KA
1745find_domain(struct pci_dev *pdev)
1746{
1747 struct device_domain_info *info;
1748
1749 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1750 info = pdev->dev.archdata.iommu;
ba395927
KA
1751 if (info)
1752 return info->domain;
1753 return NULL;
1754}
1755
ba395927
KA
1756/* domain is initialized */
1757static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1758{
1759 struct dmar_domain *domain, *found = NULL;
1760 struct intel_iommu *iommu;
1761 struct dmar_drhd_unit *drhd;
1762 struct device_domain_info *info, *tmp;
1763 struct pci_dev *dev_tmp;
1764 unsigned long flags;
1765 int bus = 0, devfn = 0;
1766
1767 domain = find_domain(pdev);
1768 if (domain)
1769 return domain;
1770
1771 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1772 if (dev_tmp) {
1773 if (dev_tmp->is_pcie) {
1774 bus = dev_tmp->subordinate->number;
1775 devfn = 0;
1776 } else {
1777 bus = dev_tmp->bus->number;
1778 devfn = dev_tmp->devfn;
1779 }
1780 spin_lock_irqsave(&device_domain_lock, flags);
1781 list_for_each_entry(info, &device_domain_list, global) {
1782 if (info->bus == bus && info->devfn == devfn) {
1783 found = info->domain;
1784 break;
1785 }
1786 }
1787 spin_unlock_irqrestore(&device_domain_lock, flags);
1788 /* pcie-pci bridge already has a domain, uses it */
1789 if (found) {
1790 domain = found;
1791 goto found_domain;
1792 }
1793 }
1794
1795 /* Allocate new domain for the device */
1796 drhd = dmar_find_matched_drhd_unit(pdev);
1797 if (!drhd) {
1798 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1799 pci_name(pdev));
1800 return NULL;
1801 }
1802 iommu = drhd->iommu;
1803
1804 domain = iommu_alloc_domain(iommu);
1805 if (!domain)
1806 goto error;
1807
1808 if (domain_init(domain, gaw)) {
1809 domain_exit(domain);
1810 goto error;
1811 }
1812
1813 /* register pcie-to-pci device */
1814 if (dev_tmp) {
1815 info = alloc_devinfo_mem();
1816 if (!info) {
1817 domain_exit(domain);
1818 goto error;
1819 }
1820 info->bus = bus;
1821 info->devfn = devfn;
1822 info->dev = NULL;
1823 info->domain = domain;
1824 /* This domain is shared by devices under p2p bridge */
3b5410e7 1825 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1826
1827 /* pcie-to-pci bridge already has a domain, uses it */
1828 found = NULL;
1829 spin_lock_irqsave(&device_domain_lock, flags);
1830 list_for_each_entry(tmp, &device_domain_list, global) {
1831 if (tmp->bus == bus && tmp->devfn == devfn) {
1832 found = tmp->domain;
1833 break;
1834 }
1835 }
1836 if (found) {
1837 free_devinfo_mem(info);
1838 domain_exit(domain);
1839 domain = found;
1840 } else {
1841 list_add(&info->link, &domain->devices);
1842 list_add(&info->global, &device_domain_list);
1843 }
1844 spin_unlock_irqrestore(&device_domain_lock, flags);
1845 }
1846
1847found_domain:
1848 info = alloc_devinfo_mem();
1849 if (!info)
1850 goto error;
1851 info->bus = pdev->bus->number;
1852 info->devfn = pdev->devfn;
1853 info->dev = pdev;
1854 info->domain = domain;
1855 spin_lock_irqsave(&device_domain_lock, flags);
1856 /* somebody is fast */
1857 found = find_domain(pdev);
1858 if (found != NULL) {
1859 spin_unlock_irqrestore(&device_domain_lock, flags);
1860 if (found != domain) {
1861 domain_exit(domain);
1862 domain = found;
1863 }
1864 free_devinfo_mem(info);
1865 return domain;
1866 }
1867 list_add(&info->link, &domain->devices);
1868 list_add(&info->global, &device_domain_list);
358dd8ac 1869 pdev->dev.archdata.iommu = info;
ba395927
KA
1870 spin_unlock_irqrestore(&device_domain_lock, flags);
1871 return domain;
1872error:
1873 /* recheck it here, maybe others set it */
1874 return find_domain(pdev);
1875}
1876
5b6985ce
FY
1877static int iommu_prepare_identity_map(struct pci_dev *pdev,
1878 unsigned long long start,
1879 unsigned long long end)
ba395927
KA
1880{
1881 struct dmar_domain *domain;
1882 unsigned long size;
5b6985ce 1883 unsigned long long base;
ba395927
KA
1884 int ret;
1885
1886 printk(KERN_INFO
1887 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1888 pci_name(pdev), start, end);
1889 /* page table init */
1890 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1891 if (!domain)
1892 return -ENOMEM;
1893
1894 /* The address might not be aligned */
5b6985ce 1895 base = start & PAGE_MASK;
ba395927 1896 size = end - base;
5b6985ce 1897 size = PAGE_ALIGN(size);
ba395927
KA
1898 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1899 IOVA_PFN(base + size) - 1)) {
1900 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1901 ret = -ENOMEM;
1902 goto error;
1903 }
1904
1905 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1906 size, base, pci_name(pdev));
1907 /*
1908 * RMRR range might have overlap with physical memory range,
1909 * clear it first
1910 */
1911 dma_pte_clear_range(domain, base, base + size);
1912
1913 ret = domain_page_mapping(domain, base, base, size,
1914 DMA_PTE_READ|DMA_PTE_WRITE);
1915 if (ret)
1916 goto error;
1917
1918 /* context entry init */
1919 ret = domain_context_mapping(domain, pdev);
1920 if (!ret)
1921 return 0;
1922error:
1923 domain_exit(domain);
1924 return ret;
1925
1926}
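/*
 * Editor's note: a minimal sketch, not part of the driver, of what the
 * identity ("1:1") mapping above boils down to for one page: the I/O
 * virtual address written into the page table equals the host physical
 * address, so RMRR regions keep working once translation is enabled.
 * The real path also reserves the IOVA range first; this skips that.
 */
static int __init example_identity_map_one_page(struct dmar_domain *domain,
						unsigned long long paddr)
{
	/* IOVA == physical address, readable and writable by the device */
	return domain_page_mapping(domain, paddr & PAGE_MASK,
				   paddr & PAGE_MASK, PAGE_SIZE,
				   DMA_PTE_READ | DMA_PTE_WRITE);
}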
1927
1928static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1929 struct pci_dev *pdev)
1930{
358dd8ac 1931 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
1932 return 0;
1933 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1934 rmrr->end_address + 1);
1935}
1936
e820482c 1937#ifdef CONFIG_DMAR_GFX_WA
d52d53b8
YL
1938struct iommu_prepare_data {
1939 struct pci_dev *pdev;
1940 int ret;
1941};
1942
1943static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1944 unsigned long end_pfn, void *datax)
1945{
1946 struct iommu_prepare_data *data;
1947
1948 data = (struct iommu_prepare_data *)datax;
1949
1950 data->ret = iommu_prepare_identity_map(data->pdev,
1951 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1952 return data->ret;
1953
1954}
1955
1956static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1957{
1958 int nid;
1959 struct iommu_prepare_data data;
1960
1961 data.pdev = pdev;
1962 data.ret = 0;
1963
1964 for_each_online_node(nid) {
1965 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1966 if (data.ret)
1967 return data.ret;
1968 }
1969 return data.ret;
1970}
1971
e820482c
KA
1972static void __init iommu_prepare_gfx_mapping(void)
1973{
1974 struct pci_dev *pdev = NULL;
e820482c
KA
1975 int ret;
1976
1977 for_each_pci_dev(pdev) {
358dd8ac 1978 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
e820482c
KA
1979 !IS_GFX_DEVICE(pdev))
1980 continue;
1981 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1982 pci_name(pdev));
d52d53b8
YL
1983 ret = iommu_prepare_with_active_regions(pdev);
1984 if (ret)
1985 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
e820482c
KA
1986 }
1987}
2abd7e16
MM
1988#else /* !CONFIG_DMAR_GFX_WA */
1989static inline void iommu_prepare_gfx_mapping(void)
1990{
1991 return;
1992}
e820482c
KA
1993#endif
1994
49a0429e
KA
1995#ifdef CONFIG_DMAR_FLOPPY_WA
1996static inline void iommu_prepare_isa(void)
1997{
1998 struct pci_dev *pdev;
1999 int ret;
2000
2001 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2002 if (!pdev)
2003 return;
2004
2005 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
2006 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2007
2008 if (ret)
2009 printk("IOMMU: Failed to create 0-64M identity map, "
2010 "floppy might not work\n");
2011
2012}
2013#else
2014static inline void iommu_prepare_isa(void)
2015{
2016 return;
2017}
2018#endif /* !CONFIG_DMAR_FLOPPY_WA */
2019
519a0549 2020static int __init init_dmars(void)
ba395927
KA
2021{
2022 struct dmar_drhd_unit *drhd;
2023 struct dmar_rmrr_unit *rmrr;
2024 struct pci_dev *pdev;
2025 struct intel_iommu *iommu;
80b20dd8 2026 int i, ret, unit = 0;
ba395927
KA
2027
2028 /*
2029 * for each drhd
2030 * allocate root
2031 * initialize and program root entry to not present
2032 * endfor
2033 */
2034 for_each_drhd_unit(drhd) {
5e0d2a6f 2035 g_num_of_iommus++;
2036 /*
2037 * lock not needed as this is only incremented in the
2038 * single-threaded kernel __init code path; all other
2039 * accesses are read-only
2040 */
2041 }
2042
d9630fe9
WH
2043 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2044 GFP_KERNEL);
2045 if (!g_iommus) {
2046 printk(KERN_ERR "Allocating global iommu array failed\n");
2047 ret = -ENOMEM;
2048 goto error;
2049 }
2050
80b20dd8 2051 deferred_flush = kzalloc(g_num_of_iommus *
2052 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2053 if (!deferred_flush) {
d9630fe9 2054 kfree(g_iommus);
5e0d2a6f 2055 ret = -ENOMEM;
2056 goto error;
2057 }
2058
5e0d2a6f 2059 for_each_drhd_unit(drhd) {
2060 if (drhd->ignored)
2061 continue;
1886e8a9
SS
2062
2063 iommu = drhd->iommu;
d9630fe9 2064 g_iommus[iommu->seq_id] = iommu;
ba395927 2065
e61d98d8
SS
2066 ret = iommu_init_domains(iommu);
2067 if (ret)
2068 goto error;
2069
ba395927
KA
2070 /*
2071 * TBD:
2072 * we could share the same root & context tables
2073 * among all IOMMUs. Need to split it later.
2074 */
2075 ret = iommu_alloc_root_entry(iommu);
2076 if (ret) {
2077 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2078 goto error;
2079 }
2080 }
2081
a77b67d4
YS
2082 for_each_drhd_unit(drhd) {
2083 if (drhd->ignored)
2084 continue;
2085
2086 iommu = drhd->iommu;
2087 if (dmar_enable_qi(iommu)) {
2088 /*
2089 * Queued Invalidate not enabled, use Register Based
2090 * Invalidate
2091 */
2092 iommu->flush.flush_context = __iommu_flush_context;
2093 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2094 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
2095 "invalidation\n",
2096 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2097 } else {
2098 iommu->flush.flush_context = qi_flush_context;
2099 iommu->flush.flush_iotlb = qi_flush_iotlb;
2100 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
2101 "invalidation\n",
2102 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2103 }
2104 }
2105
ba395927
KA
2106 /*
2107 * For each rmrr
2108 * for each dev attached to rmrr
2109 * do
2110 * locate drhd for dev, alloc domain for dev
2111 * allocate free domain
2112 * allocate page table entries for rmrr
2113 * if context not allocated for bus
2114 * allocate and init context
2115 * set present in root table for this bus
2116 * init context with domain, translation etc
2117 * endfor
2118 * endfor
2119 */
2120 for_each_rmrr_units(rmrr) {
ba395927
KA
2121 for (i = 0; i < rmrr->devices_cnt; i++) {
2122 pdev = rmrr->devices[i];
2123 /* some BIOSes list non-existent devices in the DMAR table */
2124 if (!pdev)
2125 continue;
2126 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2127 if (ret)
2128 printk(KERN_ERR
2129 "IOMMU: mapping reserved region failed\n");
2130 }
2131 }
2132
e820482c
KA
2133 iommu_prepare_gfx_mapping();
2134
49a0429e
KA
2135 iommu_prepare_isa();
2136
ba395927
KA
2137 /*
2138 * for each drhd
2139 * enable fault log
2140 * global invalidate context cache
2141 * global invalidate iotlb
2142 * enable translation
2143 */
2144 for_each_drhd_unit(drhd) {
2145 if (drhd->ignored)
2146 continue;
2147 iommu = drhd->iommu;
2148 sprintf(iommu->name, "dmar%d", unit++);
2149
2150 iommu_flush_write_buffer(iommu);
2151
3460a6d9
KA
2152 ret = dmar_set_interrupt(iommu);
2153 if (ret)
2154 goto error;
2155
ba395927
KA
2156 iommu_set_root_entry(iommu);
2157
a77b67d4
YS
2158 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2159 0);
2160 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2161 0);
f8bab735 2162 iommu_disable_protect_mem_regions(iommu);
2163
ba395927
KA
2164 ret = iommu_enable_translation(iommu);
2165 if (ret)
2166 goto error;
2167 }
2168
2169 return 0;
2170error:
2171 for_each_drhd_unit(drhd) {
2172 if (drhd->ignored)
2173 continue;
2174 iommu = drhd->iommu;
2175 free_iommu(iommu);
2176 }
d9630fe9 2177 kfree(g_iommus);
ba395927
KA
2178 return ret;
2179}
2180
2181static inline u64 aligned_size(u64 host_addr, size_t size)
2182{
2183 u64 addr;
5b6985ce
FY
2184 addr = (host_addr & (~PAGE_MASK)) + size;
2185 return PAGE_ALIGN(addr);
ba395927
KA
2186}
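/*
 * Editor's note: worked example of aligned_size() with 4KiB pages:
 * host_addr = 0x12345678 and size = 0x10 gives (0x678 + 0x10) = 0x688,
 * which PAGE_ALIGN() rounds up to 0x1000.  A 16-byte buffer inside one
 * page therefore still consumes a full page of IOVA space and one
 * page-table entry.
 */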
2187
2188struct iova *
f76aec76 2189iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 2190{
ba395927
KA
2191 struct iova *piova;
2192
2193 /* Make sure it's in range */
ba395927 2194 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 2195 if (!size || (IOVA_START_ADDR + size > end))
ba395927
KA
2196 return NULL;
2197
2198 piova = alloc_iova(&domain->iovad,
5b6985ce 2199 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
ba395927
KA
2200 return piova;
2201}
2202
f76aec76
KA
2203static struct iova *
2204__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
bb9e6d65 2205 size_t size, u64 dma_mask)
ba395927 2206{
ba395927 2207 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2208 struct iova *iova = NULL;
ba395927 2209
bb9e6d65
FT
2210 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2211 iova = iommu_alloc_iova(domain, size, dma_mask);
2212 else {
ba395927
KA
2213 /*
2214 * First try to allocate an I/O virtual address in
2215 * DMA_32BIT_MASK and, if that fails, try allocating
3609801e 2216 * from the higher range
ba395927 2217 */
f76aec76 2218 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 2219 if (!iova)
bb9e6d65 2220 iova = iommu_alloc_iova(domain, size, dma_mask);
ba395927
KA
2221 }
2222
2223 if (!iova) {
2224 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
f76aec76
KA
2225 return NULL;
2226 }
2227
2228 return iova;
2229}
2230
2231static struct dmar_domain *
2232get_valid_domain_for_dev(struct pci_dev *pdev)
2233{
2234 struct dmar_domain *domain;
2235 int ret;
2236
2237 domain = get_domain_for_dev(pdev,
2238 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2239 if (!domain) {
2240 printk(KERN_ERR
2241 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2242 return NULL;
ba395927
KA
2243 }
2244
2245 /* make sure context mapping is ok */
5331fe6f 2246 if (unlikely(!domain_context_mapped(pdev))) {
ba395927 2247 ret = domain_context_mapping(domain, pdev);
f76aec76
KA
2248 if (ret) {
2249 printk(KERN_ERR
2250 "Domain context map for %s failed",
2251 pci_name(pdev));
4fe05bbc 2252 return NULL;
f76aec76 2253 }
ba395927
KA
2254 }
2255
f76aec76
KA
2256 return domain;
2257}
2258
bb9e6d65
FT
2259static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2260 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2261{
2262 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2263 struct dmar_domain *domain;
5b6985ce 2264 phys_addr_t start_paddr;
f76aec76
KA
2265 struct iova *iova;
2266 int prot = 0;
6865f0d1 2267 int ret;
8c11e798 2268 struct intel_iommu *iommu;
f76aec76
KA
2269
2270 BUG_ON(dir == DMA_NONE);
358dd8ac 2271 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
6865f0d1 2272 return paddr;
f76aec76
KA
2273
2274 domain = get_valid_domain_for_dev(pdev);
2275 if (!domain)
2276 return 0;
2277
8c11e798 2278 iommu = domain_get_iommu(domain);
6865f0d1 2279 size = aligned_size((u64)paddr, size);
f76aec76 2280
bb9e6d65 2281 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76
KA
2282 if (!iova)
2283 goto error;
2284
5b6985ce 2285 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
f76aec76 2286
ba395927
KA
2287 /*
2288 * Check if DMAR supports zero-length reads on write-only
2289 * mappings..
2290 */
2291 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2292 !cap_zlr(iommu->cap))
ba395927
KA
2293 prot |= DMA_PTE_READ;
2294 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2295 prot |= DMA_PTE_WRITE;
2296 /*
6865f0d1 2297 * [paddr, paddr + size) might cover only part of a page; map the whole
ba395927 2298 * page. Note: if two parts of one page are mapped separately, we
6865f0d1 2299 * might have two guest addresses mapping to the same host paddr, but this
ba395927
KA
2300 * is not a big problem
2301 */
6865f0d1 2302 ret = domain_page_mapping(domain, start_paddr,
5b6985ce 2303 ((u64)paddr) & PAGE_MASK, size, prot);
ba395927
KA
2304 if (ret)
2305 goto error;
2306
f76aec76 2307 /* it's a non-present to present mapping */
8c11e798 2308 ret = iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2309 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76 2310 if (ret)
8c11e798 2311 iommu_flush_write_buffer(iommu);
f76aec76 2312
5b6985ce 2313 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 2314
ba395927 2315error:
f76aec76
KA
2316 if (iova)
2317 __free_iova(&domain->iovad, iova);
ba395927 2318 printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
5b6985ce 2319 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2320 return 0;
2321}
2322
bb9e6d65
FT
2323dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2324 size_t size, int dir)
2325{
2326 return __intel_map_single(hwdev, paddr, size, dir,
2327 to_pci_dev(hwdev)->dma_mask);
2328}
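/*
 * Editor's note: drivers do not call intel_map_single() directly; they use
 * the generic DMA API, which dispatches here through the dma_ops pointer
 * installed in intel_iommu_init() below.  A minimal, hypothetical sketch of
 * a streaming mapping (buf and len are illustrative):
 */
static dma_addr_t example_stream_to_device(struct pci_dev *pdev,
					   void *buf, size_t len)
{
	/* resolves to intel_map_single() when the IOMMU is active */
	return dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
}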
2329
5e0d2a6f 2330static void flush_unmaps(void)
2331{
80b20dd8 2332 int i, j;
5e0d2a6f 2333
5e0d2a6f 2334 timer_on = 0;
2335
2336 /* just flush them all */
2337 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2338 struct intel_iommu *iommu = g_iommus[i];
2339 if (!iommu)
2340 continue;
c42d9f32 2341
a2bb8459 2342 if (deferred_flush[i].next) {
a77b67d4
YS
2343 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2344 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 2345 for (j = 0; j < deferred_flush[i].next; j++) {
2346 __free_iova(&deferred_flush[i].domain[j]->iovad,
2347 deferred_flush[i].iova[j]);
2348 }
2349 deferred_flush[i].next = 0;
2350 }
5e0d2a6f 2351 }
2352
5e0d2a6f 2353 list_size = 0;
5e0d2a6f 2354}
2355
2356static void flush_unmaps_timeout(unsigned long data)
2357{
80b20dd8 2358 unsigned long flags;
2359
2360 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2361 flush_unmaps();
80b20dd8 2362 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2363}
2364
2365static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2366{
2367 unsigned long flags;
80b20dd8 2368 int next, iommu_id;
8c11e798 2369 struct intel_iommu *iommu;
5e0d2a6f 2370
2371 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2372 if (list_size == HIGH_WATER_MARK)
2373 flush_unmaps();
2374
8c11e798
WH
2375 iommu = domain_get_iommu(dom);
2376 iommu_id = iommu->seq_id;
c42d9f32 2377
80b20dd8 2378 next = deferred_flush[iommu_id].next;
2379 deferred_flush[iommu_id].domain[next] = dom;
2380 deferred_flush[iommu_id].iova[next] = iova;
2381 deferred_flush[iommu_id].next++;
5e0d2a6f 2382
2383 if (!timer_on) {
2384 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2385 timer_on = 1;
2386 }
2387 list_size++;
2388 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2389}
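/*
 * Editor's note: the deferred path above batches IOVA frees per IOMMU and
 * releases them either when list_size reaches HIGH_WATER_MARK or when the
 * 10 ms unmap_timer fires, trading a short window of stale IOTLB entries
 * for far fewer global IOTLB flushes.
 */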
2390
5b6985ce
FY
2391void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2392 int dir)
ba395927 2393{
ba395927 2394 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
2395 struct dmar_domain *domain;
2396 unsigned long start_addr;
ba395927 2397 struct iova *iova;
8c11e798 2398 struct intel_iommu *iommu;
ba395927 2399
358dd8ac 2400 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 2401 return;
ba395927
KA
2402 domain = find_domain(pdev);
2403 BUG_ON(!domain);
2404
8c11e798
WH
2405 iommu = domain_get_iommu(domain);
2406
ba395927 2407 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 2408 if (!iova)
ba395927 2409 return;
ba395927 2410
5b6985ce 2411 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2412 size = aligned_size((u64)dev_addr, size);
ba395927 2413
f76aec76 2414 pr_debug("Device %s unmapping: %lx@%llx\n",
5b6985ce 2415 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2416
f76aec76
KA
2417 /* clear the whole page */
2418 dma_pte_clear_range(domain, start_addr, start_addr + size);
2419 /* free page tables */
2420 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2421 if (intel_iommu_strict) {
8c11e798 2422 if (iommu_flush_iotlb_psi(iommu,
5b6985ce 2423 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
8c11e798 2424 iommu_flush_write_buffer(iommu);
5e0d2a6f 2425 /* free iova */
2426 __free_iova(&domain->iovad, iova);
2427 } else {
2428 add_unmap(domain, iova);
2429 /*
2430 * queue up the release of the unmap to avoid the roughly 1/6th of
2431 * CPU time otherwise spent in the iotlb flush operation...
2432 */
5e0d2a6f 2433 }
ba395927
KA
2434}
2435
5b6985ce
FY
2436void *intel_alloc_coherent(struct device *hwdev, size_t size,
2437 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2438{
2439 void *vaddr;
2440 int order;
2441
5b6985ce 2442 size = PAGE_ALIGN(size);
ba395927
KA
2443 order = get_order(size);
2444 flags &= ~(GFP_DMA | GFP_DMA32);
2445
2446 vaddr = (void *)__get_free_pages(flags, order);
2447 if (!vaddr)
2448 return NULL;
2449 memset(vaddr, 0, size);
2450
bb9e6d65
FT
2451 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2452 DMA_BIDIRECTIONAL,
2453 hwdev->coherent_dma_mask);
ba395927
KA
2454 if (*dma_handle)
2455 return vaddr;
2456 free_pages((unsigned long)vaddr, order);
2457 return NULL;
2458}
2459
5b6985ce
FY
2460void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2461 dma_addr_t dma_handle)
ba395927
KA
2462{
2463 int order;
2464
5b6985ce 2465 size = PAGE_ALIGN(size);
ba395927
KA
2466 order = get_order(size);
2467
2468 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2469 free_pages((unsigned long)vaddr, order);
2470}
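/*
 * Editor's note: again, consumers reach the coherent helpers above through
 * the generic DMA API rather than by calling them directly.  A hypothetical
 * sketch (ring_dma is an illustrative name):
 */
static void *example_alloc_ring(struct pci_dev *pdev, size_t size,
				dma_addr_t *ring_dma)
{
	/* resolves to intel_alloc_coherent() when the IOMMU is active */
	return dma_alloc_coherent(&pdev->dev, size, ring_dma, GFP_KERNEL);
}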
2471
12d4d40e 2472#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
5b6985ce
FY
2473
2474void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2475 int nelems, int dir)
ba395927
KA
2476{
2477 int i;
2478 struct pci_dev *pdev = to_pci_dev(hwdev);
2479 struct dmar_domain *domain;
f76aec76
KA
2480 unsigned long start_addr;
2481 struct iova *iova;
2482 size_t size = 0;
2483 void *addr;
c03ab37c 2484 struct scatterlist *sg;
8c11e798 2485 struct intel_iommu *iommu;
ba395927 2486
358dd8ac 2487 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2488 return;
2489
2490 domain = find_domain(pdev);
8c11e798
WH
2491 BUG_ON(!domain);
2492
2493 iommu = domain_get_iommu(domain);
ba395927 2494
c03ab37c 2495 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2496 if (!iova)
2497 return;
c03ab37c 2498 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2499 addr = SG_ENT_VIRT_ADDRESS(sg);
2500 size += aligned_size((u64)addr, sg->length);
2501 }
2502
5b6985ce 2503 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76
KA
2504
2505 /* clear the whole page */
2506 dma_pte_clear_range(domain, start_addr, start_addr + size);
2507 /* free page tables */
2508 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2509
8c11e798 2510 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
5b6985ce 2511 size >> VTD_PAGE_SHIFT, 0))
8c11e798 2512 iommu_flush_write_buffer(iommu);
f76aec76
KA
2513
2514 /* free iova */
2515 __free_iova(&domain->iovad, iova);
ba395927
KA
2516}
2517
ba395927 2518static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2519 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2520{
2521 int i;
c03ab37c 2522 struct scatterlist *sg;
ba395927 2523
c03ab37c 2524 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2525 BUG_ON(!sg_page(sg));
c03ab37c
FT
2526 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2527 sg->dma_length = sg->length;
ba395927
KA
2528 }
2529 return nelems;
2530}
2531
5b6985ce
FY
2532int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2533 int dir)
ba395927
KA
2534{
2535 void *addr;
2536 int i;
ba395927
KA
2537 struct pci_dev *pdev = to_pci_dev(hwdev);
2538 struct dmar_domain *domain;
f76aec76
KA
2539 size_t size = 0;
2540 int prot = 0;
2541 size_t offset = 0;
2542 struct iova *iova = NULL;
2543 int ret;
c03ab37c 2544 struct scatterlist *sg;
f76aec76 2545 unsigned long start_addr;
8c11e798 2546 struct intel_iommu *iommu;
ba395927
KA
2547
2548 BUG_ON(dir == DMA_NONE);
358dd8ac 2549 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2550 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2551
f76aec76
KA
2552 domain = get_valid_domain_for_dev(pdev);
2553 if (!domain)
2554 return 0;
2555
8c11e798
WH
2556 iommu = domain_get_iommu(domain);
2557
c03ab37c 2558 for_each_sg(sglist, sg, nelems, i) {
ba395927 2559 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2560 addr = (void *)virt_to_phys(addr);
2561 size += aligned_size((u64)addr, sg->length);
2562 }
2563
bb9e6d65 2564 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2565 if (!iova) {
c03ab37c 2566 sglist->dma_length = 0;
f76aec76
KA
2567 return 0;
2568 }
2569
2570 /*
2571 * Check if DMAR supports zero-length reads on write-only
2572 * mappings..
2573 */
2574 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2575 !cap_zlr(iommu->cap))
f76aec76
KA
2576 prot |= DMA_PTE_READ;
2577 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2578 prot |= DMA_PTE_WRITE;
2579
5b6985ce 2580 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2581 offset = 0;
c03ab37c 2582 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2583 addr = SG_ENT_VIRT_ADDRESS(sg);
2584 addr = (void *)virt_to_phys(addr);
2585 size = aligned_size((u64)addr, sg->length);
2586 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2587 ((u64)addr) & PAGE_MASK,
f76aec76
KA
2588 size, prot);
2589 if (ret) {
2590 /* clear the page */
2591 dma_pte_clear_range(domain, start_addr,
2592 start_addr + offset);
2593 /* free page tables */
2594 dma_pte_free_pagetable(domain, start_addr,
2595 start_addr + offset);
2596 /* free iova */
2597 __free_iova(&domain->iovad, iova);
ba395927
KA
2598 return 0;
2599 }
f76aec76 2600 sg->dma_address = start_addr + offset +
5b6985ce 2601 ((u64)addr & (~PAGE_MASK));
ba395927 2602 sg->dma_length = sg->length;
f76aec76 2603 offset += size;
ba395927
KA
2604 }
2605
ba395927 2606 /* it's a non-present to present mapping */
8c11e798 2607 if (iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2608 start_addr, offset >> VTD_PAGE_SHIFT, 1))
8c11e798 2609 iommu_flush_write_buffer(iommu);
ba395927
KA
2610 return nelems;
2611}
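/*
 * Editor's note: intel_map_sg() lays the whole scatterlist out in one
 * contiguous IOVA allocation.  Assuming page-aligned buffers, two entries
 * of 0x200 and 0x1800 bytes are rounded by aligned_size() to 0x1000 and
 * 0x2000, so a single 0x3000-byte IOVA range is allocated and each entry's
 * dma_address is start_addr plus its running offset into that range.
 */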
2612
2613static struct dma_mapping_ops intel_dma_ops = {
2614 .alloc_coherent = intel_alloc_coherent,
2615 .free_coherent = intel_free_coherent,
2616 .map_single = intel_map_single,
2617 .unmap_single = intel_unmap_single,
2618 .map_sg = intel_map_sg,
2619 .unmap_sg = intel_unmap_sg,
2620};
2621
2622static inline int iommu_domain_cache_init(void)
2623{
2624 int ret = 0;
2625
2626 iommu_domain_cache = kmem_cache_create("iommu_domain",
2627 sizeof(struct dmar_domain),
2628 0,
2629 SLAB_HWCACHE_ALIGN,
2630
2631 NULL);
2632 if (!iommu_domain_cache) {
2633 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2634 ret = -ENOMEM;
2635 }
2636
2637 return ret;
2638}
2639
2640static inline int iommu_devinfo_cache_init(void)
2641{
2642 int ret = 0;
2643
2644 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2645 sizeof(struct device_domain_info),
2646 0,
2647 SLAB_HWCACHE_ALIGN,
ba395927
KA
2648 NULL);
2649 if (!iommu_devinfo_cache) {
2650 printk(KERN_ERR "Couldn't create devinfo cache\n");
2651 ret = -ENOMEM;
2652 }
2653
2654 return ret;
2655}
2656
2657static inline int iommu_iova_cache_init(void)
2658{
2659 int ret = 0;
2660
2661 iommu_iova_cache = kmem_cache_create("iommu_iova",
2662 sizeof(struct iova),
2663 0,
2664 SLAB_HWCACHE_ALIGN,
ba395927
KA
2665 NULL);
2666 if (!iommu_iova_cache) {
2667 printk(KERN_ERR "Couldn't create iova cache\n");
2668 ret = -ENOMEM;
2669 }
2670
2671 return ret;
2672}
2673
2674static int __init iommu_init_mempool(void)
2675{
2676 int ret;
2677 ret = iommu_iova_cache_init();
2678 if (ret)
2679 return ret;
2680
2681 ret = iommu_domain_cache_init();
2682 if (ret)
2683 goto domain_error;
2684
2685 ret = iommu_devinfo_cache_init();
2686 if (!ret)
2687 return ret;
2688
2689 kmem_cache_destroy(iommu_domain_cache);
2690domain_error:
2691 kmem_cache_destroy(iommu_iova_cache);
2692
2693 return -ENOMEM;
2694}
2695
2696static void __init iommu_exit_mempool(void)
2697{
2698 kmem_cache_destroy(iommu_devinfo_cache);
2699 kmem_cache_destroy(iommu_domain_cache);
2700 kmem_cache_destroy(iommu_iova_cache);
2701
2702}
2703
ba395927
KA
2704static void __init init_no_remapping_devices(void)
2705{
2706 struct dmar_drhd_unit *drhd;
2707
2708 for_each_drhd_unit(drhd) {
2709 if (!drhd->include_all) {
2710 int i;
2711 for (i = 0; i < drhd->devices_cnt; i++)
2712 if (drhd->devices[i] != NULL)
2713 break;
2714 /* ignore this DMAR unit if no PCI devices exist under it */
2715 if (i == drhd->devices_cnt)
2716 drhd->ignored = 1;
2717 }
2718 }
2719
2720 if (dmar_map_gfx)
2721 return;
2722
2723 for_each_drhd_unit(drhd) {
2724 int i;
2725 if (drhd->ignored || drhd->include_all)
2726 continue;
2727
2728 for (i = 0; i < drhd->devices_cnt; i++)
2729 if (drhd->devices[i] &&
2730 !IS_GFX_DEVICE(drhd->devices[i]))
2731 break;
2732
2733 if (i < drhd->devices_cnt)
2734 continue;
2735
2736 /* bypass IOMMU if it is just for gfx devices */
2737 drhd->ignored = 1;
2738 for (i = 0; i < drhd->devices_cnt; i++) {
2739 if (!drhd->devices[i])
2740 continue;
358dd8ac 2741 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2742 }
2743 }
2744}
2745
2746int __init intel_iommu_init(void)
2747{
2748 int ret = 0;
2749
ba395927
KA
2750 if (dmar_table_init())
2751 return -ENODEV;
2752
1886e8a9
SS
2753 if (dmar_dev_scope_init())
2754 return -ENODEV;
2755
2ae21010
SS
2756 /*
2757 * Check the need for DMA-remapping initialization now.
2758 * The initialization above will also be used by interrupt remapping.
2759 */
2760 if (no_iommu || swiotlb || dmar_disabled)
2761 return -ENODEV;
2762
ba395927
KA
2763 iommu_init_mempool();
2764 dmar_init_reserved_ranges();
2765
2766 init_no_remapping_devices();
2767
2768 ret = init_dmars();
2769 if (ret) {
2770 printk(KERN_ERR "IOMMU: dmar init failed\n");
2771 put_iova_domain(&reserved_iova_list);
2772 iommu_exit_mempool();
2773 return ret;
2774 }
2775 printk(KERN_INFO
2776 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2777
5e0d2a6f 2778 init_timer(&unmap_timer);
ba395927
KA
2779 force_iommu = 1;
2780 dma_ops = &intel_dma_ops;
a8bcbb0d
JR
2781
2782 register_iommu(&intel_iommu_ops);
2783
ba395927
KA
2784 return 0;
2785}
e820482c 2786
c7151a8d
WH
2787static int vm_domain_add_dev_info(struct dmar_domain *domain,
2788 struct pci_dev *pdev)
2789{
2790 struct device_domain_info *info;
2791 unsigned long flags;
2792
2793 info = alloc_devinfo_mem();
2794 if (!info)
2795 return -ENOMEM;
2796
2797 info->bus = pdev->bus->number;
2798 info->devfn = pdev->devfn;
2799 info->dev = pdev;
2800 info->domain = domain;
2801
2802 spin_lock_irqsave(&device_domain_lock, flags);
2803 list_add(&info->link, &domain->devices);
2804 list_add(&info->global, &device_domain_list);
2805 pdev->dev.archdata.iommu = info;
2806 spin_unlock_irqrestore(&device_domain_lock, flags);
2807
2808 return 0;
2809}
2810
2811static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2812 struct pci_dev *pdev)
2813{
2814 struct device_domain_info *info;
2815 struct intel_iommu *iommu;
2816 unsigned long flags;
2817 int found = 0;
2818 struct list_head *entry, *tmp;
2819
2820 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2821 if (!iommu)
2822 return;
2823
2824 spin_lock_irqsave(&device_domain_lock, flags);
2825 list_for_each_safe(entry, tmp, &domain->devices) {
2826 info = list_entry(entry, struct device_domain_info, link);
2827 if (info->bus == pdev->bus->number &&
2828 info->devfn == pdev->devfn) {
2829 list_del(&info->link);
2830 list_del(&info->global);
2831 if (info->dev)
2832 info->dev->dev.archdata.iommu = NULL;
2833 spin_unlock_irqrestore(&device_domain_lock, flags);
2834
2835 iommu_detach_dev(iommu, info->bus, info->devfn);
2836 free_devinfo_mem(info);
2837
2838 spin_lock_irqsave(&device_domain_lock, flags);
2839
2840 if (found)
2841 break;
2842 else
2843 continue;
2844 }
2845
2846 /* if there are no other devices under the same iommu
2847 * owned by this domain, clear this iommu in iommu_bmp,
2848 * update iommu count and coherency
2849 */
2850 if (device_to_iommu(info->bus, info->devfn) == iommu)
2851 found = 1;
2852 }
2853
2854 if (found == 0) {
2855 unsigned long tmp_flags;
2856 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2857 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2858 domain->iommu_count--;
58c610bd 2859 domain_update_iommu_cap(domain);
c7151a8d
WH
2860 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2861 }
2862
2863 spin_unlock_irqrestore(&device_domain_lock, flags);
2864}
2865
2866static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2867{
2868 struct device_domain_info *info;
2869 struct intel_iommu *iommu;
2870 unsigned long flags1, flags2;
2871
2872 spin_lock_irqsave(&device_domain_lock, flags1);
2873 while (!list_empty(&domain->devices)) {
2874 info = list_entry(domain->devices.next,
2875 struct device_domain_info, link);
2876 list_del(&info->link);
2877 list_del(&info->global);
2878 if (info->dev)
2879 info->dev->dev.archdata.iommu = NULL;
2880
2881 spin_unlock_irqrestore(&device_domain_lock, flags1);
2882
2883 iommu = device_to_iommu(info->bus, info->devfn);
2884 iommu_detach_dev(iommu, info->bus, info->devfn);
2885
2886 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 2887 * and capabilities
c7151a8d
WH
2888 */
2889 spin_lock_irqsave(&domain->iommu_lock, flags2);
2890 if (test_and_clear_bit(iommu->seq_id,
2891 &domain->iommu_bmp)) {
2892 domain->iommu_count--;
58c610bd 2893 domain_update_iommu_cap(domain);
c7151a8d
WH
2894 }
2895 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2896
2897 free_devinfo_mem(info);
2898 spin_lock_irqsave(&device_domain_lock, flags1);
2899 }
2900 spin_unlock_irqrestore(&device_domain_lock, flags1);
2901}
2902
5e98c4b1
WH
2903/* domain id for virtual machine; it won't be set in any context entry */
2904static unsigned long vm_domid;
2905
fe40f1e0
WH
2906static int vm_domain_min_agaw(struct dmar_domain *domain)
2907{
2908 int i;
2909 int min_agaw = domain->agaw;
2910
2911 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
2912 for (; i < g_num_of_iommus; ) {
2913 if (min_agaw > g_iommus[i]->agaw)
2914 min_agaw = g_iommus[i]->agaw;
2915
2916 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
2917 }
2918
2919 return min_agaw;
2920}
2921
5e98c4b1
WH
2922static struct dmar_domain *iommu_alloc_vm_domain(void)
2923{
2924 struct dmar_domain *domain;
2925
2926 domain = alloc_domain_mem();
2927 if (!domain)
2928 return NULL;
2929
2930 domain->id = vm_domid++;
2931 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
2932 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
2933
2934 return domain;
2935}
2936
2937static int vm_domain_init(struct dmar_domain *domain, int guest_width)
2938{
2939 int adjust_width;
2940
2941 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
2942 spin_lock_init(&domain->mapping_lock);
2943 spin_lock_init(&domain->iommu_lock);
2944
2945 domain_reserve_special_ranges(domain);
2946
2947 /* calculate AGAW */
2948 domain->gaw = guest_width;
2949 adjust_width = guestwidth_to_adjustwidth(guest_width);
2950 domain->agaw = width_to_agaw(adjust_width);
2951
2952 INIT_LIST_HEAD(&domain->devices);
2953
2954 domain->iommu_count = 0;
2955 domain->iommu_coherency = 0;
fe40f1e0 2956 domain->max_addr = 0;
5e98c4b1
WH
2957
2958 /* always allocate the top pgd */
2959 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
2960 if (!domain->pgd)
2961 return -ENOMEM;
2962 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
2963 return 0;
2964}
2965
2966static void iommu_free_vm_domain(struct dmar_domain *domain)
2967{
2968 unsigned long flags;
2969 struct dmar_drhd_unit *drhd;
2970 struct intel_iommu *iommu;
2971 unsigned long i;
2972 unsigned long ndomains;
2973
2974 for_each_drhd_unit(drhd) {
2975 if (drhd->ignored)
2976 continue;
2977 iommu = drhd->iommu;
2978
2979 ndomains = cap_ndoms(iommu->cap);
2980 i = find_first_bit(iommu->domain_ids, ndomains);
2981 for (; i < ndomains; ) {
2982 if (iommu->domains[i] == domain) {
2983 spin_lock_irqsave(&iommu->lock, flags);
2984 clear_bit(i, iommu->domain_ids);
2985 iommu->domains[i] = NULL;
2986 spin_unlock_irqrestore(&iommu->lock, flags);
2987 break;
2988 }
2989 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
2990 }
2991 }
2992}
2993
2994static void vm_domain_exit(struct dmar_domain *domain)
2995{
2996 u64 end;
2997
2998 /* Domain 0 is reserved, so don't process it */
2999 if (!domain)
3000 return;
3001
3002 vm_domain_remove_all_dev_info(domain);
3003 /* destroy iovas */
3004 put_iova_domain(&domain->iovad);
3005 end = DOMAIN_MAX_ADDR(domain->gaw);
3006 end = end & (~VTD_PAGE_MASK);
3007
3008 /* clear ptes */
3009 dma_pte_clear_range(domain, 0, end);
3010
3011 /* free page tables */
3012 dma_pte_free_pagetable(domain, 0, end);
3013
3014 iommu_free_vm_domain(domain);
3015 free_domain_mem(domain);
3016}
3017
5d450806 3018static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3019{
5d450806 3020 struct dmar_domain *dmar_domain;
38717946 3021
5d450806
JR
3022 dmar_domain = iommu_alloc_vm_domain();
3023 if (!dmar_domain) {
38717946 3024 printk(KERN_ERR
5d450806
JR
3025 "intel_iommu_domain_init: dmar_domain == NULL\n");
3026 return -ENOMEM;
38717946 3027 }
5d450806 3028 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3029 printk(KERN_ERR
5d450806
JR
3030 "intel_iommu_domain_init() failed\n");
3031 vm_domain_exit(dmar_domain);
3032 return -ENOMEM;
38717946 3033 }
5d450806 3034 domain->priv = dmar_domain;
faa3d6f5 3035
5d450806 3036 return 0;
38717946 3037}
38717946 3038
5d450806 3039static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3040{
5d450806
JR
3041 struct dmar_domain *dmar_domain = domain->priv;
3042
3043 domain->priv = NULL;
3044 vm_domain_exit(dmar_domain);
38717946 3045}
38717946 3046
4c5478c9
JR
3047static int intel_iommu_attach_device(struct iommu_domain *domain,
3048 struct device *dev)
38717946 3049{
4c5478c9
JR
3050 struct dmar_domain *dmar_domain = domain->priv;
3051 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3052 struct intel_iommu *iommu;
3053 int addr_width;
3054 u64 end;
faa3d6f5
WH
3055 int ret;
3056
3057 /* normally pdev is not mapped */
3058 if (unlikely(domain_context_mapped(pdev))) {
3059 struct dmar_domain *old_domain;
3060
3061 old_domain = find_domain(pdev);
3062 if (old_domain) {
4c5478c9 3063 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
faa3d6f5
WH
3064 vm_domain_remove_one_dev_info(old_domain, pdev);
3065 else
3066 domain_remove_dev_info(old_domain);
3067 }
3068 }
3069
fe40f1e0
WH
3070 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
3071 if (!iommu)
3072 return -ENODEV;
3073
3074 /* check if this iommu agaw is sufficient for max mapped address */
3075 addr_width = agaw_to_width(iommu->agaw);
3076 end = DOMAIN_MAX_ADDR(addr_width);
3077 end = end & VTD_PAGE_MASK;
4c5478c9 3078 if (end < dmar_domain->max_addr) {
fe40f1e0
WH
3079 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3080 "sufficient for the mapped address (%llx)\n",
4c5478c9 3081 __func__, iommu->agaw, dmar_domain->max_addr);
fe40f1e0
WH
3082 return -EFAULT;
3083 }
3084
4c5478c9 3085 ret = domain_context_mapping(dmar_domain, pdev);
faa3d6f5
WH
3086 if (ret)
3087 return ret;
3088
4c5478c9 3089 ret = vm_domain_add_dev_info(dmar_domain, pdev);
faa3d6f5 3090 return ret;
38717946 3091}
38717946 3092
4c5478c9
JR
3093static void intel_iommu_detach_device(struct iommu_domain *domain,
3094 struct device *dev)
38717946 3095{
4c5478c9
JR
3096 struct dmar_domain *dmar_domain = domain->priv;
3097 struct pci_dev *pdev = to_pci_dev(dev);
3098
3099 vm_domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3100}
c7151a8d 3101
dde57a21
JR
3102static int intel_iommu_map_range(struct iommu_domain *domain,
3103 unsigned long iova, phys_addr_t hpa,
3104 size_t size, int iommu_prot)
faa3d6f5 3105{
dde57a21 3106 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
3107 u64 max_addr;
3108 int addr_width;
dde57a21 3109 int prot = 0;
faa3d6f5 3110 int ret;
fe40f1e0 3111
dde57a21
JR
3112 if (iommu_prot & IOMMU_READ)
3113 prot |= DMA_PTE_READ;
3114 if (iommu_prot & IOMMU_WRITE)
3115 prot |= DMA_PTE_WRITE;
9cf06697
SY
3116 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3117 prot |= DMA_PTE_SNP;
dde57a21 3118
fe40f1e0 3119 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
dde57a21 3120 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3121 int min_agaw;
3122 u64 end;
3123
3124 /* check if minimum agaw is sufficient for mapped address */
dde57a21 3125 min_agaw = vm_domain_min_agaw(dmar_domain);
fe40f1e0
WH
3126 addr_width = agaw_to_width(min_agaw);
3127 end = DOMAIN_MAX_ADDR(addr_width);
3128 end = end & VTD_PAGE_MASK;
3129 if (end < max_addr) {
3130 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3131 "sufficient for the mapped address (%llx)\n",
3132 __func__, min_agaw, max_addr);
3133 return -EFAULT;
3134 }
dde57a21 3135 dmar_domain->max_addr = max_addr;
fe40f1e0
WH
3136 }
3137
dde57a21 3138 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
faa3d6f5 3139 return ret;
38717946 3140}
38717946 3141
dde57a21
JR
3142static void intel_iommu_unmap_range(struct iommu_domain *domain,
3143 unsigned long iova, size_t size)
38717946 3144{
dde57a21 3145 struct dmar_domain *dmar_domain = domain->priv;
faa3d6f5
WH
3146 dma_addr_t base;
3147
3148 /* The address might not be aligned */
3149 base = iova & VTD_PAGE_MASK;
3150 size = VTD_PAGE_ALIGN(size);
dde57a21 3151 dma_pte_clear_range(dmar_domain, base, base + size);
fe40f1e0 3152
dde57a21
JR
3153 if (dmar_domain->max_addr == base + size)
3154 dmar_domain->max_addr = base;
38717946 3155}
38717946 3156
d14d6577
JR
3157static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3158 unsigned long iova)
38717946 3159{
d14d6577 3160 struct dmar_domain *dmar_domain = domain->priv;
38717946 3161 struct dma_pte *pte;
faa3d6f5 3162 u64 phys = 0;
38717946 3163
d14d6577 3164 pte = addr_to_dma_pte(dmar_domain, iova);
38717946 3165 if (pte)
faa3d6f5 3166 phys = dma_pte_addr(pte);
38717946 3167
faa3d6f5 3168 return phys;
38717946 3169}
a8bcbb0d 3170
dbb9fd86
SY
3171static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3172 unsigned long cap)
3173{
3174 struct dmar_domain *dmar_domain = domain->priv;
3175
3176 if (cap == IOMMU_CAP_CACHE_COHERENCY)
3177 return dmar_domain->iommu_snooping;
3178
3179 return 0;
3180}
3181
a8bcbb0d
JR
3182static struct iommu_ops intel_iommu_ops = {
3183 .domain_init = intel_iommu_domain_init,
3184 .domain_destroy = intel_iommu_domain_destroy,
3185 .attach_dev = intel_iommu_attach_device,
3186 .detach_dev = intel_iommu_detach_device,
3187 .map = intel_iommu_map_range,
3188 .unmap = intel_iommu_unmap_range,
3189 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 3190 .domain_has_cap = intel_iommu_domain_has_cap,
a8bcbb0d 3191};
9af88143
DW
3192
3193static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3194{
3195 /*
3196 * Mobile 4 Series Chipset neglects to set RWBF capability,
3197 * but needs it:
3198 */
3199 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3200 rwbf_quirk = 1;
3201}
3202
3203DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);